summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
Diffstat (limited to 'sql')
-rw-r--r--sql/CMakeLists.txt25
-rw-r--r--sql/event_data_objects.cc17
-rw-r--r--sql/events.cc57
-rw-r--r--sql/ha_partition.cc8
-rw-r--r--sql/ha_partition.h4
-rw-r--r--sql/handler.cc160
-rw-r--r--sql/handler.h20
-rw-r--r--sql/item_func.cc12
-rw-r--r--sql/keycaches.cc1
-rw-r--r--sql/lock.cc93
-rw-r--r--sql/log.cc247
-rw-r--r--sql/log.h23
-rw-r--r--sql/log_event.cc221
-rw-r--r--sql/mdl.cc181
-rw-r--r--sql/mdl.h17
-rw-r--r--sql/mysqld.cc910
-rw-r--r--sql/mysqld.h18
-rw-r--r--sql/protocol.cc8
-rw-r--r--sql/rpl_gtid.cc12
-rw-r--r--sql/rpl_mi.cc8
-rw-r--r--sql/rpl_record.cc20
-rw-r--r--sql/set_var.cc4
-rw-r--r--sql/set_var.h6
-rw-r--r--sql/slave.cc92
-rw-r--r--sql/sp.cc35
-rw-r--r--sql/sql_acl.cc42
-rw-r--r--sql/sql_admin.cc3
-rw-r--r--sql/sql_alter.cc21
-rw-r--r--sql/sql_base.cc58
-rw-r--r--sql/sql_builtin.cc.in9
-rw-r--r--sql/sql_cache.cc20
-rw-r--r--sql/sql_class.cc346
-rw-r--r--sql/sql_class.h76
-rw-r--r--sql/sql_connect.cc27
-rw-r--r--sql/sql_delete.cc12
-rw-r--r--sql/sql_insert.cc116
-rw-r--r--sql/sql_lex.cc11
-rw-r--r--sql/sql_parse.cc694
-rw-r--r--sql/sql_parse.h25
-rw-r--r--sql/sql_partition_admin.cc29
-rw-r--r--sql/sql_plugin.cc6
-rw-r--r--sql/sql_prepare.cc22
-rw-r--r--sql/sql_reload.cc30
-rw-r--r--sql/sql_repl.cc9
-rw-r--r--sql/sql_show.cc101
-rw-r--r--sql/sql_show.h2
-rw-r--r--sql/sql_table.cc89
-rw-r--r--sql/sql_trigger.cc62
-rw-r--r--sql/sql_truncate.cc10
-rw-r--r--sql/sql_update.cc12
-rw-r--r--sql/sql_yacc.yy12
-rw-r--r--sql/sys_vars.cc307
-rw-r--r--sql/table.cc3
-rw-r--r--sql/transaction.cc51
-rw-r--r--sql/tztime.cc11
-rw-r--r--sql/wsrep_applier.cc400
-rw-r--r--sql/wsrep_applier.h39
-rw-r--r--sql/wsrep_binlog.cc412
-rw-r--r--sql/wsrep_binlog.h56
-rw-r--r--sql/wsrep_check_opts.cc396
-rw-r--r--sql/wsrep_hton.cc615
-rw-r--r--sql/wsrep_mysqld.cc1601
-rw-r--r--sql/wsrep_mysqld.h338
-rw-r--r--sql/wsrep_notify.cc111
-rw-r--r--sql/wsrep_priv.h51
-rw-r--r--sql/wsrep_sst.cc1224
-rw-r--r--sql/wsrep_sst.h70
-rw-r--r--sql/wsrep_thd.cc666
-rw-r--r--sql/wsrep_thd.h40
-rw-r--r--sql/wsrep_utils.cc556
-rw-r--r--sql/wsrep_utils.h229
-rw-r--r--sql/wsrep_var.cc670
-rw-r--r--sql/wsrep_var.h88
-rw-r--r--sql/wsrep_xid.cc150
-rw-r--r--sql/wsrep_xid.h33
75 files changed, 12026 insertions, 134 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index ca2b059eeef..0f771d26bb5 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -14,6 +14,10 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+IF(WITH_WSREP)
+ SET(WSREP_INCLUDES ${CMAKE_SOURCE_DIR}/wsrep)
+ENDIF()
+
INCLUDE_DIRECTORIES(
${CMAKE_SOURCE_DIR}/include
${CMAKE_SOURCE_DIR}/sql
@@ -21,6 +25,7 @@ ${PCRE_INCLUDES}
${ZLIB_INCLUDE_DIR}
${SSL_INCLUDE_DIRS}
${CMAKE_BINARY_DIR}/sql
+${WSREP_INCLUDES}
)
SET(GEN_SOURCES
@@ -62,6 +67,23 @@ IF(SSL_DEFINES)
ADD_DEFINITIONS(${SSL_DEFINES})
ENDIF()
+IF(WITH_WSREP)
+ SET(WSREP_SOURCES
+ wsrep_utils.cc
+ wsrep_xid.cc
+ wsrep_check_opts.cc
+ wsrep_hton.cc
+ wsrep_mysqld.cc
+ wsrep_notify.cc
+ wsrep_sst.cc
+ wsrep_var.cc
+ wsrep_binlog.cc
+ wsrep_applier.cc
+ wsrep_thd.cc
+ )
+ SET(WSREP_LIB wsrep)
+ENDIF()
+
SET (SQL_SOURCE
../sql-common/client.c compat56.cc derror.cc des_key_file.cc
discover.cc ../libmysql/errmsg.c field.cc field_conv.cc
@@ -107,7 +129,6 @@ SET (SQL_SOURCE
sql_signal.cc rpl_handler.cc mdl.cc sql_admin.cc
transaction.cc sys_vars.cc sql_truncate.cc datadict.cc
sql_reload.cc sql_cmd.h item_inetfunc.cc
-
# added in MariaDB:
sql_explain.h sql_explain.cc
sql_lifo_buffer.h sql_join_cache.h sql_join_cache.cc
@@ -119,6 +140,7 @@ SET (SQL_SOURCE
../sql-common/mysql_async.c
my_apc.cc my_apc.h
rpl_gtid.cc rpl_parallel.cc
+ ${WSREP_SOURCES}
table_cache.cc
${CMAKE_CURRENT_BINARY_DIR}/sql_builtin.cc
${GEN_SOURCES}
@@ -148,6 +170,7 @@ DTRACE_INSTRUMENT(sql)
TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS}
mysys mysys_ssl dbug strings vio pcre ${LIBJEMALLOC}
${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT}
+ ${WSREP_LIB}
${SSL_LIBRARIES})
IF(WIN32)
diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc
index 09256a34853..e7bdc42b2e6 100644
--- a/sql/event_data_objects.cc
+++ b/sql/event_data_objects.cc
@@ -1472,8 +1472,25 @@ end:
bool save_tx_read_only= thd->tx_read_only;
thd->tx_read_only= false;
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ // sql_print_information("sizeof(LEX) = %d", sizeof(struct LEX));
+ // sizeof(LEX) = 4512, so it's relatively safe to allocate it on stack.
+ LEX lex;
+ LEX* saved = thd->lex;
+ lex.sql_command = SQLCOM_DROP_EVENT;
+ thd->lex = &lex;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL);
+ thd->lex = saved;
+ }
+#endif
+
ret= Events::drop_event(thd, dbname, name, FALSE);
+#ifdef WITH_WSREP
+ WSREP_TO_ISOLATION_END;
+ error:
+#endif
thd->tx_read_only= save_tx_read_only;
thd->security_ctx->master_access= saved_master_access;
}
diff --git a/sql/events.cc b/sql/events.cc
index 8d78497a29e..0be9ce4e6f5 100644
--- a/sql/events.cc
+++ b/sql/events.cc
@@ -1145,7 +1145,20 @@ Events::load_events_from_db(THD *thd)
delete et;
goto end;
}
-
+#ifdef WITH_WSREP
+ // When an event arrives via SST from the node that created it, its status is ENABLED.
+ // This is problematic because two nodes would then have the same event enabled.
+ if (et->originator != (longlong) thd->variables.server_id)
+ {
+ store_record(table, record[1]);
+ table->field[ET_FIELD_STATUS]->
+ store((longlong) Event_parse_data::SLAVESIDE_DISABLED,
+ TRUE);
+ (void) table->file->ha_update_row(table->record[1], table->record[0]);
+ delete et;
+ continue;
+ }
+#endif
/**
Since the Event_queue_element object could be deleted inside
Event_queue::create_event we should save the value of dropped flag
@@ -1194,6 +1207,48 @@ end:
DBUG_RETURN(ret);
}
+#ifdef WITH_WSREP
+/**
+  Build the statement text with create_query_string() and hand it to
+  wsrep_to_buf_helper().
+
+  @param thd      connection whose current statement is processed
+  @param buf      forwarded unchanged to wsrep_to_buf_helper()
+  @param buf_len  forwarded unchanged to wsrep_to_buf_helper()
+
+  @return 1 when create_query_string() reports a non-zero result,
+          otherwise whatever wsrep_to_buf_helper() returns
+*/
+int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len)
+{
+  String event_stmt;
+
+  if (!create_query_string(thd, &event_stmt))
+  {
+    return wsrep_to_buf_helper(thd, event_stmt.ptr(), event_stmt.length(),
+                               buf, buf_len);
+  }
+
+  /* Building the text failed: log the schema and the raw query, then bail. */
+  const char *schema= thd->db ? thd->db : "(null)";
+  WSREP_WARN("events create string failed: schema: %s, query: %s",
+             schema, thd->query());
+  return 1;
+}
+/**
+  Compose "ALTER " + definer + statement tail in @a buf, where the tail is
+  the stmt_definition_begin..stmt_definition_end range recorded in thd->lex.
+
+  @return 0 on success, 1 when one of the String::append() calls reports a
+          non-zero result
+*/
+static int
+wsrep_alter_query_string(THD *thd, String *buf)
+{
+  /* Leading keyword */
+  if (buf->append(STRING_WITH_LEN("ALTER ")))
+    return 1;
+
+  /* Definer clause */
+  append_definer(thd, buf, &(thd->lex->definer->user),
+                 &(thd->lex->definer->host));
+
+  /* Everything in thd->query that follows the event name part */
+  const char *const tail_begin= thd->lex->stmt_definition_begin;
+  return buf->append(tail_begin,
+                     thd->lex->stmt_definition_end - tail_begin) ? 1 : 0;
+}
+/**
+  Build the ALTER statement text with wsrep_alter_query_string() and hand it
+  to wsrep_to_buf_helper().
+
+  @param thd      connection whose current statement is processed
+  @param buf      forwarded unchanged to wsrep_to_buf_helper()
+  @param buf_len  forwarded unchanged to wsrep_to_buf_helper()
+
+  @return 1 when wsrep_alter_query_string() fails, otherwise whatever
+          wsrep_to_buf_helper() returns
+*/
+int wsrep_alter_event_query(THD *thd, uchar** buf, size_t* buf_len)
+{
+  String alter_stmt;
+
+  if (!wsrep_alter_query_string(thd, &alter_stmt))
+  {
+    return wsrep_to_buf_helper(thd, alter_stmt.ptr(), alter_stmt.length(),
+                               buf, buf_len);
+  }
+
+  /* Building the text failed: log the schema and the raw query, then bail. */
+  const char *schema= thd->db ? thd->db : "(null)";
+  WSREP_WARN("events alter string failed: schema: %s, query: %s",
+             schema, thd->query());
+  return 1;
+}
+#endif /* WITH_WSREP */
/**
@} (End of group Event_Scheduler)
*/
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 0824fc9b76b..4441e975f93 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -387,7 +387,13 @@ const char *ha_partition::table_type() const
// we can do this since we only support a single engine type
return m_file[0]->table_type();
}
-
+#ifdef WITH_WSREP
+/**
+  Report the legacy engine type of the first partition's handlerton.
+  That is sufficient because only a single engine type is supported.
+*/
+int ha_partition::wsrep_db_type() const
+{
+  handlerton *const first_part_hton= m_file[0]->ht;
+  return ha_legacy_type(first_part_hton);
+}
+#endif /* WITH_WSREP */
/*
Destructor method
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
index 71ae84b06a0..33a1a8ab43b 100644
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -1282,7 +1282,9 @@ public:
DBUG_ASSERT(h == m_file[i]->ht);
return h;
}
-
+#ifdef WITH_WSREP
+ virtual int wsrep_db_type() const;
+#endif /* WITH_WSREP */
friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2);
};
diff --git a/sql/handler.cc b/sql/handler.cc
index 2696d69bfcf..19f648219ac 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -51,6 +51,10 @@
#include "../storage/maria/ha_maria.h"
#endif
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#include "wsrep_xid.h"
+#endif
/*
While we have legacy_db_type, we have this array to
check for dups and to find handlerton from legacy_db_type.
@@ -1157,10 +1161,27 @@ int ha_prepare(THD *thd)
{
if ((err= ht->prepare(ht, thd, all)))
{
+#ifdef WITH_WSREP
+ if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP)
+ {
+ error= 1;
+ /* avoid sending error, if we need to replay */
+ if (thd->wsrep_conflict_state!= MUST_REPLAY)
+ {
+ my_error(ER_LOCK_DEADLOCK, MYF(0), err);
+ }
+ }
+ else
+ {
+ /* not wsrep hton, bail to native mysql behavior */
+#endif
my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
ha_rollback_trans(thd, all);
error=1;
break;
+#ifdef WITH_WSREP
+ }
+#endif
}
}
else
@@ -1356,7 +1377,12 @@ int ha_commit_trans(THD *thd, bool all)
mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE,
MDL_EXPLICIT);
+#ifdef WITH_WSREP
+ if (!WSREP(thd) &&
+ thd->mdl_context.acquire_lock(&mdl_request,
+#else
if (thd->mdl_context.acquire_lock(&mdl_request,
+#endif /* WITH_WSREP */
thd->variables.lock_wait_timeout))
{
ha_rollback_trans(thd, all);
@@ -1403,7 +1429,33 @@ int ha_commit_trans(THD *thd, bool all)
err= ht->prepare(ht, thd, all);
status_var_increment(thd->status_var.ha_prepare_count);
if (err)
- my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+#ifdef WITH_WSREP
+ {
+ if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP)
+ {
+ error= 1;
+ switch (err)
+ {
+ case WSREP_TRX_SIZE_EXCEEDED:
+ /* give the user the size-exceeded error from wsrep_api.h */
+ my_error(ER_ERROR_DURING_COMMIT, MYF(0), WSREP_SIZE_EXCEEDED);
+ break;
+ case WSREP_TRX_CERT_FAIL:
+ case WSREP_TRX_ERROR:
+ /* avoid sending error, if we need to replay */
+ if (thd->wsrep_conflict_state!= MUST_REPLAY)
+ {
+ my_error(ER_LOCK_DEADLOCK, MYF(0), err);
+ }
+ }
+ }
+ else
+ /* not wsrep hton, bail to native mysql behavior */
+#endif /* WITH_WSREP */
+ my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
if (err)
goto err;
@@ -1414,6 +1466,13 @@ int ha_commit_trans(THD *thd, bool all)
DEBUG_SYNC(thd, "ha_commit_trans_after_prepare");
DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
+#ifdef WITH_WSREP
+ if (!error && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid))
+ {
+ // xid was rewritten by wsrep
+ xid= wsrep_xid_seqno(thd->transaction.xid_state.xid);
+ }
+#endif // WITH_WSREP
if (!is_real_trans)
{
error= commit_one_phase_2(thd, all, trans, is_real_trans);
@@ -1818,7 +1877,13 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
got, hton_name(hton)->str);
for (int i=0; i < got; i ++)
{
+#ifdef WITH_WSREP
+ my_xid x=(wsrep_is_wsrep_xid(&info->list[i]) ?
+ wsrep_xid_seqno(info->list[i]) :
+ info->list[i].get_my_xid());
+#else
my_xid x=info->list[i].get_my_xid();
+#endif /* WITH_WSREP */
if (!x) // not "mine" - that is generated by external TM
{
#ifndef DBUG_OFF
@@ -3133,7 +3198,12 @@ int handler::update_auto_increment()
variables->auto_increment_increment);
auto_inc_intervals_count++;
/* Row-based replication does not need to store intervals in binlog */
+#ifdef WITH_WSREP
+ if (((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()) &&
+ !thd->is_current_stmt_binlog_format_row())
+#else
if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row())
+#endif /* WITH_WSREP */
thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
auto_inc_interval_for_cur_row.values(),
variables->auto_increment_increment);
@@ -5758,7 +5828,13 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table)
return (thd->is_current_stmt_binlog_format_row() &&
table->s->cached_row_logging_check &&
(thd->variables.option_bits & OPTION_BIN_LOG) &&
+#ifdef WITH_WSREP
+ /* applier and replayer should not binlog */
+ ((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) ||
+ mysql_bin_log.is_open()));
+#else
mysql_bin_log.is_open());
+#endif
}
@@ -5858,6 +5934,30 @@ static int binlog_log_row(TABLE* table,
bool error= 0;
THD *const thd= table->in_use;
+#ifdef WITH_WSREP
+ /* only InnoDB tables will be replicated through binlog emulation */
+ if (WSREP_EMULATE_BINLOG(thd) &&
+ table->file->ht->db_type != DB_TYPE_INNODB &&
+ !(table->file->ht->db_type == DB_TYPE_PARTITION_DB &&
+ (((ha_partition*)(table->file))->wsrep_db_type() == DB_TYPE_INNODB)))
+ {
+ return 0;
+ }
+
+ /* enforce wsrep_max_ws_rows */
+ if (table->s->tmp_table == NO_TMP_TABLE && WSREP(thd))
+ {
+ thd->wsrep_affected_rows++;
+ if (wsrep_max_ws_rows &&
+ thd->wsrep_exec_mode != REPL_RECV &&
+ thd->wsrep_affected_rows > wsrep_max_ws_rows)
+ {
+ trans_rollback_stmt(thd) || trans_rollback(thd);
+ my_message(ER_ERROR_DURING_COMMIT, "wsrep_max_ws_rows exceeded", MYF(0));
+ return ER_ERROR_DURING_COMMIT;
+ }
+ }
+#endif /* WITH_WSREP */
if (check_table_binlog_row_based(thd, table))
{
MY_BITMAP cols;
@@ -6189,6 +6289,64 @@ void handler::set_lock_type(enum thr_lock_type lock)
table->reginfo.lock_type= lock;
}
+#ifdef WITH_WSREP
+/**
+  Ask the storage engine to force a victim transaction to abort.
+
+  @details
+  Currently only InnoDB has this functionality, so the request is routed to
+  the handlerton registered under DB_TYPE_INNODB. Any storage engine
+  implementing the wsrep API should provide this service to support
+  multi-master operation. The request is only issued when wsrep is active
+  for bf_thd, or when bf_thd uses the RSU OSU method while executing in
+  TOTAL_ORDER mode.
+
+  @param bf_thd      brute force THD asking for the abort
+  @param victim_thd  victim THD to be aborted
+  @param signal      forwarded unchanged to the engine's
+                     wsrep_abort_transaction hook
+
+  @return
+    always 0
+*/
+
+int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
+{
+  DBUG_ENTER("ha_wsrep_abort_transaction");
+
+  if (WSREP(bf_thd) ||
+      (bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
+       bf_thd->wsrep_exec_mode == TOTAL_ORDER))
+  {
+    handlerton *innodb_hton= installed_htons[DB_TYPE_INNODB];
+    if (!innodb_hton || !innodb_hton->wsrep_abort_transaction)
+    {
+      /* No engine hook to call: nothing can be aborted, only reported. */
+      WSREP_WARN("cannot abort InnoDB transaction");
+    }
+    else
+    {
+      innodb_hton->wsrep_abort_transaction(innodb_hton, bf_thd, victim_thd,
+                                           signal);
+    }
+  }
+
+  DBUG_RETURN(0);
+}
+
+/**
+  Invoke the InnoDB handlerton's wsrep_fake_trx_id hook for @a thd.
+
+  Nothing happens when wsrep is not active for the connection. A warning is
+  logged when the InnoDB handlerton or its hook is not available.
+*/
+void ha_wsrep_fake_trx_id(THD *thd)
+{
+  DBUG_ENTER("ha_wsrep_fake_trx_id");
+
+  if (WSREP(thd))
+  {
+    handlerton *innodb_hton= installed_htons[DB_TYPE_INNODB];
+    const bool hook_available= innodb_hton && innodb_hton->wsrep_fake_trx_id;
+    if (hook_available)
+    {
+      innodb_hton->wsrep_fake_trx_id(innodb_hton, thd);
+    }
+    else
+    {
+      WSREP_WARN("cannot get fake InnoDB transaction ID");
+    }
+  }
+
+  DBUG_VOID_RETURN;
+}
+#endif /* WITH_WSREP */
#ifdef TRANS_LOG_MGM_EXAMPLE_CODE
/*
Example of transaction log management functions based on assumption that logs
diff --git a/sql/handler.h b/sql/handler.h
index 772f2e68dab..51b301ae22e 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -441,6 +441,7 @@ enum legacy_db_type
DB_TYPE_BINLOG=21,
DB_TYPE_PBXT=23,
DB_TYPE_PERFORMANCE_SCHEMA=28,
+ DB_TYPE_WSREP=41,
DB_TYPE_ARIA=42,
DB_TYPE_TOKUDB=43,
DB_TYPE_FIRST_DYNAMIC=44,
@@ -1234,6 +1235,7 @@ struct handlerton
enum handler_create_iterator_result
(*create_iterator)(handlerton *hton, enum handler_iterator_type type,
struct handler_iterator *fill_this_in);
+
/*
Optional clauses in the CREATE/ALTER TABLE
*/
@@ -1346,6 +1348,14 @@ struct handlerton
*/
int (*discover_table_structure)(handlerton *hton, THD* thd,
TABLE_SHARE *share, HA_CREATE_INFO *info);
+
+#ifdef WITH_WSREP
+ int (*wsrep_abort_transaction)(handlerton *hton, THD *bf_thd,
+ THD *victim_thd, my_bool signal);
+ int (*wsrep_set_checkpoint)(handlerton *hton, const XID* xid);
+ int (*wsrep_get_checkpoint)(handlerton *hton, XID* xid);
+ void (*wsrep_fake_trx_id)(handlerton *hton, THD *thd);
+#endif /* WITH_WSREP */
};
@@ -3997,6 +4007,9 @@ bool key_uses_partial_cols(TABLE_SHARE *table, uint keyno);
extern const char *ha_row_type[];
extern MYSQL_PLUGIN_IMPORT const char *tx_isolation_names[];
extern MYSQL_PLUGIN_IMPORT const char *binlog_format_names[];
+#ifdef WITH_WSREP
+extern MYSQL_PLUGIN_IMPORT const char *wsrep_binlog_format_names[];
+#endif /* WITH_WSREP */
extern TYPELIB tx_isolation_typelib;
extern const char *myisam_stats_method_names[];
extern ulong total_ha, total_ha_2pc;
@@ -4116,6 +4129,10 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv);
bool ha_rollback_to_savepoint_can_release_mdl(THD *thd);
int ha_savepoint(THD *thd, SAVEPOINT *sv);
int ha_release_savepoint(THD *thd, SAVEPOINT *sv);
+#ifdef WITH_WSREP
+int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal);
+void ha_wsrep_fake_trx_id(THD *thd);
+#endif /* WITH_WSREP */
/* these are called by storage engines */
void trans_register_ha(THD *thd, bool all, handlerton *ht);
@@ -4146,6 +4163,9 @@ int ha_binlog_end(THD *thd);
#define ha_binlog_wait(a) do {} while (0)
#define ha_binlog_end(a) do {} while (0)
#endif
+#ifdef WITH_WSREP
+void wsrep_brute_force_aborts();
+#endif
const char *get_canonical_filename(handler *file, const char *path,
char *tmp_path);
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 0ca8f700bfc..7e4ddb7427c 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -2759,7 +2759,19 @@ void Item_func_rand::seed_random(Item *arg)
TODO: do not do reinit 'rand' for every execute of PS/SP if
args[0] is a constant.
*/
+#ifdef WITH_WSREP
+ uint32 tmp;
+ if (WSREP(current_thd))
+ {
+ if (current_thd->wsrep_exec_mode==REPL_RECV)
+ tmp= current_thd->wsrep_rand;
+ else
+ tmp= current_thd->wsrep_rand= (uint32) arg->val_int();
+ } else
+ tmp= (uint32) arg->val_int();
+#else
uint32 tmp= (uint32) arg->val_int();
+#endif /* WITH_WSREP */
my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L),
(uint32) (tmp*0x10000001L));
}
diff --git a/sql/keycaches.cc b/sql/keycaches.cc
index 120aa7e1029..a559c99bbd8 100644
--- a/sql/keycaches.cc
+++ b/sql/keycaches.cc
@@ -223,6 +223,7 @@ Rpl_filter *get_or_create_rpl_filter(const char *name, uint length)
void free_rpl_filter(const char *name, Rpl_filter *filter)
{
delete filter;
+ filter= 0; // NOTE(review): filter is a by-value parameter, so this clears only the local copy; the caller's pointer is unchanged
}
void free_all_rpl_filters()
diff --git a/sql/lock.cc b/sql/lock.cc
index 3354da2640b..965f7dcab99 100644
--- a/sql/lock.cc
+++ b/sql/lock.cc
@@ -83,6 +83,10 @@
#include "sql_acl.h" // SUPER_ACL
#include <hash.h>
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif /* WITH_WSREP */
+
/**
@defgroup Locking Locking
@{
@@ -313,6 +317,10 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags)
/* Copy the lock data array. thr_multi_lock() reorders its contents. */
memmove(sql_lock->locks + sql_lock->lock_count, sql_lock->locks,
sql_lock->lock_count * sizeof(*sql_lock->locks));
+#ifdef WITH_WSREP
+ thd->lock_info.in_lock_tables= thd->in_lock_tables;
+#endif
+
/* Lock on the copied half of the lock data array. */
rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks +
sql_lock->lock_count,
@@ -323,6 +331,11 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags)
end:
THD_STAGE_INFO(thd, org_stage);
+#ifdef WITH_WSREP
+ thd_proc_info(thd, "mysql_lock_tables(): unlocking tables II");
+#else /* WITH_WSREP */
+ thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
if (thd->killed)
{
@@ -335,6 +348,9 @@ end:
my_error(rc, MYF(0));
thd->set_time_after_lock();
+#ifdef WITH_WSREP
+ thd_proc_info(thd, "exit mysqld_lock_tables()");
+#endif /* WITH_WSREP */
DBUG_RETURN(rc);
}
@@ -1024,11 +1040,29 @@ void Global_read_lock::unlock_global_read_lock(THD *thd)
{
thd->mdl_context.release_lock(m_mdl_blocks_commits_lock);
m_mdl_blocks_commits_lock= NULL;
+#ifdef WITH_WSREP
+ if (WSREP(thd) || wsrep_node_is_donor())
+ {
+ wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED;
+ wsrep->resume(wsrep);
+ /* resync here only if we did implicit desync earlier */
+ if (!wsrep_desync && wsrep_node_is_synced())
+ {
+ int ret = wsrep->resync(wsrep);
+ if (ret != WSREP_OK)
+ {
+ WSREP_WARN("resync failed %d for FTWRL: db: %s, query: %s", ret,
+ (thd->db ? thd->db : "(null)"), thd->query());
+ DBUG_VOID_RETURN;
+ }
+ }
+ }
+#endif /* WITH_WSREP */
}
thd->mdl_context.release_lock(m_mdl_global_shared_lock);
m_mdl_global_shared_lock= NULL;
m_state= GRL_NONE;
-
+
DBUG_VOID_RETURN;
}
@@ -1056,6 +1090,16 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd)
If we didn't succeed lock_global_read_lock(), or if we already suceeded
make_global_read_lock_block_commit(), do nothing.
*/
+
+#ifdef WITH_WSREP
+ if (WSREP(thd) && m_mdl_blocks_commits_lock)
+ {
+ WSREP_DEBUG("GRL was in block commit mode when entering "
+ "make_global_read_lock_block_commit");
+ DBUG_RETURN(FALSE);
+ }
+#endif /* WITH_WSREP */
+
if (m_state != GRL_ACQUIRED)
DBUG_RETURN(0);
@@ -1068,6 +1112,53 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd)
m_mdl_blocks_commits_lock= mdl_request.ticket;
m_state= GRL_ACQUIRED_AND_BLOCKS_COMMIT;
+#ifdef WITH_WSREP
+ /* Native threads should bail out before the wsrep operations that follow.
+ The donor servicing thread is an exception: it should pause the provider but
+ not desync, as it is already desynced in donor state.
+ */
+ if (!WSREP(thd) && !wsrep_node_is_donor())
+ {
+ DBUG_RETURN(FALSE);
+ }
+
+ /* if already desynced or donor, avoid double desyncing
+ if not in PC and synced, desyncing is not possible either
+ */
+ if (wsrep_desync || !wsrep_node_is_synced())
+ {
+ WSREP_DEBUG("desync set upfont, skipping implicit desync for FTWRL: %d",
+ wsrep_desync);
+ }
+ else
+ {
+ int rcode;
+ WSREP_DEBUG("running implicit desync for node");
+ rcode = wsrep->desync(wsrep);
+ if (rcode != WSREP_OK)
+ {
+ WSREP_WARN("FTWRL desync failed %d for schema: %s, query: %s",
+ rcode, (thd->db ? thd->db : "(null)"), thd->query());
+ my_message(ER_LOCK_DEADLOCK, "wsrep desync failed for FTWRL", MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+ }
+
+ long long ret = wsrep->pause(wsrep);
+ if (ret >= 0)
+ {
+ wsrep_locked_seqno= ret;
+ }
+ else if (ret != -ENOSYS) /* -ENOSYS - no provider */
+ {
+ WSREP_ERROR("Failed to pause provider: %lld (%s)", -ret, strerror(-ret));
+
+ /* m_mdl_blocks_commits_lock is always NULL here */
+ wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED;
+ my_error(ER_LOCK_DEADLOCK, MYF(0));
+ DBUG_RETURN(TRUE);
+ }
+#endif /* WITH_WSREP */
DBUG_RETURN(FALSE);
}
diff --git a/sql/log.cc b/sql/log.cc
index ee92f22adb8..c1c6a4758d7 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -52,6 +52,9 @@
#include "sql_plugin.h"
#include "rpl_handler.h"
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif /* WITH_WSREP */
#include "debug_sync.h"
#include "sql_show.h"
#include "my_pthread.h"
@@ -524,6 +527,9 @@ private:
};
handlerton *binlog_hton;
+#ifdef WITH_WSREP
+extern handlerton *wsrep_hton;
+#endif
bool LOGGER::is_log_table_enabled(uint log_table_type)
{
@@ -538,6 +544,66 @@ bool LOGGER::is_log_table_enabled(uint log_table_type)
}
}
+#ifdef WITH_WSREP
+/**
+  Fetch the IO_CACHE obtained from get_binlog_cache_log(true) on the binlog
+  cache manager attached to @a thd.
+
+  @return the cache, or NULL (after a debug log line) when no binlog cache
+          manager is attached to thd
+*/
+IO_CACHE * get_trans_log(THD * thd)
+{
+  binlog_cache_mngr *const mngr=
+    static_cast<binlog_cache_mngr*>(thd_get_ha_data(thd, binlog_hton));
+
+  if (!mngr)
+  {
+    WSREP_DEBUG("binlog cache not initialized, conn :%ld", thd->thread_id);
+    return NULL;
+  }
+
+  return mngr->get_binlog_cache_log(true);
+}
+
+
+/**
+  Tell whether @a thd has nothing buffered in its binlog transaction cache.
+
+  @return true when no binlog cache manager is attached to thd, or when the
+          manager's trx_cache reports empty
+*/
+bool wsrep_trans_cache_is_empty(THD *thd)
+{
+  binlog_cache_mngr *const mngr=
+    static_cast<binlog_cache_mngr*>(thd_get_ha_data(thd, binlog_hton));
+
+  if (!mngr)
+    return true;
+
+  return mngr->trx_cache.empty();
+}
+
+/**
+  Free-function wrapper around THD::binlog_flush_pending_rows_event();
+  any result of the member call is discarded.
+*/
+void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end)
+{
+  (void) thd->binlog_flush_pending_rows_event(stmt_end);
+}
+/**
+  Reset the binlog cache manager attached to @a thd (if there is one) and
+  clear the connection's binlog table maps.
+
+  Events still present in the statement cache after the manager reset are
+  logged at debug level and then discarded.
+*/
+void thd_binlog_trx_reset(THD * thd)
+{
+  /*
+    todo: fix autocommit select to not call the caller
+  */
+  binlog_cache_mngr *const mngr=
+    static_cast<binlog_cache_mngr*>(thd_get_ha_data(thd, binlog_hton));
+
+  if (mngr)
+  {
+    mngr->reset(false, true);
+
+    const bool stmt_events_pending= !mngr->stmt_cache.empty();
+    if (stmt_events_pending)
+    {
+      WSREP_DEBUG("pending events in stmt cache, sql: %s", thd->query());
+      mngr->stmt_cache.reset();
+    }
+  }
+
+  /* Done regardless of whether a cache manager exists. */
+  thd->clear_binlog_table_maps();
+}
+
+/**
+  Set the transaction cache's previous position to MY_OFF_T_UNDEF for
+  @a thd, provided a binlog cache manager is attached to it.
+*/
+void thd_binlog_rollback_stmt(THD * thd)
+{
+  WSREP_DEBUG("thd_binlog_rollback_stmt :%ld", thd->thread_id);
+
+  void *const ha_data= thd_get_ha_data(thd, binlog_hton);
+  if (ha_data != NULL)
+  {
+    binlog_cache_mngr *const mngr= static_cast<binlog_cache_mngr*>(ha_data);
+    mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
+  }
+}
+
+#endif
+
/**
Check if a given table is opened log table
@@ -1589,7 +1655,11 @@ binlog_trans_log_savepos(THD *thd, my_off_t *pos)
DBUG_ENTER("binlog_trans_log_savepos");
DBUG_ASSERT(pos != NULL);
binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
+#ifdef WITH_WSREP
+ DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open());
+#else
DBUG_ASSERT(mysql_bin_log.is_open());
+#endif
*pos= cache_mngr->trx_cache.get_byte_position();
DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
DBUG_VOID_RETURN;
@@ -1637,7 +1707,16 @@ binlog_trans_log_truncate(THD *thd, my_off_t pos)
int binlog_init(void *p)
{
binlog_hton= (handlerton *)p;
+#ifdef WITH_WSREP
+ if (WSREP_ON)
+ binlog_hton->state= SHOW_OPTION_YES;
+ else
+ {
+#endif /* WITH_WSREP */
binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
binlog_hton->db_type=DB_TYPE_BINLOG;
binlog_hton->savepoint_offset= sizeof(my_off_t);
binlog_hton->close_connection= binlog_close_connection;
@@ -1653,15 +1732,36 @@ int binlog_init(void *p)
return 0;
}
+#ifdef WITH_WSREP
+#include "wsrep_binlog.h"
+#endif /* WITH_WSREP */
static int binlog_close_connection(handlerton *hton, THD *thd)
{
+ DBUG_ENTER("binlog_close_connection");
binlog_cache_mngr *const cache_mngr=
(binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+#ifdef WITH_WSREP
+ /*
+ Diagnostics only: when the transaction cache still holds data at
+ connection close, report the size of both caches and dump their contents.
+ NOTE(review): the stmt cache warning is emitted even when that cache is
+ empty, and ownership of buf after wsrep_write_cache_buf() is not visible
+ from here - confirm whether it has to be freed.
+ */
+ if (cache_mngr && !cache_mngr->trx_cache.empty()) {
+ IO_CACHE* cache= get_trans_log(thd);
+ uchar *buf= NULL;
+ size_t len=0;
+ wsrep_write_cache_buf(cache, &buf, &len);
+ /* len is a size_t: cast it so the argument matches the %lu conversion */
+ WSREP_WARN("binlog trx cache not empty (%lu bytes) @ connection close %lu",
+ (ulong) len, thd->thread_id);
+ if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
+
+ cache = cache_mngr->get_binlog_cache_log(false);
+ wsrep_write_cache_buf(cache, &buf, &len);
+ WSREP_WARN("binlog stmt cache not empty (%lu bytes) @ connection close %lu",
+ (ulong) len, thd->thread_id);
+ if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
+ }
+#endif /* WITH_WSREP */
DBUG_ASSERT(cache_mngr->trx_cache.empty() && cache_mngr->stmt_cache.empty());
thd_set_ha_data(thd, binlog_hton, NULL);
cache_mngr->~binlog_cache_mngr();
my_free(cache_mngr);
- return 0;
+ DBUG_RETURN(0);
}
/*
@@ -1757,6 +1857,14 @@ binlog_commit_flush_stmt_cache(THD *thd, bool all,
binlog_cache_mngr *cache_mngr)
{
DBUG_ENTER("binlog_commit_flush_stmt_cache");
+#ifdef WITH_WSREP
+ /* Skip the flush while thd->wsrep_mysql_replicated is positive. */
+ if (thd->wsrep_mysql_replicated > 0)
+ {
+ WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d", thd->wsrep_mysql_replicated);
+ /* Leave through DBUG_RETURN so the DBUG_ENTER above stays balanced. */
+ DBUG_RETURN(0);
+ }
+#endif
+
Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
FALSE, TRUE, TRUE, 0);
DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE));
@@ -1912,12 +2020,12 @@ static bool trans_cannot_safely_rollback(THD *thd, bool all)
return ((thd->variables.option_bits & OPTION_KEEP_LOG) ||
(trans_has_updated_non_trans_table(thd) &&
- thd->variables.binlog_format == BINLOG_FORMAT_STMT) ||
+ WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT) ||
(cache_mngr->trx_cache.changes_to_non_trans_temp_table() &&
- thd->variables.binlog_format == BINLOG_FORMAT_MIXED) ||
+ WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED) ||
(trans_has_updated_non_trans_table(thd) &&
ending_single_stmt_trans(thd,all) &&
- thd->variables.binlog_format == BINLOG_FORMAT_MIXED));
+ WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED));
}
@@ -1939,6 +2047,9 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
DBUG_ENTER("binlog_commit");
binlog_cache_mngr *const cache_mngr=
(binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+#ifdef WITH_WSREP
+ if (!cache_mngr) DBUG_RETURN(0);
+#endif /* WITH_WSREP */
DBUG_PRINT("debug",
("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
@@ -1995,6 +2106,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
int error= 0;
binlog_cache_mngr *const cache_mngr=
(binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
+#ifdef WITH_WSREP
+ if (!cache_mngr) DBUG_RETURN(0);
+#endif /* WITH_WSREP */
DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
YESNO(all),
@@ -2023,8 +2137,12 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
cache_mngr->reset(false, true);
DBUG_RETURN(error);
}
-
+#ifdef WITH_WSREP
+ if (!wsrep_emulate_bin_log &&
+ mysql_bin_log.check_write_error(thd))
+#else
if (mysql_bin_log.check_write_error(thd))
+#endif
{
/*
"all == true" means that a "rollback statement" triggered the error and
@@ -2040,7 +2158,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
error |= binlog_truncate_trx_cache(thd, cache_mngr, all);
}
else if (!error)
- {
+ {
if (ending_trans(thd, all) && trans_cannot_safely_rollback(thd, all))
error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr);
/*
@@ -2055,9 +2173,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
else if (ending_trans(thd, all) ||
(!(thd->variables.option_bits & OPTION_KEEP_LOG) &&
(!stmt_has_updated_non_trans_table(thd) ||
- thd->variables.binlog_format != BINLOG_FORMAT_STMT) &&
+ WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_STMT) &&
(!cache_mngr->trx_cache.changes_to_non_trans_temp_table() ||
- thd->variables.binlog_format != BINLOG_FORMAT_MIXED)))
+ WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_MIXED)))
error= binlog_truncate_trx_cache(thd, cache_mngr, all);
}
@@ -2164,8 +2282,15 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
{
DBUG_ENTER("binlog_savepoint_set");
int error= 1;
-
char buf[1024];
+#ifdef WITH_WSREP
+ if (wsrep_emulate_bin_log) DBUG_RETURN(0);
+ /*
+ Clear table maps before writing SAVEPOINT event. This enforces
+ recreation of table map events for the following row event.
+ */
+ thd->clear_binlog_table_maps();
+#endif /* WITH_WSREP */
String log_query(buf, sizeof(buf), &my_charset_bin);
if (log_query.copy(STRING_WITH_LEN("SAVEPOINT "), &my_charset_bin) ||
append_identifier(thd, &log_query,
@@ -2202,7 +2327,12 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
non-transactional table. Otherwise, truncate the binlog cache starting
from the SAVEPOINT command.
*/
+#ifdef WITH_WSREP
+ if (!wsrep_emulate_bin_log &&
+ unlikely(trans_has_updated_non_trans_table(thd) ||
+#else
if (unlikely(trans_has_updated_non_trans_table(thd) ||
+#endif
(thd->variables.option_bits & OPTION_KEEP_LOG)))
{
char buf[1024];
@@ -2216,7 +2346,10 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
TRUE, FALSE, TRUE, errcode);
DBUG_RETURN(mysql_bin_log.write(&qinfo));
}
- binlog_trans_log_truncate(thd, *(my_off_t*)sv);
+#ifdef WITH_WSREP
+ if (!wsrep_emulate_bin_log)
+#endif
+ binlog_trans_log_truncate(thd, *(my_off_t*)sv);
DBUG_RETURN(0);
}
@@ -3553,7 +3686,10 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
new_xid_list_entry->binlog_id= current_binlog_id;
/* Remove any initial entries with no pending XIDs. */
while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
+ {
my_free(binlog_xid_count_list.get());
+ }
+ mysql_cond_broadcast(&COND_xid_list);
binlog_xid_count_list.push_back(new_xid_list_entry);
mysql_mutex_unlock(&LOCK_xid_list);
@@ -4074,6 +4210,7 @@ err:
DBUG_ASSERT(b->xid_count == 0);
my_free(binlog_xid_count_list.get());
}
+ mysql_cond_broadcast(&COND_xid_list);
reset_master_pending--;
mysql_mutex_unlock(&LOCK_xid_list);
}
@@ -4084,6 +4221,26 @@ err:
}
+/**
+  Block until the head of binlog_xid_count_list is also its last entry,
+  then pass through LOCK_log once.
+*/
+void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
+{
+  mysql_mutex_lock(&LOCK_xid_list);
+  while (!binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
+  {
+    /* Woken through COND_xid_list; re-check the list each time. */
+    mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
+  }
+  mysql_mutex_unlock(&LOCK_xid_list);
+
+  /*
+    LOCK_xid_list and LOCK_log are chained, so LOCK_log can only be
+    obtained here after mark_xid_done() has written the last checkpoint
+    event. Taking and immediately releasing it is the wait.
+  */
+  mysql_mutex_lock(&LOCK_log);
+  mysql_mutex_unlock(&LOCK_log);
+}
+
+
/**
Delete relay log files prior to rli->group_relay_log_name
(i.e. all logs which are not involved in a non-finished group
@@ -5267,6 +5424,7 @@ binlog_cache_mngr *THD::binlog_setup_trx_data()
DBUG_RETURN(cache_mngr);
}
+
/*
Function to start a statement and optionally a transaction for the
binary log.
@@ -5387,7 +5545,12 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
is_transactional= 1;
/* Pre-conditions */
+#ifdef WITH_WSREP
+ DBUG_ASSERT(is_current_stmt_binlog_format_row() &&
+ (WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
+#endif
DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
Table_map_log_event
@@ -5527,7 +5690,11 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
bool is_transactional)
{
DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
+#ifdef WITH_WSREP
+ DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
+#else
DBUG_ASSERT(mysql_bin_log.is_open());
+#endif
DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
int error= 0;
@@ -5822,11 +5989,23 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
binlog_cache_data *cache_data= 0;
bool is_trans_cache= FALSE;
bool using_trans= event_info->use_trans_cache();
- bool direct= event_info->use_direct_logging();
+ bool direct;
ulong prev_binlog_id;
DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");
LINT_INIT(prev_binlog_id);
+#ifdef WITH_WSREP
+ /*
+ When binary logging is not enabled (--log-bin=0), the wsrep patch partially
+ enables it without opening the binlog file (i.e. without
+ MYSQL_BIN_LOG::open()). So, avoid writing directly to the binlog file.
+ */
+ if (wsrep_emulate_bin_log)
+ direct= false;
+ else
+#endif /* WITH_WSREP */
+ direct= event_info->use_direct_logging();
+
if (thd->variables.option_bits & OPTION_GTID_BEGIN)
{
DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set"));
@@ -5861,7 +6040,13 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
mostly called if is_open() *was* true a few instructions before, but it
could have changed since.
*/
+#ifdef WITH_WSREP
+ /* applier and replayer can skip writing binlog events */
+ if ((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) ||
+ is_open())
+#else
if (likely(is_open()))
+#endif
{
my_off_t UNINIT_VAR(my_org_b_tell);
#ifdef HAVE_REPLICATION
@@ -6212,6 +6397,15 @@ int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
{
int error= 0;
DBUG_ENTER("MYSQL_BIN_LOG::rotate");
+#ifdef WITH_WSREP
+ if (WSREP_ON && wsrep_to_isolation)
+ {
+ *check_purge= false;
+ WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
+ wsrep_to_isolation);
+ DBUG_RETURN(0);
+ }
+#endif
//todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log);
*check_purge= false;
@@ -6761,6 +6955,13 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
Ha_trx_info *ha_info;
DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
+#ifdef WITH_WSREP
+ /*
+ Control should not be allowed beyond this point in wsrep_emulate_bin_log
+ mode.
+ */
+ if (wsrep_emulate_bin_log) DBUG_RETURN(0);
+#endif /* WITH_WSREP */
entry.thd= thd;
entry.cache_mngr= cache_mngr;
entry.error= 0;
@@ -6769,6 +6970,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
entry.using_trx_cache= using_trx_cache;
entry.need_unlog= false;
ha_info= all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
+
for (; ha_info; ha_info= ha_info->next())
{
if (ha_info->is_started() && ha_info->ht() != binlog_hton &&
@@ -8374,7 +8576,7 @@ ulong tc_log_page_waits= 0;
static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};
-ulong opt_tc_log_size= TC_LOG_MIN_SIZE;
+ulong opt_tc_log_size;
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
int TC_LOG_MMAP::open(const char *opt_name)
@@ -8387,7 +8589,6 @@ int TC_LOG_MMAP::open(const char *opt_name)
DBUG_ASSERT(opt_name && opt_name[0]);
tc_log_page_size= my_getpagesize();
- DBUG_ASSERT(TC_LOG_PAGE_SIZE % tc_log_page_size == 0);
fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR, MYF(0))) < 0)
@@ -8726,6 +8927,7 @@ mmap_do_checkpoint_callback(void *data)
int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
{
pending_cookies *full_buffer= NULL;
+ uint32 ncookies= tc_log_page_size / sizeof(my_xid);
DBUG_ASSERT(*(my_xid *)(data+cookie) == xid);
/*
@@ -8739,7 +8941,7 @@ int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
mysql_mutex_lock(&LOCK_pending_checkpoint);
if (pending_checkpoint == NULL)
{
- uint32 size= sizeof(*pending_checkpoint);
+ uint32 size= sizeof(*pending_checkpoint) + sizeof(ulong) * (ncookies - 1);
if (!(pending_checkpoint=
(pending_cookies *)my_malloc(size, MYF(MY_ZEROFILL))))
{
@@ -8750,8 +8952,7 @@ int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
}
pending_checkpoint->cookies[pending_checkpoint->count++]= cookie;
- if (pending_checkpoint->count == sizeof(pending_checkpoint->cookies) /
- sizeof(pending_checkpoint->cookies[0]))
+ if (pending_checkpoint->count == ncookies)
{
full_buffer= pending_checkpoint;
pending_checkpoint= NULL;
@@ -8785,7 +8986,7 @@ TC_LOG_MMAP::commit_checkpoint_notify(void *cookie)
if (count == 0)
{
uint i;
- for (i= 0; i < sizeof(pending->cookies)/sizeof(pending->cookies[0]); ++i)
+ for (i= 0; i < tc_log_page_size / sizeof(my_xid); ++i)
delete_entry(pending->cookies[i]);
my_free(pending);
}
@@ -8990,7 +9191,14 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data();
if (!cache_mngr)
+#ifdef WITH_WSREP
+ {
+ WSREP_DEBUG("Skipping empty log_xid: %s", thd->query());
+ DBUG_RETURN(0);
+ }
+#else
DBUG_RETURN(0);
+#endif /* WITH_WSREP */
cache_mngr->using_xa= TRUE;
cache_mngr->xa_xid= xid;
@@ -9092,7 +9300,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
*/
if (unlikely(reset_master_pending))
{
- mysql_cond_signal(&COND_xid_list);
+ mysql_cond_broadcast(&COND_xid_list);
mysql_mutex_unlock(&LOCK_xid_list);
DBUG_VOID_RETURN;
}
@@ -9130,8 +9338,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
mysql_mutex_lock(&LOCK_log);
mysql_mutex_lock(&LOCK_xid_list);
--mark_xid_done_waiting;
- if (unlikely(reset_master_pending))
- mysql_cond_signal(&COND_xid_list);
+ mysql_cond_broadcast(&COND_xid_list);
/* We need to reload current_binlog_id due to release/re-take of lock. */
current= current_binlog_id;
diff --git a/sql/log.h b/sql/log.h
index 1c5e09c5c09..9eb9f88031d 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -105,7 +105,7 @@ public:
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered)
{
- DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */);
+ //DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); NOTE(review): assert disabled without explanation, so debug builds now just return 1 here - confirm this call is expected to be reachable rather than a leftover.
return 1;
}
int unlog(ulong cookie, my_xid xid) { return 0; }
@@ -113,7 +113,6 @@ public:
};
#define TC_LOG_PAGE_SIZE 8192
-#define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE)
#ifdef HAVE_MMAP
class TC_LOG_MMAP: public TC_LOG
@@ -128,7 +127,7 @@ class TC_LOG_MMAP: public TC_LOG
struct pending_cookies {
uint count;
uint pending_count;
- ulong cookies[TC_LOG_PAGE_SIZE/sizeof(my_xid)];
+ ulong cookies[1];
};
private:
@@ -775,6 +774,7 @@ public:
bool need_mutex);
bool reset_logs(THD* thd, bool create_new_log,
rpl_gtid *init_state, uint32 init_state_len);
+ void wait_for_last_checkpoint_event();
void close(uint exiting);
void clear_inuse_flag_when_closing(File file);
@@ -978,12 +978,29 @@ public:
};
enum enum_binlog_format {
+ /*
+ statement-based except for cases where only row-based can work (UUID()
+ etc):
+ */
BINLOG_FORMAT_MIXED= 0, ///< statement if safe, otherwise row - autodetected
BINLOG_FORMAT_STMT= 1, ///< statement-based
BINLOG_FORMAT_ROW= 2, ///< row-based
BINLOG_FORMAT_UNSPEC=3 ///< thd_binlog_format() returns it when binlog is closed
};
+#ifdef WITH_WSREP
+IO_CACHE * get_trans_log(THD * thd);
+bool wsrep_trans_cache_is_empty(THD *thd);
+void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end);
+void thd_binlog_trx_reset(THD * thd);
+void thd_binlog_rollback_stmt(THD * thd);
+
+#define WSREP_FORMAT(my_format) \
+ ((wsrep_forced_binlog_format != BINLOG_FORMAT_UNSPEC) ? \
+ wsrep_forced_binlog_format : my_format)
+#else
+#define WSREP_FORMAT(my_format) my_format
+#endif
int query_error_code(THD *thd, bool not_killed);
uint purge_log_get_error_code(int res);
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 43e8df7b801..1ae65a02f15 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -44,6 +44,9 @@
#include <strfunc.h>
#include "compat56.h"
+#if WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif
#endif /* MYSQL_CLIENT */
#include <base64.h>
@@ -3134,7 +3137,14 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg,
master_data_written(0)
{
time_t end_time;
-
+#ifdef WITH_WSREP
+ /*
+ If Query_log_event will contain non trans keyword (not BEGIN, COMMIT,
+ SAVEPOINT or ROLLBACK) we disable PA for this transaction.
+ */
+ if (!is_trans_keyword())
+ thd->wsrep_PA_safe= false;
+#endif /* WITH_WSREP */
memset(&user, 0, sizeof(user));
memset(&host, 0, sizeof(host));
@@ -4113,7 +4123,11 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi,
uint64 sub_id= 0;
rpl_gtid gtid;
Relay_log_info const *rli= rgi->rli;
+#ifdef WITH_WSREP
+ Rpl_filter *rpl_filter= (rli->mi) ? rli->mi->rpl_filter: NULL;
+#else
Rpl_filter *rpl_filter= rli->mi->rpl_filter;
+#endif /* WITH_WSREP */
bool current_stmt_is_commit;
DBUG_ENTER("Query_log_event::do_apply_event");
@@ -4602,6 +4616,21 @@ Query_log_event::do_shall_skip(rpl_group_info *rgi)
DBUG_RETURN(Log_event::EVENT_SKIP_COUNT);
}
}
+#ifdef WITH_WSREP
+ else if (wsrep_mysql_replication_bundle && WSREP_ON && thd->wsrep_mysql_replicated > 0 &&
+ (!strncasecmp(query , "BEGIN", 5) || !strncasecmp(query , "COMMIT", 6)))
+ {
+ if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle)
+ {
+ WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated);
+ DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE);
+ }
+ else
+ {
+ thd->wsrep_mysql_replicated = 0;
+ }
+ }
+#endif
DBUG_RETURN(Log_event::do_shall_skip(rgi));
}
@@ -7395,6 +7424,10 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
Record any GTID in the same transaction, so slave state is transactionally
consistent.
*/
+
+ /*Set wsrep_affected_rows = 0 */
+ thd->wsrep_affected_rows= 0;
+
if (rgi->gtid_pending)
{
sub_id= rgi->gtid_sub_id;
@@ -7455,6 +7488,20 @@ Xid_log_event::do_shall_skip(rpl_group_info *rgi)
thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_GTID_BEGIN);
DBUG_RETURN(Log_event::EVENT_SKIP_COUNT);
}
+#ifdef WITH_WSREP
+ else if (wsrep_mysql_replication_bundle && WSREP_ON)
+ {
+ if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle)
+ {
+ WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated);
+ DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE);
+ }
+ else
+ {
+ thd->wsrep_mysql_replicated = 0;
+ }
+ }
+#endif
DBUG_RETURN(Log_event::do_shall_skip(rgi));
}
#endif /* !MYSQL_CLIENT */
@@ -8477,6 +8524,14 @@ err:
end_io_cache(&file);
if (fd >= 0)
mysql_file_close(fd, MYF(0));
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "exit Create_file_log_event::do_apply_event()");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
+ thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
return error != 0;
}
#endif /* defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) */
@@ -8648,6 +8703,14 @@ int Append_block_log_event::do_apply_event(rpl_group_info *rgi)
err:
if (fd >= 0)
mysql_file_close(fd, MYF(0));
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "exit Append_block_log_event::do_apply_event()");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
+ thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
DBUG_RETURN(error);
}
#endif
@@ -9734,6 +9797,20 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi)
if (open_and_lock_tables(thd, rgi->tables_to_lock, FALSE, 0))
{
uint actual_error= thd->get_stmt_da()->sql_errno();
+
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ WSREP_WARN("BF applier failed to open_and_lock_tables: %u, fatal: %d "
+ "wsrep = (exec_mode: %d conflict_state: %d seqno: %lld)",
+ thd->get_stmt_da()->sql_errno(),
+ thd->is_fatal_error,
+ thd->wsrep_exec_mode,
+ thd->wsrep_conflict_state,
+ (long long)wsrep_thd_trx_seqno(thd));
+ }
+#endif
+
if ((thd->is_slave_error || thd->is_fatal_error) &&
!is_parallel_retry_error(rgi, actual_error))
{
@@ -9868,7 +9945,18 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi)
}
#ifdef HAVE_QUERY_CACHE
+#ifdef WITH_WSREP
+ /*
+ Moved invalidation right before the call to rows_event_stmt_cleanup(),
+ to avoid query cache being polluted with stale entries.
+ */
+ if (! (WSREP(thd) && (thd->wsrep_exec_mode == REPL_RECV)))
+ {
+#endif /* WITH_WSREP */
query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
#endif
}
@@ -10056,6 +10144,14 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi)
/* remove trigger's tables */
if (slave_run_triggers_for_rbr)
restore_empty_query_table_list(thd->lex);
+
+#if defined(WITH_WSREP) && defined(HAVE_QUERY_CACHE)
+ if (WSREP(thd) && thd->wsrep_exec_mode == REPL_RECV)
+ {
+ query_cache.invalidate_locked_for_write(thd, rgi->tables_to_lock);
+ }
+#endif /* WITH_WSREP && HAVE_QUERY_CACHE */
+
if (get_flags(STMT_END_F) && (error= rows_event_stmt_cleanup(rgi, thd)))
slave_rows_error_report(ERROR_LEVEL,
thd->is_error() ? 0 : error,
@@ -10913,7 +11009,12 @@ check_table_map(rpl_group_info *rgi, RPL_TABLE_LIST *table_list)
enum_tbl_map_status res= OK_TO_PROCESS;
Relay_log_info *rli= rgi->rli;
+#ifdef WITH_WSREP
+ if ((rgi->thd->slave_thread /* filtering is for slave only */ ||
+ (WSREP(rgi->thd) && rgi->thd->wsrep_applier)) &&
+#else
if (rgi->thd->slave_thread /* filtering is for slave only */ &&
+#endif /* WITH_WSREP */
(!rli->mi->rpl_filter->db_ok(table_list->db) ||
(rli->mi->rpl_filter->is_on() && !rli->mi->rpl_filter->tables_ok("", table_list))))
res= FILTERED_OUT;
@@ -11670,8 +11771,23 @@ int
Write_rows_log_event::do_exec_row(rpl_group_info *rgi)
{
DBUG_ASSERT(m_table != NULL);
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ char info[64];
+ info[sizeof(info) - 1] = '\0';
+ snprintf(info, sizeof(info) - 1, "Write_rows_log_event::write_row(%lld)",
+ (long long) wsrep_thd_trx_seqno(thd));
+ const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL;
+#else
+ const char* tmp = (WSREP(thd)) ?
+ thd_proc_info(thd,"Write_rows_log_event::write_row()") : NULL;
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
int error= write_row(rgi, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT);
+#ifdef WITH_WSREP
+ if (WSREP(thd)) thd_proc_info(thd, tmp);
+#endif /* WITH_WSREP */
if (error && !thd->is_error())
{
DBUG_ASSERT(0);
@@ -12354,11 +12470,33 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi)
slave_run_triggers_for_rbr && !master_had_triggers && m_table->triggers;
DBUG_ASSERT(m_table != NULL);
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ char info[64];
+ info[sizeof(info) - 1] = '\0';
+ snprintf(info, sizeof(info) - 1, "Delete_rows_log_event::find_row(%lld)",
+ (long long) wsrep_thd_trx_seqno(thd));
+ const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL;
+#else
+ const char* tmp = (WSREP(thd)) ?
+ thd_proc_info(thd,"Delete_rows_log_event::find_row()") : NULL;
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
if (!(error= find_row(rgi)))
{
/*
Delete the record found, located in record[0]
*/
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ snprintf(info, sizeof(info) - 1,
+ "Delete_rows_log_event::ha_delete_row(%lld)",
+ (long long) wsrep_thd_trx_seqno(thd));
+ if (WSREP(thd)) thd_proc_info(thd, info);
+#else
+ if (WSREP(thd)) thd_proc_info(thd,"Delete_rows_log_event::ha_delete_row()");
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
if (invoke_triggers &&
process_triggers(TRG_EVENT_DELETE, TRG_ACTION_BEFORE, FALSE))
error= HA_ERR_GENERIC; // in case if error is not set yet
@@ -12369,6 +12507,9 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi)
error= HA_ERR_GENERIC; // in case if error is not set yet
m_table->file->ha_index_or_rnd_end();
}
+#ifdef WITH_WSREP
+ if (WSREP(thd)) thd_proc_info(thd, tmp);
+#endif /* WITH_WSREP */
return error;
}
@@ -12498,6 +12639,18 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi)
slave_run_triggers_for_rbr && !master_had_triggers && m_table->triggers;
DBUG_ASSERT(m_table != NULL);
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ char info[64];
+ info[sizeof(info) - 1] = '\0';
+ snprintf(info, sizeof(info) - 1, "Update_rows_log_event::find_row(%lld)",
+ (long long) wsrep_thd_trx_seqno(thd));
+ const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL;
+#else
+ const char* tmp = (WSREP(thd)) ?
+ thd_proc_info(thd,"Update_rows_log_event::find_row()") : NULL;
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
int error= find_row(rgi);
if (error)
{
@@ -12524,6 +12677,17 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi)
store_record(m_table,record[1]);
m_curr_row= m_curr_row_end;
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ snprintf(info, sizeof(info) - 1,
+ "Update_rows_log_event::unpack_current_row(%lld)",
+ (long long) wsrep_thd_trx_seqno(thd));
+ if (WSREP(thd)) thd_proc_info(thd, info);
+#else
+ if (WSREP(thd))
+ thd_proc_info(thd,"Update_rows_log_event::unpack_current_row()");
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
/* this also updates m_curr_row_end */
if ((error= unpack_current_row(rgi)))
goto err;
@@ -12542,6 +12706,17 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi)
DBUG_DUMP("new values", m_table->record[0], m_table->s->reclength);
#endif
+#ifdef WITH_WSREP
+#ifdef WSREP_PROC_INFO
+ snprintf(info, sizeof(info) - 1,
+ "Update_rows_log_event::ha_update_row(%lld)",
+ (long long) wsrep_thd_trx_seqno(thd));
+ if (WSREP(thd)) thd_proc_info(thd, info);
+#else
+ if (WSREP(thd)) thd_proc_info(thd,"Update_rows_log_event::ha_update_row()");
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
+
if (invoke_triggers &&
process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_BEFORE, TRUE))
{
@@ -12557,6 +12732,9 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi)
process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_AFTER, TRUE))
error= HA_ERR_GENERIC; // in case if error is not set yet
+#ifdef WITH_WSREP
+ if (WSREP(thd)) thd_proc_info(thd, tmp);
+#endif /* WITH_WSREP */
err:
m_table->file->ha_index_or_rnd_end();
return error;
@@ -12657,6 +12835,47 @@ void Incident_log_event::pack_info(THD *thd, Protocol *protocol)
protocol->store(buf, bytes, &my_charset_bin);
}
#endif
+#if WITH_WSREP && !defined(MYSQL_CLIENT)
+/* Largest event accepted by wsrep_read_log_event(): current protocol max. */
+#define WSREP_MAX_ALLOWED_PACKET (1024*1024*1024)
+
+/**
+  Read the first event from (*arg_buf). The size of (*arg_buf) is
+  (*arg_buf_len).
+
+  On return (*arg_buf) is shifted past the bytes consumed for that event and
+  (*arg_buf_len) is reduced by the same amount. The amount consumed is the
+  length recorded in the event header, clamped to the bytes actually
+  available so that the remaining length can never wrap around.
+
+  @param[in,out] arg_buf            start of the unread event data
+  @param[in,out] arg_buf_len        number of unread bytes at (*arg_buf)
+  @param[in]     description_event  passed through to
+                                    Log_event::read_log_event()
+
+  @return the event returned by Log_event::read_log_event(), or 0 if the
+          header is truncated, the recorded length exceeds
+          WSREP_MAX_ALLOWED_PACKET or the available bytes, or parsing fails.
+          Every failure is reported through sql_print_error().
+*/
+Log_event* wsrep_read_log_event(
+  char **arg_buf, size_t *arg_buf_len,
+  const Format_description_log_event *description_event)
+{
+  DBUG_ENTER("wsrep_read_log_event");
+  char *head= (*arg_buf);
+  const size_t avail= (*arg_buf_len);
+  const char *error= 0;
+  Log_event *res= 0;
+
+  /* The 4-byte length field must lie inside the buffer before it is read. */
+  if (avail < (size_t) EVENT_LEN_OFFSET + 4)
+  {
+    sql_print_error("Error in wsrep_read_log_event(): "
+                    "event header truncated, buffer length: %lu",
+                    (ulong) avail);
+    (*arg_buf)+= avail;
+    (*arg_buf_len)= 0;
+    DBUG_RETURN(res);
+  }
+
+  const uint data_len= uint4korr(head + EVENT_LEN_OFFSET);
+
+  if (data_len > WSREP_MAX_ALLOWED_PACKET)
+    error= "Event too big";
+  else if (data_len > avail)
+    error= "Event truncated";     /* would read past the end of the buffer */
+  else
+    res= Log_event::read_log_event(head, data_len, &error,
+                                   description_event, false);
+
+  if (!res)
+  {
+    DBUG_ASSERT(error != 0);
+    /*
+      NOTE(review): assumes the type byte lies before the end of the length
+      field, so it is covered by the header check above - confirm against
+      the EVENT_TYPE_OFFSET / EVENT_LEN_OFFSET definitions.
+    */
+    sql_print_error("Error in Log_event::read_log_event(): "
+                    "'%s', data_len: %u, event_type: %d",
+                    error, data_len, head[EVENT_TYPE_OFFSET]);
+  }
+
+  /* Never step past the end of the buffer or let the length underflow. */
+  const size_t consumed= (data_len > avail) ? avail : (size_t) data_len;
+  (*arg_buf)+= consumed;
+  (*arg_buf_len)-= consumed;
+  DBUG_RETURN(res);
+}
+#endif
#ifdef MYSQL_CLIENT
diff --git a/sql/mdl.cc b/sql/mdl.cc
index 57d5d8e7283..0f1c961aa29 100644
--- a/sql/mdl.cc
+++ b/sql/mdl.cc
@@ -24,6 +24,20 @@
#include <mysql/service_thd_wait.h>
#include <mysql/psi/mysql_stage.h>
+#ifdef WITH_WSREP
+#include "debug_sync.h"
+#include "wsrep_mysqld.h"
+#include "wsrep_thd.h"
+extern "C" my_thread_id wsrep_thd_thread_id(THD *thd);
+extern "C" char *wsrep_thd_query(THD *thd);
+void sql_print_information(const char *format, ...)
+ ATTRIBUTE_FORMAT(printf, 1, 2);
+
+extern bool
+wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
+ MDL_ticket *ticket,
+ const MDL_key *key);
+#endif /* WITH_WSREP */
#ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_MDL_map_mutex;
static PSI_mutex_key key_MDL_wait_LOCK_wait_status;
@@ -1435,6 +1449,22 @@ MDL_wait::timed_wait(MDL_context_owner *owner, struct timespec *abs_timeout,
while (!m_wait_status && !owner->is_killed() &&
wait_result != ETIMEDOUT && wait_result != ETIME)
{
+#ifdef WITH_WSREP
+ // Allow tests to block the applier thread using the DBUG facilities
+ DBUG_EXECUTE_IF("sync.wsrep_before_mdl_wait",
+ {
+ const char act[]=
+ "now "
+ "wait_for signal.wsrep_before_mdl_wait";
+ DBUG_ASSERT(!debug_sync_set_action((owner->get_thd()),
+ STRING_WITH_LEN(act)));
+ };);
+ if (wsrep_thd_is_BF(owner->get_thd(), false))
+ {
+ wait_result= mysql_cond_wait(&m_COND_wait_status, &m_LOCK_wait_status);
+ }
+ else
+#endif /* WITH_WSREP */
wait_result= mysql_cond_timedwait(&m_COND_wait_status, &m_LOCK_wait_status,
abs_timeout);
}
@@ -1501,11 +1531,58 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket)
called by other threads.
*/
DBUG_ASSERT(ticket->get_lock());
+#ifdef WITH_WSREP
+ if ((this == &(ticket->get_lock()->m_waiting)) &&
+ wsrep_thd_is_BF((void *)(ticket->get_ctx()->get_thd()), false))
+ {
+ Ticket_iterator itw(ticket->get_lock()->m_waiting);
+ Ticket_iterator itg(ticket->get_lock()->m_granted);
+
+ MDL_ticket *waiting, *granted;
+ MDL_ticket *prev=NULL;
+ bool added= false;
+
+ while ((waiting= itw++) && !added)
+ {
+ if (!wsrep_thd_is_BF((void *)(waiting->get_ctx()->get_thd()), true))
+ {
+ WSREP_DEBUG("MDL add_ticket inserted before: %lu %s",
+ wsrep_thd_thread_id(waiting->get_ctx()->get_thd()),
+ wsrep_thd_query(waiting->get_ctx()->get_thd()));
+ /* Insert the ticket before the first non-BF waiting thd. */
+ m_list.insert_after(prev, ticket);
+ added= true;
+ }
+ prev= waiting;
+ }
+
+ /* Otherwise, insert the ticket at the back of the waiting list. */
+ if (!added) m_list.push_back(ticket);
+
+ while ((granted= itg++))
+ {
+ if (granted->get_ctx() != ticket->get_ctx() &&
+ granted->is_incompatible_when_granted(ticket->get_type()))
+ {
+ if (!wsrep_grant_mdl_exception(ticket->get_ctx(), granted,
+ &ticket->get_lock()->key))
+ {
+ WSREP_DEBUG("MDL victim killed at add_ticket");
+ }
+ }
+ }
+ }
+ else
+ {
+#endif /* WITH_WSREP */
/*
Add ticket to the *back* of the queue to ensure fairness
among requests with the same priority.
*/
m_list.push_back(ticket);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
m_bitmap|= MDL_BIT(ticket->get_type());
}
@@ -1821,7 +1898,6 @@ MDL_object_lock::m_waiting_incompatible[MDL_TYPE_END] =
0
};
-
/**
Check if request for the metadata lock can be satisfied given its
current state.
@@ -1846,6 +1922,9 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg,
bool can_grant= FALSE;
bitmap_t waiting_incompat_map= incompatible_waiting_types_bitmap()[type_arg];
bitmap_t granted_incompat_map= incompatible_granted_types_bitmap()[type_arg];
+#ifdef WITH_WSREP
+ bool wsrep_can_grant= TRUE;
+#endif /* WITH_WSREP */
/*
New lock request can be satisfied iff:
@@ -1868,12 +1947,59 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg,
{
if (ticket->get_ctx() != requestor_ctx &&
ticket->is_incompatible_when_granted(type_arg))
+#ifdef WITH_WSREP
+ {
+ if (wsrep_thd_is_BF((void *)(requestor_ctx->get_thd()),false) &&
+ key.mdl_namespace() == MDL_key::GLOBAL)
+ {
+ WSREP_DEBUG("global lock granted for BF: %lu %s",
+ wsrep_thd_thread_id(requestor_ctx->get_thd()),
+ wsrep_thd_query(requestor_ctx->get_thd()));
+ can_grant = true;
+ }
+ else if (!wsrep_grant_mdl_exception(requestor_ctx, ticket, &key))
+ {
+ wsrep_can_grant= FALSE;
+ if (wsrep_log_conflicts)
+ {
+ MDL_lock * lock = ticket->get_lock();
+ WSREP_INFO(
+ "MDL conflict db=%s table=%s ticket=%d solved by %s",
+ lock->key.db_name(), lock->key.name(), ticket->get_type(), "abort"
+ );
+ }
+ }
+ else
+ {
+ can_grant= TRUE;
+ }
+ }
+#else
break;
+#endif /* WITH_WSREP */
}
+#ifdef WITH_WSREP
+ if ((ticket == NULL) && wsrep_can_grant)
+#else
if (ticket == NULL) /* Incompatible locks are our own. */
+#endif /* WITH_WSREP */
+
can_grant= TRUE;
}
}
+#ifdef WITH_WSREP
+ else
+ {
+ if (wsrep_thd_is_BF((void *)(requestor_ctx->get_thd()), false) &&
+ key.mdl_namespace() == MDL_key::GLOBAL)
+ {
+ WSREP_DEBUG("global lock granted for BF (waiting queue): %lu %s",
+ wsrep_thd_thread_id(requestor_ctx->get_thd()),
+ wsrep_thd_query(requestor_ctx->get_thd()));
+ can_grant = true;
+ }
+ }
+#endif /* WITH_WSREP */
return can_grant;
}
@@ -2980,7 +3106,12 @@ void MDL_context::release_locks_stored_before(enum_mdl_duration duration,
DBUG_VOID_RETURN;
}
-
+#ifdef WITH_WSREP
+/**
+  Release this context's MDL_EXPLICIT-duration locks by delegating to
+  release_locks_stored_before() with a NULL sentinel (presumably meaning
+  "no stop point, release them all" - confirm against that function).
+*/
+void MDL_context::release_explicit_locks()
+{
+  this->release_locks_stored_before(MDL_EXPLICIT, NULL);
+}
+#endif
/**
Release all explicit locks in the context which correspond to the
same name/object as this lock request.
@@ -3288,3 +3419,49 @@ void MDL_context::set_transaction_duration_for_all_locks()
ticket->m_duration= MDL_TRANSACTION;
#endif
}
+#ifdef WITH_WSREP
+/**
+  Write a one-line description of this ticket to the wsrep debug log.
+
+  The line contains the ticket's lock type, the namespace, database name
+  and object name of the lock's key, and the name of the wait stage
+  returned by the key's get_wait_state_name().
+
+  @param debug  nothing is reported unless this is true
+*/
+void MDL_ticket::wsrep_report(bool debug)
+{
+  if (!debug)
+    return;
+
+  const PSI_stage_info *psi_stage= m_lock->key.get_wait_state_name();
+
+  const char *type_name=
+    (get_type() == MDL_INTENTION_EXCLUSIVE)  ? "intention exclusive" :
+    (get_type() == MDL_SHARED)               ? "shared" :
+    (get_type() == MDL_SHARED_HIGH_PRIO)     ? "shared high prio" :
+    (get_type() == MDL_SHARED_READ)          ? "shared read" :
+    (get_type() == MDL_SHARED_WRITE)         ? "shared write" :
+    (get_type() == MDL_SHARED_NO_WRITE)      ? "shared no write" :
+    (get_type() == MDL_SHARED_NO_READ_WRITE) ? "shared no read write" :
+    (get_type() == MDL_EXCLUSIVE)            ? "exclusive" :
+    "UNKNOWN";
+
+  /*
+    Each label is matched against its own namespace value. Previously
+    FUNCTION, PROCEDURE, TRIGGER and EVENT were all compared with
+    MDL_key::TABLE and therefore could never be reported.
+  */
+  const char *space_name=
+    (m_lock->key.mdl_namespace() == MDL_key::GLOBAL)    ? "GLOBAL" :
+    (m_lock->key.mdl_namespace() == MDL_key::SCHEMA)    ? "SCHEMA" :
+    (m_lock->key.mdl_namespace() == MDL_key::TABLE)     ? "TABLE" :
+    (m_lock->key.mdl_namespace() == MDL_key::FUNCTION)  ? "FUNCTION" :
+    (m_lock->key.mdl_namespace() == MDL_key::PROCEDURE) ? "PROCEDURE" :
+    (m_lock->key.mdl_namespace() == MDL_key::TRIGGER)   ? "TRIGGER" :
+    (m_lock->key.mdl_namespace() == MDL_key::EVENT)     ? "EVENT" :
+    (m_lock->key.mdl_namespace() == MDL_key::COMMIT)    ? "COMMIT" :
+    "UNKNOWN";
+
+  WSREP_DEBUG("MDL ticket: type: %s space: %s db: %s name: %s (%s)",
+              type_name,
+              space_name,
+              m_lock->key.db_name(),
+              m_lock->key.name(),
+              psi_stage->m_name);
+}
+/**
+  Tell whether this context holds at least one ticket of MDL_EXPLICIT
+  duration.
+
+  @return true if the MDL_EXPLICIT ticket list is not empty, false otherwise
+*/
+bool MDL_context::wsrep_has_explicit_locks()
+{
+  /* Equivalent to probing the first element with an iterator. */
+  return !m_tickets[MDL_EXPLICIT].is_empty();
+}
+
+#endif /* WITH_WSREP */
diff --git a/sql/mdl.h b/sql/mdl.h
index 13de60284da..86f681c90f0 100644
--- a/sql/mdl.h
+++ b/sql/mdl.h
@@ -457,6 +457,7 @@ public:
MDL_key key;
public:
+
static void *operator new(size_t size, MEM_ROOT *mem_root) throw ()
{ return alloc_root(mem_root, size); }
static void operator delete(void *ptr, MEM_ROOT *mem_root) {}
@@ -586,6 +587,9 @@ public:
MDL_ticket *next_in_lock;
MDL_ticket **prev_in_lock;
public:
+#ifdef WITH_WSREP
+ void wsrep_report(bool debug);
+#endif /* WITH_WSREP */
bool has_pending_conflicting_lock() const;
MDL_context *get_ctx() const { return m_ctx; }
@@ -774,6 +778,13 @@ public:
m_tickets[MDL_EXPLICIT].is_empty());
}
+#ifdef WITH_WSREP
+  /** @return true if the MDL_TRANSACTION ticket list is not empty. */
+  inline bool has_transactional_locks() const
+  {
+    const bool none_held= m_tickets[MDL_TRANSACTION].is_empty();
+    return !none_held;
+  }
+#endif /* WITH_WSREP */
+
MDL_savepoint mdl_savepoint()
{
return MDL_savepoint(m_tickets[MDL_STATEMENT].front(),
@@ -786,6 +797,9 @@ public:
void release_statement_locks();
void release_transactional_locks();
+#ifdef WITH_WSREP
+ void release_explicit_locks();
+#endif
void rollback_to_savepoint(const MDL_savepoint &mdl_savepoint);
MDL_context_owner *get_owner() { return m_owner; }
@@ -918,6 +932,9 @@ private:
MDL_ticket **out_ticket);
public:
+#ifdef WITH_WSREP
+ bool wsrep_has_explicit_locks();
+#endif /* WITH_WSREP */
THD *get_thd() const { return m_owner->get_thd(); }
void find_deadlock();
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index d9320fa3bcf..45e6b3666bf 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -71,6 +71,13 @@
#include "scheduler.h"
#include <waiting_threads.h>
#include "debug_sync.h"
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#include "wsrep_var.h"
+#include "wsrep_thd.h"
+#include "wsrep_sst.h"
+ulong wsrep_running_threads = 0; // # of currently running wsrep threads
+#endif
#include "sql_callback.h"
#include "threadpool.h"
@@ -357,7 +364,11 @@ static char *default_character_set_name;
static char *character_set_filesystem_name;
static char *lc_messages;
static char *lc_time_names_name;
+#ifndef WITH_WSREP
static char *my_bind_addr_str;
+#else
+char *my_bind_addr_str;
+#endif /* WITH_WSREP */
static char *default_collation_name;
char *default_storage_engine;
static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME;
@@ -370,6 +381,10 @@ static DYNAMIC_ARRAY all_options;
/* Global variables */
+#ifdef WITH_WSREP
+ulong my_bind_addr;
+bool wsrep_new_cluster= false;
+#endif /* WITH_WSREP */
bool opt_bin_log, opt_bin_log_used=0, opt_ignore_builtin_innodb= 0;
my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table= 0, opt_help= 0;
static my_bool opt_abort;
@@ -463,6 +478,10 @@ ulong opt_binlog_rows_event_max_size;
my_bool opt_master_verify_checksum= 0;
my_bool opt_slave_sql_verify_checksum= 1;
const char *binlog_format_names[]= {"MIXED", "STATEMENT", "ROW", NullS};
+#ifdef WITH_WSREP
+const char *wsrep_binlog_format_names[]=
+ {"MIXED", "STATEMENT", "ROW", "NONE", NullS};
+#endif /* WITH_WSREP */
volatile sig_atomic_t calling_initgroups= 0; /**< Used in SIGSEGV handler. */
uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options;
uint mysqld_extra_port;
@@ -733,6 +752,23 @@ mysql_cond_t COND_server_started;
int mysqld_server_started=0, mysqld_server_initialized= 0;
File_parser_dummy_hook file_parser_dummy_hook;
+#ifdef WITH_WSREP
+mysql_mutex_t LOCK_wsrep_ready;
+mysql_cond_t COND_wsrep_ready;
+mysql_mutex_t LOCK_wsrep_sst;
+mysql_cond_t COND_wsrep_sst;
+mysql_mutex_t LOCK_wsrep_sst_init;
+mysql_cond_t COND_wsrep_sst_init;
+mysql_mutex_t LOCK_wsrep_rollback;
+mysql_cond_t COND_wsrep_rollback;
+wsrep_aborting_thd_t wsrep_aborting_thd= NULL;
+mysql_mutex_t LOCK_wsrep_replaying;
+mysql_cond_t COND_wsrep_replaying;
+mysql_mutex_t LOCK_wsrep_slave_threads;
+mysql_mutex_t LOCK_wsrep_desync;
+int wsrep_replaying= 0;
+static void wsrep_close_threads(THD* thd);
+#endif /* WITH_WSREP */
/* replication parameters, if master_host is not NULL, we are a slave */
uint report_port= 0;
@@ -872,6 +908,12 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list,
key_LOCK_error_messages, key_LOG_INFO_lock,
key_LOCK_thread_count, key_LOCK_thread_cache,
key_PARTITION_LOCK_auto_inc;
+#ifdef WITH_WSREP
+PSI_mutex_key key_LOCK_wsrep_rollback, key_LOCK_wsrep_thd,
+ key_LOCK_wsrep_replaying, key_LOCK_wsrep_ready, key_LOCK_wsrep_sst,
+ key_LOCK_wsrep_sst_thread, key_LOCK_wsrep_sst_init,
+ key_LOCK_wsrep_slave_threads, key_LOCK_wsrep_desync;
+#endif
PSI_mutex_key key_RELAYLOG_LOCK_index;
PSI_mutex_key key_LOCK_slave_state, key_LOCK_binlog_state,
key_LOCK_rpl_thread, key_LOCK_rpl_thread_pool, key_LOCK_parallel_entry;
@@ -950,6 +992,18 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_LOCK_commit_ordered, "LOCK_commit_ordered", PSI_FLAG_GLOBAL},
{ &key_LOCK_slave_init, "LOCK_slave_init", PSI_FLAG_GLOBAL},
{ &key_LOG_INFO_lock, "LOG_INFO::lock", 0},
+#ifdef WITH_WSREP
+  /* wsrep mutex keys; every key is listed exactly once. */
+  { &key_LOCK_wsrep_ready, "LOCK_wsrep_ready", PSI_FLAG_GLOBAL},
+  { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL},
+  { &key_LOCK_wsrep_sst_thread, "wsrep_sst_thread", 0},
+  { &key_LOCK_wsrep_sst_init, "LOCK_wsrep_sst_init", PSI_FLAG_GLOBAL},
+  { &key_LOCK_wsrep_rollback, "LOCK_wsrep_rollback", PSI_FLAG_GLOBAL},
+  { &key_LOCK_wsrep_thd, "THD::LOCK_wsrep_thd", 0},
+  { &key_LOCK_wsrep_replaying, "LOCK_wsrep_replaying", PSI_FLAG_GLOBAL},
+  { &key_LOCK_wsrep_slave_threads, "LOCK_wsrep_slave_threads", PSI_FLAG_GLOBAL},
+  { &key_LOCK_wsrep_desync, "LOCK_wsrep_desync", PSI_FLAG_GLOBAL},
+#endif
{ &key_LOCK_thread_count, "LOCK_thread_count", PSI_FLAG_GLOBAL},
{ &key_LOCK_thread_cache, "LOCK_thread_cache", PSI_FLAG_GLOBAL},
{ &key_PARTITION_LOCK_auto_inc, "HA_DATA_PARTITION::LOCK_auto_inc", 0},
@@ -996,6 +1050,11 @@ PSI_cond_key key_BINLOG_COND_xid_list, key_BINLOG_update_cond,
key_TABLE_SHARE_cond, key_user_level_lock_cond,
key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache,
key_BINLOG_COND_queue_busy;
+#ifdef WITH_WSREP
+PSI_cond_key key_COND_wsrep_rollback,
+ key_COND_wsrep_replaying, key_COND_wsrep_ready, key_COND_wsrep_sst,
+ key_COND_wsrep_sst_init, key_COND_wsrep_sst_thread;
+#endif /* WITH_WSREP */
PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready,
key_COND_wait_commit;
PSI_cond_key key_RELAYLOG_COND_queue_busy;
@@ -1045,6 +1104,14 @@ static PSI_cond_info all_server_conds[]=
{ &key_user_level_lock_cond, "User_level_lock::cond", 0},
{ &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL},
{ &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL},
+#ifdef WITH_WSREP
+ { &key_COND_wsrep_ready, "COND_wsrep_ready", PSI_FLAG_GLOBAL},
+ { &key_COND_wsrep_sst, "COND_wsrep_sst", PSI_FLAG_GLOBAL},
+ { &key_COND_wsrep_sst_init, "COND_wsrep_sst_init", PSI_FLAG_GLOBAL},
+ { &key_COND_wsrep_sst_thread, "wsrep_sst_thread", 0},
+ { &key_COND_wsrep_rollback, "COND_wsrep_rollback", PSI_FLAG_GLOBAL},
+ { &key_COND_wsrep_replaying, "COND_wsrep_replaying", PSI_FLAG_GLOBAL},
+#endif
{ &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL},
{ &key_COND_rpl_thread, "COND_rpl_thread", 0},
{ &key_COND_rpl_thread_queue, "COND_rpl_thread_queue", 0},
@@ -1403,7 +1470,9 @@ bool mysqld_embedded=0;
bool mysqld_embedded=1;
#endif
+#ifndef EMBEDDED_LIBRARY
static my_bool plugins_are_initialized= FALSE;
+#endif
#ifndef DBUG_OFF
static const char* default_dbug_option;
@@ -1626,6 +1695,11 @@ static void close_connections(void)
if (tmp->slave_thread)
continue;
+#ifdef WITH_WSREP
+ /* skip wsrep system threads as well */
+ if (WSREP(tmp) && (tmp->wsrep_exec_mode==REPL_RECV || tmp->wsrep_applier))
+ continue;
+#endif
tmp->killed= KILL_SERVER_HARD;
MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp));
mysql_mutex_lock(&tmp->LOCK_thd_data);
@@ -1702,6 +1776,33 @@ static void close_connections(void)
close_connection(tmp,ER_SERVER_SHUTDOWN);
}
#endif
+#ifdef WITH_WSREP
+ /*
+ * TODO: this code block may turn out redundant. wsrep->disconnect()
+ * should terminate slave threads gracefully, and we don't need
+ * to signal them here.
+ * The code here makes sure mysqld will not hang during shutdown
+ * even if wsrep provider has problems in shutting down.
+ */
+ if (WSREP(tmp) && tmp->wsrep_exec_mode==REPL_RECV)
+ {
+ sql_print_information("closing wsrep system thread");
+ tmp->killed= KILL_CONNECTION;
+ MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp));
+ if (tmp->mysys_var)
+ {
+ tmp->mysys_var->abort=1;
+ mysql_mutex_lock(&tmp->mysys_var->mutex);
+ if (tmp->mysys_var->current_cond)
+ {
+ mysql_mutex_lock(tmp->mysys_var->current_mutex);
+ mysql_cond_broadcast(tmp->mysys_var->current_cond);
+ mysql_mutex_unlock(tmp->mysys_var->current_mutex);
+ }
+ mysql_mutex_unlock(&tmp->mysys_var->mutex);
+ }
+ }
+#endif
DBUG_PRINT("quit",("Unlocking LOCK_thread_count"));
mysql_mutex_unlock(&LOCK_thread_count);
}
@@ -1856,8 +1957,16 @@ static void __cdecl kill_server(int sig_ptr)
}
}
#endif
+#ifdef WITH_WSREP
+ /* Stop wsrep threads in case they are running. */
+ wsrep_stop_replication(NULL);
+#endif
close_connections();
+#ifdef WITH_WSREP
+ if (wsrep_inited == 1)
+ wsrep_deinit(true);
+#endif
if (sig != MYSQL_KILL_SIGNAL &&
sig != 0)
unireg_abort(1); /* purecov: inspected */
@@ -1952,6 +2061,25 @@ extern "C" void unireg_abort(int exit_code)
usage();
if (exit_code)
sql_print_error("Aborting\n");
+#ifdef WITH_WSREP
+ if (wsrep)
+ {
+ /* This is an abort situation: we cannot expect to gracefully close all
+ * wsrep threads here, we can only disconnect from the service. */
+ wsrep_close_client_connections(FALSE);
+ shutdown_in_progress= 1;
+ THD* thd(0);
+ wsrep->disconnect(wsrep);
+ WSREP_INFO("Service disconnected.");
+ wsrep_close_threads(thd); /* this won't close all threads */
+ sleep(1); /* so give some time to exit for those which can */
+ WSREP_INFO("Some threads may fail to exit.");
+
+ /* In bootstrap mode we deinitialize wsrep here. */
+ if (opt_bootstrap && wsrep_inited)
+ wsrep_deinit(true);
+ }
+#endif // WITH_WSREP
clean_up(!opt_abort && (exit_code || !opt_bootstrap)); /* purecov: inspected */
DBUG_PRINT("quit",("done with cleanup in unireg_abort"));
mysqld_exit(exit_code);
@@ -2167,6 +2295,20 @@ static void clean_up_mutexes()
mysql_cond_destroy(&COND_thread_count);
mysql_cond_destroy(&COND_thread_cache);
mysql_cond_destroy(&COND_flush_thread_cache);
+#ifdef WITH_WSREP
+ (void) mysql_mutex_destroy(&LOCK_wsrep_ready);
+ (void) mysql_cond_destroy(&COND_wsrep_ready);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_sst);
+ (void) mysql_cond_destroy(&COND_wsrep_sst);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_sst_init);
+ (void) mysql_cond_destroy(&COND_wsrep_sst_init);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_rollback);
+ (void) mysql_cond_destroy(&COND_wsrep_rollback);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_replaying);
+ (void) mysql_cond_destroy(&COND_wsrep_replaying);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_slave_threads);
+ (void) mysql_mutex_destroy(&LOCK_wsrep_desync);
+#endif
mysql_mutex_destroy(&LOCK_server_started);
mysql_cond_destroy(&COND_server_started);
mysql_mutex_destroy(&LOCK_prepare_ordered);
@@ -2451,6 +2593,10 @@ static MYSQL_SOCKET activate_tcp_port(uint port)
socket_errno);
unireg_abort(1);
}
+#if defined(WITH_WSREP) && defined(HAVE_FCNTL) && defined(FD_CLOEXEC)
+ (void) fcntl(mysql_socket_getfd(ip_sock), F_SETFD, FD_CLOEXEC);
+#endif /* WITH_WSREP */
+
DBUG_RETURN(ip_sock);
}
@@ -2568,6 +2714,9 @@ static void network_init(void)
if (mysql_socket_listen(unix_sock,(int) back_log) < 0)
sql_print_warning("listen() on Unix socket failed with error %d",
socket_errno);
+#if defined(WITH_WSREP) && defined(HAVE_FCNTL) && defined(FD_CLOEXEC)
+ /*
+ Mark the Unix socket close-on-exec. FD_CLOEXEC must be part of the guard
+ because it is used below; this matches the TCP socket handling.
+ NOTE(review): presumably this keeps processes spawned by wsrep from
+ inheriting the listening socket - confirm.
+ */
+ (void) fcntl(mysql_socket_getfd(unix_sock), F_SETFD, FD_CLOEXEC);
+#endif /* WITH_WSREP */
}
#endif
DBUG_PRINT("info",("server started"));
@@ -2642,9 +2791,19 @@ void thd_cleanup(THD *thd)
void dec_connection_count(THD *thd)
{
- mysql_mutex_lock(&LOCK_connection_count);
- (*thd->scheduler->connection_count)--;
- mysql_mutex_unlock(&LOCK_connection_count);
+#ifdef WITH_WSREP
+ /*
+ Do not decrement when it is a wsrep system thread. wsrep_applier is set
+ for applier as well as rollbacker threads.
+ */
+ if (!thd->wsrep_applier)
+#endif /* WITH_WSREP */
+ {
+ mysql_mutex_lock(&LOCK_connection_count);
+ /* Check the counter while holding the mutex that protects it. */
+ DBUG_ASSERT(*thd->scheduler->connection_count > 0);
+ (*thd->scheduler->connection_count)--;
+ mysql_mutex_unlock(&LOCK_connection_count);
+ }
}
@@ -2814,10 +2973,19 @@ static bool cache_thread()
bool one_thread_per_connection_end(THD *thd, bool put_in_cache)
{
DBUG_ENTER("one_thread_per_connection_end");
+#ifdef WITH_WSREP
+ const bool wsrep_applier(thd->wsrep_applier);
+#endif
+
unlink_thd(thd);
/* Mark that current_thd is not valid anymore */
set_current_thd(0);
+
+#ifdef WITH_WSREP
+ if (!wsrep_applier && put_in_cache && cache_thread())
+#else
if (put_in_cache && cache_thread())
+#endif /* WITH_WSREP */
DBUG_RETURN(0); // Thread is reused
signal_thd_deleted();
@@ -3251,8 +3419,8 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused)))
should not be any other mysql_cond_signal() calls.
*/
mysql_mutex_lock(&LOCK_thread_count);
- mysql_mutex_unlock(&LOCK_thread_count);
mysql_cond_broadcast(&COND_thread_count);
+ mysql_mutex_unlock(&LOCK_thread_count);
(void) pthread_sigmask(SIG_BLOCK,&set,NULL);
for (;;)
@@ -3930,7 +4098,6 @@ static int init_common_variables()
}
else
opt_log_basename= glob_hostname;
-
if (!*pidfile_name)
{
strmake(pidfile_name, opt_log_basename, sizeof(pidfile_name)-5);
@@ -3989,7 +4156,11 @@ static int init_common_variables()
compile_time_assert(sizeof(com_status_vars)/sizeof(com_status_vars[0]) - 1 ==
SQLCOM_END + 8);
#endif
-
+#ifdef WITH_WSREP
+ /* This is a protection against mutually incompatible option values. */
+ if (WSREP_ON && wsrep_check_opts (remaining_argc, remaining_argv))
+ global_system_variables.wsrep_on= 0;
+#endif /* WITH_WSREP */
if (get_options(&remaining_argc, &remaining_argv))
return 1;
set_server_version();
@@ -4415,6 +4586,28 @@ static int init_thread_environment()
rpl_init_gtid_waiting();
#endif
+#ifdef WITH_WSREP
+ mysql_mutex_init(key_LOCK_wsrep_ready,
+ &LOCK_wsrep_ready, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_ready, &COND_wsrep_ready, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_sst,
+ &LOCK_wsrep_sst, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_sst, &COND_wsrep_sst, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_sst_init,
+ &LOCK_wsrep_sst_init, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_sst_init, &COND_wsrep_sst_init, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_rollback,
+ &LOCK_wsrep_rollback, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_rollback, &COND_wsrep_rollback, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_replaying,
+ &LOCK_wsrep_replaying, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_wsrep_replaying, &COND_wsrep_replaying, NULL);
+ mysql_mutex_init(key_LOCK_wsrep_slave_threads,
+ &LOCK_wsrep_slave_threads, MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(key_LOCK_wsrep_desync,
+ &LOCK_wsrep_desync, MY_MUTEX_INIT_FAST);
+#endif
+
DBUG_RETURN(0);
}
@@ -4667,10 +4860,18 @@ static int init_server_components()
/* need to configure logging before initializing storage engines */
if (!opt_bin_log_used)
{
+#ifdef WITH_WSREP
+ if (!WSREP_ON && opt_log_slave_updates)
+#else
if (opt_log_slave_updates)
+#endif
sql_print_warning("You need to use --log-bin to make "
"--log-slave-updates work.");
+#ifdef WITH_WSREP
+ if (!WSREP_ON && binlog_format_used)
+#else
if (binlog_format_used)
+#endif
sql_print_warning("You need to use --log-bin to make "
"--binlog-format work.");
}
@@ -4697,8 +4898,6 @@ static int init_server_components()
}
#endif
- DBUG_ASSERT(!opt_bin_log || opt_bin_logname);
-
if (opt_bin_log)
{
/* Reports an error and aborts, if the --log-bin's path
@@ -4747,10 +4946,72 @@ static int init_server_components()
{
opt_bin_logname= my_once_strdup(buf, MYF(MY_WME));
}
+#ifdef WITH_WSREP /* WSREP BEFORE SE */
+ /*
+ Wsrep initialization must happen at this point, because:
+ - opt_bin_logname must be known when starting replication
+ since SST may need it
+ - SST may modify binlog index file, so it must be opened
+ after SST has happened
+ */
+ }
+ if (!wsrep_recovery && !opt_help)
+ {
+ if (opt_bootstrap) // bootsrap option given - disable wsrep functionality
+ {
+ wsrep_provider_init(WSREP_NONE);
+ if (wsrep_init()) unireg_abort(1);
+ }
+ else // full wsrep initialization
+ {
+ // add basedir/bin to PATH to resolve wsrep script names
+ char* const tmp_path((char*)alloca(strlen(mysql_home) +
+ strlen("/bin") + 1));
+ if (tmp_path)
+ {
+ strcpy(tmp_path, mysql_home);
+ strcat(tmp_path, "/bin");
+ wsrep_prepend_PATH(tmp_path);
+ }
+ else
+ {
+ WSREP_ERROR("Could not append %s/bin to PATH", mysql_home);
+ }
+
+ if (wsrep_before_SE())
+ {
+ set_ports(); // this is also called in network_init() later but we need
+ // to know mysqld_port now - lp:1071882
+ /*
+ Plugin initialization (plugin_init()) hasn't happened yet, set
+ maria_hton to 0.
+ */
+ maria_hton= 0;
+ wsrep_init_startup(true);
+ }
+ }
+ }
+ if (opt_bin_log)
+ {
+ /*
+ Variable ln is not defined at this scope. We use opt_bin_logname instead.
+ It should be the same as ln since
+ - mysql_bin_log.generate_name() returns first argument if new log name
+ is not generated
+ - if new log name is generated, return value is assigned to ln and copied
+ to opt_bin_logname above
+ */
+ if (mysql_bin_log.open_index_file(opt_binlog_index_name, opt_bin_logname,
+ TRUE))
+ {
+ unireg_abort(1);
+ }
+#else
if (mysql_bin_log.open_index_file(opt_binlog_index_name, ln, TRUE))
{
unireg_abort(1);
}
+#endif /* WITH_WSREP */
}
/* call ha_init_key_cache() on all key caches to init them */
@@ -4774,6 +5035,49 @@ static int init_server_components()
}
plugins_are_initialized= TRUE; /* Don't separate from init function */
+#ifdef WITH_WSREP
+ /* Wait for wsrep threads to get created. */
+ if (wsrep_creating_startup_threads == 1) {
+ mysql_mutex_lock(&LOCK_thread_count);
+ while (wsrep_running_threads < 2)
+ {
+ mysql_cond_wait(&COND_thread_count, &LOCK_thread_count);
+ }
+
+ /* Now is the time to initialize threads for queries. */
+ THD *tmp;
+ I_List_iterator<THD> it(threads);
+ while ((tmp= it++))
+ {
+ if (tmp->wsrep_applier == true)
+ {
+ /*
+ Save and later restore server_status and variables.option_bits,
+ because they get altered during init_for_queries().
+ */
+ unsigned int server_status_saved= tmp->server_status;
+ ulonglong option_bits_saved= tmp->variables.option_bits;
+
+ /*
+ Set THR_THD to temporarily point to this THD to register all the
+ variables that allocates memory for this THD.
+ */
+ THD *current_thd_saved= current_thd;
+ set_current_thd(tmp);
+
+ tmp->init_for_queries();
+
+ /* Restore current_thd. */
+ set_current_thd(current_thd_saved);
+
+ tmp->server_status= server_status_saved;
+ tmp->variables.option_bits= option_bits_saved;
+ }
+ }
+ mysql_mutex_unlock(&LOCK_thread_count);
+ }
+#endif
+
/* we do want to exit if there are any other unknown options */
if (remaining_argc > 1)
{
@@ -4900,8 +5204,33 @@ static int init_server_components()
internal_tmp_table_max_key_segments= myisam_max_key_segments();
#endif
+#ifdef WITH_WSREP
+ if (!opt_bin_log)
+ {
+ wsrep_emulate_bin_log= 1;
+ }
+#endif
+
tc_log= get_tc_log_implementation();
+#ifdef WITH_WSREP
+ if (tc_log == &tc_log_mmap)
+ {
+ /*
+ wsrep hton raises total_ha_2pc count to 2, even in native mysql mode.
+ Have to force using tc_log_dummy here, as tc_log_mmap segfaults.
+ */
+ if (WSREP_ON || total_ha_2pc <= 2)
+ tc_log= &tc_log_dummy;
+ }
+
+ WSREP_DEBUG("Initial TC log open: %s",
+ (tc_log == &mysql_bin_log) ? "binlog" :
+ (tc_log == &tc_log_mmap) ? "mmap" :
+ (tc_log == &tc_log_dummy) ? "dummy" : "unknown"
+ );
+#endif
+
if (tc_log->open(opt_bin_log ? opt_bin_logname : opt_tc_log_file))
{
sql_print_error("Can't init tc log");
@@ -4992,6 +5321,437 @@ static void create_shutdown_thread()
#endif /* EMBEDDED_LIBRARY */
+#ifdef WITH_WSREP
+/* Function run by a wsrep system thread once its THD has been set up. */
+typedef void (*wsrep_thd_processor_fun)(THD *);
+
+/*
+ Thread entry point for a wsrep system thread.
+
+ Creates a THD, adds it to the global thread list, configures it as a
+ bootstrap-style system thread with all privileges, runs the processor
+ function and finally tears the thread down again.
+
+ @param arg function pointer of type wsrep_thd_processor_fun
+
+ @return NULL. On a setup failure while wsrep_creating_startup_threads is 1,
+ unireg_abort(1) is called instead.
+*/
+pthread_handler_t start_wsrep_THD(void *arg)
+{
+ THD *thd;
+ wsrep_thd_processor_fun processor= (wsrep_thd_processor_fun)arg;
+
+ /* NOTE(review): THD(true) presumably marks the THD as a wsrep applier -
+ confirm against the THD constructor. */
+ if (my_thread_init() || (!(thd= new THD(true))))
+ {
+ goto error;
+ }
+
+ /* Register the new THD in the global list under LOCK_thread_count. */
+ mysql_mutex_lock(&LOCK_thread_count);
+ thd->thread_id=thread_id++;
+
+ thd->real_id=pthread_self(); // Keep purify happy
+ thread_count++;
+ thread_created++;
+ threads.append(thd);
+
+ /* No vio is attached to this thread's net (null st_vio pointer). */
+ my_net_init(&thd->net,(st_vio*) 0, MYF(0));
+
+ DBUG_PRINT("wsrep",(("creating thread %lld"), (long long)thd->thread_id));
+ thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer();
+ (void) mysql_mutex_unlock(&LOCK_thread_count);
+
+ /* from bootstrap()... */
+ thd->bootstrap=1;
+ thd->max_client_packet_length= thd->net.max_packet;
+ thd->security_ctx->master_access= ~(ulong)0;
+
+ /* from handle_one_connection... */
+ pthread_detach_this_thread();
+
+ mysql_thread_set_psi_id(thd->thread_id);
+ thd->thr_create_utime= microsecond_interval_timer();
+
+ /* A non-zero result from the scheduler's init callback is a failure. */
+ if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0))
+ {
+ close_connection(thd, ER_OUT_OF_RESOURCES);
+ statistic_increment(aborted_connects,&LOCK_status);
+ MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0));
+ goto error;
+ }
+
+// </5.1.17>
+ /*
+ handle_one_connection() is normally the only way a thread would
+ start and would always be on the very high end of the stack ,
+ therefore, the thread stack always starts at the address of the
+ first local variable of handle_one_connection, which is thd. We
+ need to know the start of the stack so that we could check for
+ stack overruns.
+ */
+ DBUG_PRINT("wsrep", ("handle_one_connection called by thread %lld\n",
+ (long long)thd->thread_id));
+ /* now that we've called my_thread_init(), it is safe to call DBUG_* */
+
+ thd->thread_stack= (char*) &thd;
+ if (thd->store_globals())
+ {
+ close_connection(thd, ER_OUT_OF_RESOURCES);
+ statistic_increment(aborted_connects,&LOCK_status);
+ MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0));
+ delete thd;
+ goto error;
+ }
+
+ thd->system_thread= SYSTEM_THREAD_SLAVE_SQL;
+ thd->security_ctx->skip_grants();
+
+ /* handle_one_connection() again... */
+ //thd->version= refresh_version;
+ thd->proc_info= 0;
+ thd->set_command(COM_SLEEP);
+
+ /*
+ While the startup threads are still being created, init_for_queries() is
+ not called here; init_server_components() calls it for the applier THDs
+ after plugin initialization.
+ */
+ if (wsrep_creating_startup_threads == 0)
+ {
+ thd->init_for_queries();
+ }
+
+ /* Announce this thread to anyone waiting on COND_thread_count. */
+ mysql_mutex_lock(&LOCK_thread_count);
+ wsrep_running_threads++;
+ mysql_cond_broadcast(&COND_thread_count);
+
+ /* With more than two wsrep threads running, the startup phase is over. */
+ if (wsrep_running_threads > 2)
+ {
+ wsrep_creating_startup_threads= 0;
+ }
+
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ /* Run the thread's actual work; teardown starts when it returns. */
+ processor(thd);
+
+ close_connection(thd, 0);
+
+ mysql_mutex_lock(&LOCK_thread_count);
+ wsrep_running_threads--;
+ WSREP_DEBUG("wsrep running threads now: %lu", wsrep_running_threads);
+ mysql_cond_broadcast(&COND_thread_count);
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ // Note: We can't call THD destructor without crashing
+ // if plugins have not been initialized. However, in most of the
+ // cases this means that pre SE initialization SST failed and
+ // we are going to exit anyway.
+ if (plugins_are_initialized)
+ {
+ net_end(&thd->net);
+ MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 1));
+ }
+ else
+ {
+ // TODO: lightweight cleanup to get rid of:
+ // 'Error in my_thread_global_end(): 2 threads didn't exit'
+ // at server shutdown
+ }
+
+ my_thread_end();
+ /*
+ NOTE(review): presumably the end_thread callback does not free the THD
+ for schedulers above SCHEDULER_ONE_THREAD_PER_CONNECTION, hence the
+ explicit delete and thread_count decrement here - confirm.
+ */
+ if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION)
+ {
+ mysql_mutex_lock(&LOCK_thread_count);
+ delete thd;
+ thread_count--;
+ mysql_mutex_unlock(&LOCK_thread_count);
+ }
+ return(NULL);
+
+error:
+ WSREP_ERROR("Failed to create/initialize system thread");
+
+ /* Abort if it is the first applier/rollbacker thread. */
+ if (wsrep_creating_startup_threads == 1)
+ unireg_abort(1);
+ else
+ return NULL;
+}
+
+/*
+  Abort the transaction of a connection that is in its commit phase.
+
+  @param thd  connection to inspect
+
+  @return true if thd->wsrep_query_state was QUERY_COMMITTING and
+          wsrep_abort_thd() was called for it, false otherwise
+*/
+static bool abort_replicated(THD *thd)
+{
+  /* Only connections that are currently committing are touched. */
+  if (thd->wsrep_query_state != QUERY_COMMITTING)
+    return false;
+
+  WSREP_DEBUG("aborting replicated trx: %lu", thd->real_id);
+
+  (void) wsrep_abort_thd(thd, thd, TRUE);
+  return true;
+}
+/*
+  Tell whether a THD is a client connection handled by wsrep.
+
+  The former "#if REMOVE" variant (lp:777201), which derived the answer from
+  slave_thread, system_thread, vio and wsrep_exec_mode, was never compiled
+  and has been dropped.
+
+  @param thd  connection to inspect
+
+  @return true if thd is flagged as a wsrep client thread and has wsrep_on
+          set in its session variables
+*/
+static inline bool is_client_connection(THD *thd)
+{
+  return (thd->wsrep_client_thread && thd->variables.wsrep_on);
+}
+
+/*
+  Tell whether a connection is replaying its transaction.
+
+  @param thd  connection to inspect
+
+  @return true if thd->wsrep_conflict_state is REPLAYING; the state is read
+          while holding thd->LOCK_wsrep_thd
+*/
+static inline bool is_replaying_connection(THD *thd)
+{
+  mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+  const bool replaying= (thd->wsrep_conflict_state == REPLAYING);
+  mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+  return replaying;
+}
+
+/*
+  Tell whether a connection is in its commit phase.
+
+  @param thd  connection to inspect
+
+  @return true if thd->wsrep_query_state is QUERY_COMMITTING; the state is
+          read while holding thd->LOCK_wsrep_thd
+*/
+static inline bool is_committing_connection(THD *thd)
+{
+  mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+  const bool committing= (thd->wsrep_query_state == QUERY_COMMITTING);
+  mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+  return committing;
+}
+
+/*
+  Look for a client connection that has been marked KILL_CONNECTION but is
+  still present in the global thread list.
+
+  The first such connection found is passed to abort_replicated() before
+  returning. The list is walked without taking LOCK_thread_count here;
+  wsrep_close_client_connections() calls this with the lock already held.
+
+  @return true if such a connection exists, false otherwise
+*/
+static bool have_client_connections()
+{
+  I_List_iterator<THD> thd_iter(threads);
+
+  for (THD *cur= thd_iter++; cur; cur= thd_iter++)
+  {
+    DBUG_PRINT("quit",("Informing thread %ld that it's time to die",
+                       cur->thread_id));
+    if (!is_client_connection(cur) || cur->killed != KILL_CONNECTION)
+      continue;
+
+    (void) abort_replicated(cur);
+    return true;
+  }
+
+  return false;
+}
+
+/*
+  Ask a thread to terminate.
+
+  Marks the THD as KILL_CONNECTION, notifies the thread scheduler and, when
+  the THD has a mysys_var, sets its abort flag and broadcasts on the
+  condition variable recorded in mysys_var->current_cond, if any.
+
+  @param thd  thread to close
+*/
+static void wsrep_close_thread(THD *thd)
+{
+  thd->killed= KILL_CONNECTION;
+  MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (thd));
+
+  /* Without a mysys_var there is nothing to wake up. */
+  if (!thd->mysys_var)
+    return;
+
+  thd->mysys_var->abort= 1;
+  mysql_mutex_lock(&thd->mysys_var->mutex);
+  if (thd->mysys_var->current_cond)
+  {
+    /* The broadcast is done while holding the mutex paired with the cond. */
+    mysql_mutex_lock(thd->mysys_var->current_mutex);
+    mysql_cond_broadcast(thd->mysys_var->current_cond);
+    mysql_mutex_unlock(thd->mysys_var->current_mutex);
+  }
+  mysql_mutex_unlock(&thd->mysys_var->mutex);
+}
+
+/*
+  Tell whether any wsrep client connection is currently committing.
+
+  Walks the global thread list under LOCK_thread_count. The lock is released
+  on every path: the earlier version returned TRUE with the mutex still
+  held, so the next call from the polling loop in
+  wsrep_wait_committing_connections_close() relocked a mutex it owned.
+
+  @return TRUE if at least one client connection is in QUERY_COMMITTING
+          state, FALSE otherwise
+*/
+static my_bool have_committing_connections()
+{
+  my_bool found= FALSE;
+  THD *tmp;
+
+  mysql_mutex_lock(&LOCK_thread_count); // For unlink from list
+
+  I_List_iterator<THD> it(threads);
+  while ((tmp= it++))
+  {
+    if (is_client_connection(tmp) && is_committing_connection(tmp))
+    {
+      found= TRUE;
+      break;
+    }
+  }
+
+  mysql_mutex_unlock(&LOCK_thread_count);
+  return found;
+}
+
+/*
+  Poll until no client connection is committing or the wait budget is used
+  up.
+
+  NOTE(review): wait_time is decremented by the same value that is passed
+  to my_sleep(), so both are presumably expressed in my_sleep() units -
+  confirm against the callers.
+
+  @param wait_time  remaining wait budget
+
+  @return 1 if a committing connection is still present afterwards, else 0
+*/
+int wsrep_wait_committing_connections_close(int wait_time)
+{
+  const int poll_interval= 100;
+
+  for (; have_committing_connections() && wait_time > 0;
+       wait_time-= poll_interval)
+  {
+    WSREP_DEBUG("wait for committing transaction to close: %d", wait_time);
+    my_sleep(poll_interval);
+  }
+
+  return have_committing_connections() ? 1 : 0;
+}
+
+/*
+ Close all wsrep client connections in two passes.
+
+ Pass one marks or signals every client connection. After an optional pause,
+ pass two calls close_connection() on those that are neither committing nor
+ replaying. With wait_to_end set, the function then waits on
+ COND_thread_count until have_client_connections() reports none left.
+
+ kill_cached_threads is forced to true for the duration of the call and is
+ restored to its previous value before returning.
+
+ @param wait_to_end if true, wait for the killed client connections to go
+*/
+void wsrep_close_client_connections(my_bool wait_to_end)
+{
+ /*
+ First signal all threads that it's time to die
+ */
+
+ THD *tmp;
+ mysql_mutex_lock(&LOCK_thread_count); // For unlink from list
+
+ bool kill_cached_threads_saved= kill_cached_threads;
+ kill_cached_threads= true; // prevent future threads caching
+ mysql_cond_broadcast(&COND_thread_cache); // tell cached threads to die
+
+ I_List_iterator<THD> it(threads);
+ while ((tmp=it++))
+ {
+ DBUG_PRINT("quit",("Informing thread %ld that it's time to die",
+ tmp->thread_id));
+ /* Everything that is not a wsrep client connection is skipped. */
+ if (!is_client_connection(tmp))
+ continue;
+
+ /* Replaying connections are only flagged, not signalled. */
+ if (is_replaying_connection(tmp))
+ {
+ tmp->killed= KILL_CONNECTION;
+ continue;
+ }
+
+ /* replicated transactions must be skipped */
+ if (abort_replicated(tmp))
+ continue;
+
+ WSREP_DEBUG("closing connection %ld", tmp->thread_id);
+ wsrep_close_thread(tmp);
+ }
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ /* thread_count is read here without holding LOCK_thread_count. */
+ if (thread_count)
+ sleep(2); // Give threads time to die
+
+ mysql_mutex_lock(&LOCK_thread_count);
+ /*
+ Force remaining threads to die by closing the connection to the client
+ */
+
+ I_List_iterator<THD> it2(threads);
+ while ((tmp=it2++))
+ {
+#ifndef __bsdi__ // Bug in BSDI kernel
+ /* abort_replicated() is called again here and may abort a committer. */
+ if (is_client_connection(tmp) &&
+ !abort_replicated(tmp) &&
+ !is_replaying_connection(tmp))
+ {
+ WSREP_INFO("killing local connection: %ld",tmp->thread_id);
+ close_connection(tmp,0);
+ }
+#endif
+ }
+
+ DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count));
+ WSREP_DEBUG("waiting for client connections to close: %u", thread_count);
+
+ /* LOCK_thread_count is held here, as mysql_cond_wait() requires. */
+ while (wait_to_end && have_client_connections())
+ {
+ mysql_cond_wait(&COND_thread_count, &LOCK_thread_count);
+ DBUG_PRINT("quit",("One thread died (count=%u)", thread_count));
+ }
+
+ kill_cached_threads= kill_cached_threads_saved;
+
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ /* All client connection threads have now been aborted */
+}
+
+/*
+ Ask a single applier thread to terminate.
+
+ Logs the thread id and delegates to wsrep_close_thread().
+
+ @param thd applier thread to close
+*/
+void wsrep_close_applier(THD *thd)
+{
+ WSREP_DEBUG("closing applier %ld", thd->thread_id);
+ wsrep_close_thread(thd);
+}
+
+/*
+  Ask every wsrep thread except the given one to terminate.
+
+  Walks the global thread list under LOCK_thread_count and calls
+  wsrep_close_thread() on each THD that has wsrep_applier set and is not
+  thd itself.
+
+  @param thd  thread to leave alone; may be NULL, in which case no thread
+              is exempt
+*/
+static void wsrep_close_threads(THD *thd)
+{
+  mysql_mutex_lock(&LOCK_thread_count); // For unlink from list
+
+  I_List_iterator<THD> thd_iter(threads);
+  for (THD *cur= thd_iter++; cur; cur= thd_iter++)
+  {
+    DBUG_PRINT("quit",("Informing thread %ld that it's time to die",
+                       cur->thread_id));
+    /* Only wsrep threads other than the caller's own THD are closed. */
+    if (!cur->wsrep_applier || cur == thd)
+      continue;
+
+    WSREP_DEBUG("closing wsrep thread %ld", cur->thread_id);
+    wsrep_close_thread(cur);
+  }
+
+  mysql_mutex_unlock(&LOCK_thread_count);
+}
+
+/*
+ Wait for the wsrep threads to exit.
+
+ Phase one waits until wsrep_running_threads has dropped to 1. Phase two
+ calls wsrep_close_threads() for the remaining wsrep threads and waits
+ until the counter reaches 0.
+
+ When thread_handling is above SCHEDULER_ONE_THREAD_PER_CONNECTION the wait
+ polls with my_sleep(100); otherwise it blocks on COND_thread_count.
+
+ @param thd thread that wsrep_close_threads() must not close
+*/
+void wsrep_wait_appliers_close(THD *thd)
+{
+ /* Wait for wsrep appliers to gracefully exit */
+ mysql_mutex_lock(&LOCK_thread_count);
+ while (wsrep_running_threads > 1)
+ // 1 is for rollbacker thread which needs to be killed explicitly.
+ // This gotta be fixed in a more elegant manner if we gonna have arbitrary
+ // number of non-applier wsrep threads.
+ {
+ if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION)
+ {
+ /* Poll: release the lock while sleeping, retake it to re-check. */
+ mysql_mutex_unlock(&LOCK_thread_count);
+ my_sleep(100);
+ mysql_mutex_lock(&LOCK_thread_count);
+ }
+ else
+ mysql_cond_wait(&COND_thread_count,&LOCK_thread_count);
+ DBUG_PRINT("quit",("One applier died (count=%u)",thread_count));
+ }
+ mysql_mutex_unlock(&LOCK_thread_count);
+ /* Now kill remaining wsrep threads: rollbacker */
+ wsrep_close_threads (thd);
+ /* and wait for them to die */
+ mysql_mutex_lock(&LOCK_thread_count);
+ while (wsrep_running_threads > 0)
+ {
+ if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION)
+ {
+ /* Same polling scheme as in the first wait loop. */
+ mysql_mutex_unlock(&LOCK_thread_count);
+ my_sleep(100);
+ mysql_mutex_lock(&LOCK_thread_count);
+ }
+ else
+ mysql_cond_wait(&COND_thread_count,&LOCK_thread_count);
+ DBUG_PRINT("quit",("One thread died (count=%u)",thread_count));
+ }
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ /* All wsrep applier threads have now been aborted. However, if this thread
+ is also applier, we are still running...
+ */
+}
+
+/*
+  Shut the server down on behalf of wsrep.
+
+  If the server has not finished starting, unireg_abort(1) is called.
+  Otherwise kill_mysql() is called, unless a shutdown is already in
+  progress.
+
+  @param thd  not used by this function
+*/
+void wsrep_kill_mysql(THD *thd)
+{
+  if (!mysqld_server_started)
+  {
+    unireg_abort(1);
+  }
+  else if (!shutdown_in_progress)
+  {
+    WSREP_INFO("starting shutdown");
+    kill_mysql();
+  }
+}
+#endif /* WITH_WSREP */
#if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY)
static void handle_connections_methods()
@@ -5390,6 +6150,15 @@ int mysqld_main(int argc, char **argv)
}
#endif
+#ifdef WITH_WSREP /* WSREP AFTER SE */
+ if (wsrep_recovery)
+ {
+ select_thread_in_use= 0;
+ wsrep_recover();
+ unireg_abort(0);
+ }
+#endif /* WITH_WSREP */
+
/*
init signals & alarm
After this we can't quit by a simple unireg_abort
@@ -5448,8 +6217,33 @@ int mysqld_main(int argc, char **argv)
if (Events::init((THD*) 0, opt_noacl || opt_bootstrap))
unireg_abort(1);
+#ifdef WITH_WSREP /* WSREP AFTER SE */
if (opt_bootstrap)
{
+ /*! bootstrap wsrep init was taken care of above */
+ }
+ else
+ {
+ wsrep_SE_initialized();
+
+ if (wsrep_before_SE())
+ {
+ /*! in case of no SST wsrep waits in view handler callback */
+ wsrep_SE_init_grab();
+ wsrep_SE_init_done();
+ /*! in case of SST wsrep waits for wsrep->sst_received */
+ wsrep_sst_continue();
+ }
+ else
+ {
+ wsrep_init_startup (false);
+ }
+
+ wsrep_create_appliers(wsrep_slave_threads - 1);
+ }
+#endif /* WITH_WSREP */
+ if (opt_bootstrap)
+ {
select_thread_in_use= 0; // Allow 'kill' to work
bootstrap(mysql_stdin);
if (!kill_in_progress)
@@ -5492,7 +6286,14 @@ int mysqld_main(int argc, char **argv)
(char*) "" : mysqld_unix_port),
mysqld_port,
MYSQL_COMPILATION_COMMENT);
- fclose(stdin);
+
+ // try to keep fd=0 busy
+ if (!freopen(IF_WIN("NUL","/dev/null"), "r", stdin))
+ {
+ // fall back on failure
+ fclose(stdin);
+ }
+
#if defined(_WIN32) && !defined(EMBEDDED_LIBRARY)
Service.SetRunning();
#endif
@@ -5519,6 +6320,9 @@ int mysqld_main(int argc, char **argv)
#ifdef EXTRA_DEBUG2
sql_print_error("Before Lock_thread_count");
#endif
+#ifdef WITH_WSREP
+ WSREP_DEBUG("Before Lock_thread_count");
+#endif
mysql_mutex_lock(&LOCK_thread_count);
DBUG_PRINT("quit", ("Got thread_count mutex"));
select_thread_in_use=0; // For close_connections
@@ -5784,6 +6588,9 @@ static void bootstrap(MYSQL_FILE *file)
DBUG_ENTER("bootstrap");
THD *thd= new THD;
+#ifdef WITH_WSREP
+ thd->variables.wsrep_on= 0;
+#endif
thd->bootstrap=1;
my_net_init(&thd->net,(st_vio*) 0, MYF(0));
thd->max_client_packet_length= thd->net.max_packet;
@@ -6185,6 +6992,9 @@ void handle_connections_sockets()
sleep(1); // Give other threads some time
continue;
}
+#if defined(WITH_WSREP) && defined(HAVE_FCNTL) && defined(FD_CLOEXEC)
+ (void) fcntl(mysql_socket_getfd(new_sock), F_SETFD, FD_CLOEXEC);
+#endif /* WITH_WSREP */
#ifdef HAVE_LIBWRAP
{
@@ -6928,12 +7738,6 @@ struct my_option my_long_options[]=
"more than one storage engine, when binary log is disabled).",
&opt_tc_log_file, &opt_tc_log_file, 0, GET_STR,
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
-#ifdef HAVE_MMAP
- {"log-tc-size", 0, "Size of transaction coordinator log.",
- &opt_tc_log_size, &opt_tc_log_size, 0, GET_ULONG,
- REQUIRED_ARG, TC_LOG_MIN_SIZE, TC_LOG_MIN_SIZE, (ulonglong) ULONG_MAX, 0,
- TC_LOG_PAGE_SIZE, 0},
-#endif
{"master-info-file", 0,
"The location and name of the file that remembers the master and where "
"the I/O replication thread is in the master's binlogs. Defaults to "
@@ -7117,6 +7921,13 @@ struct my_option my_long_options[]=
{"table_cache", 0, "Deprecated; use --table-open-cache instead.",
&tc_size, &tc_size, 0, GET_ULONG,
REQUIRED_ARG, TABLE_OPEN_CACHE_DEFAULT, 1, 512*1024L, 0, 1, 0},
+#ifdef WITH_WSREP
+ {"wsrep-new-cluster", 0, "Bootstrap a cluster. It works by overriding the "
+ "current value of wsrep_cluster_address. It is recommended not to add this "
+ "option to the config file as this will trigger bootstrap on every server "
+ "start.", &wsrep_new_cluster, &wsrep_new_cluster, 0, GET_BOOL, NO_ARG,
+ 0, 0, 0, 0, 0, 0},
+#endif
/* The following options exist in 5.6 but not in 10.0 */
MYSQL_TO_BE_IMPLEMENTED_OPTION("default-tmp-storage-engine"),
@@ -7968,6 +8779,21 @@ SHOW_VAR status_vars[]= {
#ifdef ENABLED_PROFILING
{"Uptime_since_flush_status",(char*) &show_flushstatustime, SHOW_SIMPLE_FUNC},
#endif
+#ifdef WITH_WSREP
+ {"wsrep_connected", (char*) &wsrep_connected, SHOW_BOOL},
+ {"wsrep_ready", (char*) &wsrep_ready, SHOW_BOOL},
+ {"wsrep_cluster_state_uuid", (char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR},
+ {"wsrep_cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG},
+ {"wsrep_cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR},
+ {"wsrep_cluster_size", (char*) &wsrep_cluster_size, SHOW_LONG_NOFLUSH},
+ {"wsrep_local_index", (char*) &wsrep_local_index, SHOW_LONG_NOFLUSH},
+ {"wsrep_local_bf_aborts", (char*) &wsrep_show_bf_aborts, SHOW_SIMPLE_FUNC},
+ {"wsrep_provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR},
+ {"wsrep_provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR},
+ {"wsrep_provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR},
+ {"wsrep_thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH},
+ {"wsrep", (char*) &wsrep_show_status, SHOW_FUNC},
+#endif
{NullS, NullS, SHOW_LONG}
};
@@ -8309,6 +9135,10 @@ static int mysql_init_variables(void)
tmpenv = DEFAULT_MYSQL_HOME;
strmake_buf(mysql_home, tmpenv);
#endif
+#ifdef WITH_WSREP
+ if (wsrep_init_vars())
+ return 1;
+#endif
return 0;
}
@@ -8559,6 +9389,14 @@ mysqld_get_one_option(int optid,
case OPT_LOWER_CASE_TABLE_NAMES:
lower_case_table_names_used= 1;
break;
+#ifdef WITH_WSREP
+ case OPT_WSREP_START_POSITION:
+ wsrep_start_position_init (argument);
+ break;
+ case OPT_WSREP_SST_AUTH:
+ wsrep_sst_auth_init (argument);
+ break;
+#endif
#if defined(ENABLED_DEBUG_SYNC)
case OPT_DEBUG_SYNC_TIMEOUT:
/*
@@ -8859,6 +9697,40 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
else
global_system_variables.option_bits&= ~OPTION_BIG_SELECTS;
+#ifdef WITH_WSREP
+ if (!opt_bootstrap && WSREP_PROVIDER_EXISTS &&
+ global_system_variables.binlog_format != BINLOG_FORMAT_ROW) {
+
+ WSREP_ERROR ("Only binlog_format = 'ROW' is currently supported. "
+ "Configured value: '%s'. Please adjust your configuration.",
+ binlog_format_names[global_system_variables.binlog_format]);
+ return 1;
+ }
+
+ if (global_system_variables.wsrep_causal_reads) {
+ WSREP_WARN("option --wsrep-causal-reads is deprecated");
+ if (!(global_system_variables.wsrep_sync_wait &
+ WSREP_SYNC_WAIT_BEFORE_READ)) {
+ WSREP_WARN("--wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=%u. "
+ "WSREP_SYNC_WAIT_BEFORE_READ is on",
+ global_system_variables.wsrep_sync_wait);
+ global_system_variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ;
+ } else {
+ // they are both turned on.
+ }
+ } else {
+ if (global_system_variables.wsrep_sync_wait &
+ WSREP_SYNC_WAIT_BEFORE_READ) {
+ WSREP_WARN("--wsrep-sync-wait=%u takes precedence over --wsrep-causal-reads=OFF. "
+ "WSREP_SYNC_WAIT_BEFORE_READ is on",
+ global_system_variables.wsrep_sync_wait);
+ global_system_variables.wsrep_causal_reads = 1;
+ } else {
+ // they are both turned off.
+ }
+ }
+#endif // WITH_WSREP
+
// Synchronize @@global.autocommit on --autocommit
const ulonglong turn_bit_on= opt_autocommit ?
OPTION_AUTOCOMMIT : OPTION_NOT_AUTOCOMMIT;
@@ -9024,6 +9896,9 @@ void set_server_version(void)
#ifdef EMBEDDED_LIBRARY
end= strnmov(end, "-embedded", (version_end-end));
#endif
+#ifdef WITH_WSREP
+  /*
+    Append the suffix with the same bounded copy used for the other suffixes
+    in this function, so the write cannot run past version_end.
+  */
+  end= strnmov(end, "-wsrep", (version_end-end));
+#endif
#ifndef DBUG_OFF
if (!strstr(MYSQL_SERVER_SUFFIX_STR, "-debug"))
end= strnmov(end, "-debug", (version_end-end));
@@ -9252,8 +10127,6 @@ static int test_if_case_insensitive(const char *dir_name)
DBUG_PRINT("exit", ("result: %d", result));
DBUG_RETURN(result);
}
-
-
#ifndef EMBEDDED_LIBRARY
/**
@@ -9317,6 +10190,9 @@ void refresh_status(THD *thd)
/* Reset some global variables */
reset_status_vars();
+#ifdef WITH_WSREP
+ wsrep->stats_reset(wsrep);
+#endif /* WITH_WSREP */
/* Reset the counters of all key caches (default and named). */
process_key_caches(reset_key_cache_counters, 0);
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 78e832e4abc..4af04a3df75 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -248,6 +248,10 @@ extern PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active,
key_LOCK_pool, key_LOCK_pending_checkpoint;
#endif /* HAVE_MMAP */
+#ifdef WITH_WSREP
+extern PSI_mutex_key key_LOCK_wsrep_thd;
+#endif /* WITH_WSREP */
+
#ifdef HAVE_OPENSSL
extern PSI_mutex_key key_LOCK_des_key_file;
#endif
@@ -604,6 +608,15 @@ enum options_mysqld
OPT_WANT_CORE,
OPT_MYSQL_COMPATIBILITY,
OPT_MYSQL_TO_BE_IMPLEMENTED,
+#ifdef WITH_WSREP
+ OPT_WSREP_PROVIDER,
+ OPT_WSREP_PROVIDER_OPTIONS,
+ OPT_WSREP_CLUSTER_ADDRESS,
+ OPT_WSREP_START_POSITION,
+ OPT_WSREP_SST_AUTH,
+ OPT_WSREP_RECOVER,
+#endif /* WITH_WSREP */
+
OPT_which_is_always_the_last
};
#endif
@@ -767,4 +780,9 @@ extern uint internal_tmp_table_max_key_segments;
extern uint volatile global_disable_checkpoint;
extern my_bool opt_help;
+#ifdef WITH_WSREP
+#include "my_pthread.h"
+pthread_handler_t start_wsrep_THD(void*);
+#endif /* WITH_WSREP */
+
#endif /* MYSQLD_INCLUDED */
diff --git a/sql/protocol.cc b/sql/protocol.cc
index 777f124f502..67fb8924764 100644
--- a/sql/protocol.cc
+++ b/sql/protocol.cc
@@ -497,6 +497,14 @@ static uchar *net_store_length_fast(uchar *packet, uint length)
void Protocol::end_statement()
{
+#ifdef WITH_WSREP
+ /*sanity check, can be removed before 1.0 release */
+ if (WSREP(thd) && thd->wsrep_conflict_state== REPLAYING)
+ {
+ WSREP_ERROR("attempting net_end_statement while replaying");
+ return;
+ }
+#endif
DBUG_ENTER("Protocol::end_statement");
DBUG_ASSERT(! thd->get_stmt_da()->is_sent());
bool error= FALSE;
diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc
index 683f2492097..52cec9f0a85 100644
--- a/sql/rpl_gtid.cc
+++ b/sql/rpl_gtid.cc
@@ -560,6 +560,14 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
if ((err= gtid_check_rpl_slave_state_table(table)))
goto end;
+#ifdef WITH_WSREP
+ /*
+ Updates in slave state table should not be appended to galera transaction
+ writeset.
+ */
+ thd->wsrep_skip_append_keys= true;
+#endif
+
if (!in_transaction)
{
DBUG_PRINT("info", ("resetting OPTION_BEGIN"));
@@ -673,6 +681,10 @@ IF_DBUG(dbug_break:, )
end:
+#ifdef WITH_WSREP
+ thd->wsrep_skip_append_keys= false;
+#endif
+
if (table_opened)
{
if (err || (err= ha_commit_trans(thd, FALSE)))
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index c810e030bf2..c23190cda2b 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -111,7 +111,13 @@ void Master_info::wait_until_free()
Master_info::~Master_info()
{
- wait_until_free();
+#ifdef WITH_WSREP
+ /*
+ Do not free "wsrep" rpl_filter. It will eventually be freed by
+ free_all_rpl_filters() when server terminates.
+ */
+ if (strncmp(connection_name.str, STRING_WITH_LEN("wsrep")))
+#endif
rpl_filters.delete_element(connection_name.str, connection_name.length,
(void (*)(const char*, uchar*)) free_rpl_filter);
my_free(connection_name.str);
diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc
index 5e48cfb02e5..21b8a8028a6 100644
--- a/sql/rpl_record.cc
+++ b/sql/rpl_record.cc
@@ -308,7 +308,11 @@ unpack_row(rpl_group_info *rgi,
uint16 const metadata= tabledef->field_metadata(i);
#ifndef DBUG_OFF
uchar const *const old_pack_ptr= pack_ptr;
-#endif
+#else
+#ifdef WITH_WSREP
+ uchar const *const old_pack_ptr= pack_ptr;
+#endif /* WITH_WSREP */
+#endif /* !DBUG_OFF */
pack_ptr= f->unpack(f->ptr, pack_ptr, row_end, metadata);
DBUG_PRINT("debug", ("field: %s; metadata: 0x%x;"
" pack_ptr: 0x%lx; pack_ptr': 0x%lx; bytes: %d",
@@ -317,6 +321,20 @@ unpack_row(rpl_group_info *rgi,
(int) (pack_ptr - old_pack_ptr)));
if (!pack_ptr)
{
+#ifdef WITH_WSREP
+ /*
+ Debug message to troubleshoot bug:
+ https://mariadb.atlassian.net/browse/MDEV-4404
+ */
+ WSREP_WARN("ROW event unpack field: %s metadata: 0x%x;"
+ " pack_ptr: 0x%lx; conv_table %p conv_field %p table %s"
+ " row_end: 0x%lx",
+ f->field_name, metadata,
+ (ulong) old_pack_ptr, conv_table, conv_field,
+ (table_found) ? "found" : "not found", (ulong)row_end
+ );
+#endif /* WITH_WSREP */
+
rgi->rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT,
rgi->gtid_info(),
"Could not read field '%s' of table '%s.%s'",
diff --git a/sql/set_var.cc b/sql/set_var.cc
index 0b9699e39f7..7ad528f0eae 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -556,7 +556,11 @@ int mysql_del_sys_var_chain(sys_var *first)
static int show_cmp(SHOW_VAR *a, SHOW_VAR *b)
{
+#ifdef WITH_WSREP
+ return my_strcasecmp(system_charset_info, a->name, b->name);
+#else
return strcmp(a->name, b->name);
+#endif /* WITH_WSREP */
}
diff --git a/sql/set_var.h b/sql/set_var.h
index 83ba662b76c..064da1404fa 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -247,6 +247,9 @@ public:
int check(THD *thd);
int update(THD *thd);
int light_check(THD *thd);
+#ifdef WITH_WSREP
+ int wsrep_store_variable(THD *thd);
+#endif
};
@@ -341,6 +344,9 @@ extern sys_var *Sys_autocommit_ptr;
CHARSET_INFO *get_old_charset_by_name(const char *old_name);
+#ifdef WITH_WSREP
+int sql_set_wsrep_variables(THD *thd, List<set_var_base> *var_list);
+#endif
int sys_var_init();
int sys_var_add_options(DYNAMIC_ARRAY *long_options, int parse_flags);
void sys_var_end(void);
diff --git a/sql/slave.cc b/sql/slave.cc
index 45af5fcd337..f785e4f345c 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -53,6 +53,9 @@
// Create_file_log_event,
// Format_description_log_event
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif
#ifdef HAVE_REPLICATION
#include "rpl_tblmap.h"
@@ -1029,8 +1032,7 @@ static bool io_slave_killed(Master_info* mi)
In the event of deffering decision @rli->last_event_start_time waiting
timer is set to force the killed status be accepted upon its expiration.
- @param thd pointer to a THD instance
- @param rli pointer to Relay_log_info instance
+ @param rgi pointer to rpl_group_info instance
@return TRUE the killed status is recognized, FALSE a possible killed
status is deferred.
@@ -1367,6 +1369,10 @@ bool is_network_error(uint errorno)
errorno == ER_NET_READ_INTERRUPTED ||
errorno == ER_SERVER_SHUTDOWN)
return TRUE;
+#ifdef WITH_WSREP
+ if (errorno == ER_UNKNOWN_COM_ERROR)
+ return TRUE;
+#endif
return FALSE;
}
@@ -3310,6 +3316,17 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd,
if (reason == Log_event::EVENT_SKIP_NOT)
exec_res= ev->apply_event(rgi);
+#ifdef WITH_WSREP
+ if (exec_res && thd->wsrep_conflict_state != NO_CONFLICT)
+ {
+ WSREP_DEBUG("SQL apply failed, res %d conflict state: %d",
+ exec_res, thd->wsrep_conflict_state);
+ rli->abort_slave= 1;
+ rli->report(ERROR_LEVEL, ER_UNKNOWN_COM_ERROR, rgi->gtid_info(),
+ "Node has dropped from cluster");
+ }
+#endif
+
#ifndef DBUG_OFF
/*
This only prints information to the debug trace.
@@ -3641,6 +3658,10 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
serial_rgi->event_relay_log_pos= rli->event_relay_log_pos;
exec_res= apply_event_and_update_pos(ev, thd, serial_rgi, NULL);
+#ifdef WITH_WSREP
+ WSREP_DEBUG("apply_event_and_update_pos() result: %d", exec_res);
+#endif /* WITH_WSREP */
+
delete_or_keep_event_post_apply(serial_rgi, typ, ev);
/*
@@ -3650,6 +3671,12 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
if (exec_res == 2)
DBUG_RETURN(1);
+#ifdef WITH_WSREP
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state == NO_CONFLICT)
+ {
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+#endif /* WITH_WSREP */
if (slave_trans_retries)
{
int temp_err;
@@ -3723,6 +3750,12 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
serial_rgi->trans_retries));
}
}
+#ifdef WITH_WSREP
+ } else {
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif /* WITH_WSREP */
+
thread_safe_increment64(&rli->executed_entries,
&slave_executed_entries_lock);
DBUG_RETURN(exec_res);
@@ -4465,6 +4498,9 @@ pthread_handler_t handle_slave_sql(void *arg)
my_off_t saved_skip= 0;
Master_info *mi= ((Master_info*)arg);
Relay_log_info* rli = &mi->rli;
+#ifdef WITH_WSREP
+ my_bool wsrep_node_dropped= FALSE;
+#endif /* WITH_WSREP */
const char *errmsg;
rpl_group_info *serial_rgi;
rpl_sql_thread_info sql_info(mi->rpl_filter);
@@ -4472,6 +4508,9 @@ pthread_handler_t handle_slave_sql(void *arg)
// needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
my_thread_init();
DBUG_ENTER("handle_slave_sql");
+#ifdef WITH_WSREP
+ wsrep_restart_point:
+#endif /* WITH_WSREP */
LINT_INIT(saved_master_log_pos);
LINT_INIT(saved_log_pos);
@@ -4629,6 +4668,11 @@ pthread_handler_t handle_slave_sql(void *arg)
}
#endif
+#ifdef WITH_WSREP
+ thd->wsrep_exec_mode= LOCAL_STATE;
+ /* synchronize with wsrep replication */
+ if (WSREP_ON) wsrep_ready_wait();
+#endif
DBUG_PRINT("master_info",("log_file_name: %s position: %s",
rli->group_master_log_name,
llstr(rli->group_master_log_pos,llbuff)));
@@ -4752,10 +4796,27 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME,
if (exec_relay_log_event(thd, rli, serial_rgi))
{
+#ifdef WITH_WSREP
+ if (thd->wsrep_conflict_state != NO_CONFLICT)
+ {
+ wsrep_node_dropped= TRUE;
+ rli->abort_slave= TRUE;
+ }
+#endif /* WITH_WSREP */
+
DBUG_PRINT("info", ("exec_relay_log_event() failed"));
// do not scare the user if SQL thread was simply killed or stopped
if (!sql_slave_killed(serial_rgi))
+ {
slave_output_error_info(serial_rgi, thd);
+#ifdef WITH_WSREP
+ uint32 const last_errno= rli->last_error().number;
+ if (WSREP_ON && last_errno == ER_UNKNOWN_COM_ERROR)
+ {
+ wsrep_node_dropped= TRUE;
+ }
+#endif /* WITH_WSREP */
+ }
goto err;
}
}
@@ -4882,6 +4943,33 @@ err_during_init:
thd->rgi_fake= thd->rgi_slave= NULL;
delete serial_rgi;
mysql_mutex_unlock(&LOCK_thread_count);
+
+#ifdef WITH_WSREP
+  /*
+    If slave stopped due to node going non primary, we set global flag to
+    trigger automatic restart of slave when node joins back to cluster.
+    If wsrep_ready is already set at this point, the SQL thread instead
+    restarts in place by jumping back to wsrep_restart_point.
+  */
+  if (wsrep_node_dropped && wsrep_restart_slave)
+  {
+    if (wsrep_ready)
+    {
+      /*
+        Adjacent string literals are concatenated by the compiler, so the
+        first one must end with its own separator.
+      */
+      WSREP_INFO("Slave error due to node temporarily non-primary. "
+                 "SQL slave will continue");
+      wsrep_node_dropped= FALSE;
+      /* Release run_lock before re-entering the thread body. */
+      mysql_mutex_unlock(&rli->run_lock);
+      WSREP_DEBUG("wsrep_conflict_state now: %d", thd->wsrep_conflict_state);
+      WSREP_INFO("slave restart: %d", thd->wsrep_conflict_state);
+      thd->wsrep_conflict_state= NO_CONFLICT;
+      goto wsrep_restart_point;
+    } else {
+      WSREP_INFO("Slave error due to node going non-primary");
+      WSREP_INFO("wsrep_restart_slave was set and therefore slave will be "
+                 "automatically restarted when node joins back to cluster.");
+      wsrep_restart_slave_activated= TRUE;
+    }
+  }
+#endif /* WITH_WSREP */
+
/*
Note: the order of the broadcast and unlock calls below (first broadcast, then unlock)
is important. Otherwise a killer_thread can execute between the calls and
diff --git a/sql/sp.cc b/sql/sp.cc
index 52d3c04cbdf..78de0209e6f 100644
--- a/sql/sp.cc
+++ b/sql/sp.cc
@@ -2274,3 +2274,38 @@ sp_load_for_information_schema(THD *thd, TABLE *proc_table, String *db,
thd->lex= old_lex;
return sp;
}
+#ifdef WITH_WSREP
+/*
+  Build the statement text for the stored routine held in thd->lex->sphead
+  via create_string() and pass it on to wsrep_to_buf_helper().
+
+  @param thd      current thread; supplies the routine, its definer and the
+                  sql_mode in effect
+  @param buf      forwarded unchanged to wsrep_to_buf_helper()
+  @param buf_len  forwarded unchanged to wsrep_to_buf_helper()
+
+  @return 1 when create_string() fails, otherwise whatever
+          wsrep_to_buf_helper() returns
+*/
+int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len)
+{
+  sp_head *const routine= thd->lex->sphead;
+  const ulong mode_at_entry= thd->variables.sql_mode;
+  String create_stmt;
+  String returns_clause(64);
+
+  returns_clause.set_charset(system_charset_info);
+  create_stmt.set_charset(system_charset_info);
+
+  /* sp_returns_type() is only consulted for TYPE_ENUM_FUNCTION routines. */
+  if (routine->m_type == TYPE_ENUM_FUNCTION)
+    sp_returns_type(thd, returns_clause, routine);
+
+  /* The database name is passed only when it was given explicitly. */
+  const bool built=
+    create_string(thd, &create_stmt,
+                  routine->m_type,
+                  (routine->m_explicit_name ? routine->m_db.str : NULL),
+                  (routine->m_explicit_name ? routine->m_db.length : 0),
+                  routine->m_name.str, routine->m_name.length,
+                  routine->m_params.str, routine->m_params.length,
+                  returns_clause.c_ptr(), returns_clause.length(),
+                  routine->m_body.str, routine->m_body.length,
+                  routine->m_chistics, &(thd->lex->definer->user),
+                  &(thd->lex->definer->host),
+                  mode_at_entry);
+
+  if (built)
+  {
+    return wsrep_to_buf_helper(thd, create_stmt.ptr(), create_stmt.length(),
+                               buf, buf_len);
+  }
+
+  WSREP_WARN("SP create string failed: schema: %s, query: %s",
+             (thd->db ? thd->db : "(null)"), thd->query());
+  return 1;
+}
+#endif /* WITH_WSREP */
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index c7e47c84db0..3ed1a687c5a 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -2553,8 +2553,15 @@ int check_change_password(THD *thd, const char *host, const char *user,
my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables");
return(1);
}
+
+#ifdef WITH_WSREP
+ if ((!WSREP(thd) || !thd->wsrep_applier) &&
+ !thd->slave_thread && !thd->security_ctx->priv_user[0] &&
+ !in_bootstrap)
+#else
if (!thd->slave_thread && !thd->security_ctx->priv_user[0] &&
!in_bootstrap)
+#endif /* WITH_WSREP */
{
my_message(ER_PASSWORD_ANONYMOUS_USER, ER(ER_PASSWORD_ANONYMOUS_USER),
MYF(0));
@@ -2565,7 +2572,11 @@ int check_change_password(THD *thd, const char *host, const char *user,
my_error(ER_PASSWORD_NO_MATCH, MYF(0));
return 1;
}
+
if (!thd->slave_thread &&
+#ifdef WITH_WSREP
+ (!WSREP(thd) || !thd->wsrep_applier) &&
+#endif /* WITH_WSREP */
(strcmp(thd->security_ctx->priv_user, user) ||
my_strcasecmp(system_charset_info, host,
thd->security_ctx->priv_host)))
@@ -2604,11 +2615,14 @@ bool change_password(THD *thd, const char *host, const char *user,
Rpl_filter *rpl_filter;
/* Buffer should be extended when password length is extended. */
char buff[512];
- ulong query_length;
+ ulong query_length=0;
enum_binlog_format save_binlog_format;
uint new_password_len= (uint) strlen(new_password);
bool result= 1;
bool use_salt= 0;
+#ifdef WITH_WSREP
+ const CSET_STRING query_save = thd->query_string;
+#endif /* WITH_WSREP */
DBUG_ENTER("change_password");
DBUG_PRINT("enter",("host: '%s' user: '%s' new_password: '%s'",
host,user,new_password));
@@ -2616,6 +2630,18 @@ bool change_password(THD *thd, const char *host, const char *user,
if (check_change_password(thd, host, user, new_password, new_password_len))
DBUG_RETURN(1);
+#ifdef WITH_WSREP
+ if (WSREP(thd) && !thd->wsrep_applier)
+ {
+ query_length= sprintf(buff, "SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'",
+ user ? user : "",
+ host ? host : "",
+ new_password);
+ thd->set_query_inner(buff, query_length, system_charset_info);
+
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, (char*)"user", NULL);
+ }
+#endif /* WITH_WSREP */
tables.init_one_table("mysql", 5, "user", 4, "user", TL_WRITE);
@@ -2697,9 +2723,23 @@ bool change_password(THD *thd, const char *host, const char *user,
}
end:
close_mysql_tables(thd);
+#ifdef WITH_WSREP
+ if (WSREP(thd) && !thd->wsrep_applier)
+ {
+ WSREP_TO_ISOLATION_END;
+
+ thd->query_string = query_save;
+ thd->wsrep_exec_mode = LOCAL_STATE;
+ }
+#endif /* WITH_WSREP */
thd->restore_stmt_binlog_format(save_binlog_format);
DBUG_RETURN(result);
+#ifdef WITH_WSREP
+ error:
+  /*
+    Failure exit for the wsrep path. The statement text installed with
+    set_query_inner() lives in the local buff, so put the saved query string
+    back before leaving; otherwise thd would keep referring to buff after
+    this function has returned.
+    NOTE(review): presumably reached from WSREP_TO_ISOLATION_BEGIN on
+    failure -- no explicit goto is visible in this hunk; confirm against the
+    macro definition.
+  */
+  WSREP_ERROR("Replication of SET PASSWORD failed: %s", buff);
+  thd->query_string = query_save;
+  DBUG_RETURN(result);
+#endif /* WITH_WSREP */
}
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index 2cb7b596473..e1c66bceedc 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -1194,6 +1194,7 @@ bool Sql_cmd_analyze_table::execute(THD *thd)
if (check_table_access(thd, SELECT_ACL | INSERT_ACL, first_table,
FALSE, UINT_MAX, FALSE))
goto error;
+ WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table);
thd->enable_slow_log= opt_log_slow_admin_statements;
res= mysql_admin_table(thd, first_table, &m_lex->check_opt,
"analyze", lock_type, 1, 0, 0, 0,
@@ -1249,6 +1250,7 @@ bool Sql_cmd_optimize_table::execute(THD *thd)
if (check_table_access(thd, SELECT_ACL | INSERT_ACL, first_table,
FALSE, UINT_MAX, FALSE))
goto error; /* purecov: inspected */
+ WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table);
thd->enable_slow_log= opt_log_slow_admin_statements;
res= (specialflag & SPECIAL_NO_NEW_FUNC) ?
mysql_recreate_table(thd, first_table, true) :
@@ -1282,6 +1284,7 @@ bool Sql_cmd_repair_table::execute(THD *thd)
FALSE, UINT_MAX, FALSE))
goto error; /* purecov: inspected */
thd->enable_slow_log= opt_log_slow_admin_statements;
+ WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table);
res= mysql_admin_table(thd, first_table, &m_lex->check_opt, "repair",
TL_WRITE, 1,
MY_TEST(m_lex->check_opt.sql_flags & TT_USEFRM),
diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc
index 97b9c127c22..a39f07ae35d 100644
--- a/sql/sql_alter.cc
+++ b/sql/sql_alter.cc
@@ -18,7 +18,9 @@
// mysql_exchange_partition
#include "sql_base.h" // open_temporary_tables
#include "sql_alter.h"
-
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif /* WITH_WSREP */
Alter_info::Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root)
:drop_list(rhs.drop_list, mem_root),
alter_list(rhs.alter_list, mem_root),
@@ -303,6 +305,17 @@ bool Sql_cmd_alter_table::execute(THD *thd)
thd->enable_slow_log= opt_log_slow_admin_statements;
+#ifdef WITH_WSREP
+ TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl);
+
+ if ((!thd->is_current_stmt_binlog_format_row() ||
+ !find_temporary_table(thd, first_table)))
+ {
+ WSREP_TO_ISOLATION_BEGIN(((lex->name.str) ? select_lex->db : NULL),
+ ((lex->name.str) ? lex->name.str : NULL),
+ first_table);
+ }
+#endif /* WITH_WSREP */
result= mysql_alter_table(thd, select_lex->db, lex->name.str,
&create_info,
first_table,
@@ -312,6 +325,12 @@ bool Sql_cmd_alter_table::execute(THD *thd)
lex->ignore);
DBUG_RETURN(result);
+
+#ifdef WITH_WSREP
+error:
+ WSREP_WARN("ALTER TABLE isolation failure");
+ DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
}
bool Sql_cmd_discard_import_tablespace::execute(THD *thd)
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 73f71a07f15..2c7f3147901 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -62,6 +62,10 @@
#include <io.h>
#endif
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#include "wsrep_thd.h"
+#endif // WITH_WSREP
bool
No_such_table_error_handler::handle_condition(THD *,
@@ -3667,7 +3671,7 @@ thr_lock_type read_lock_type_for_table(THD *thd,
*/
bool log_on= mysql_bin_log.is_open() && thd->variables.sql_log_bin;
ulong binlog_format= thd->variables.binlog_format;
- if ((log_on == FALSE) || (binlog_format == BINLOG_FORMAT_ROW) ||
+ if ((log_on == FALSE) || (WSREP_FORMAT(binlog_format) == BINLOG_FORMAT_ROW) ||
(table_list->table->s->table_category == TABLE_CATEGORY_LOG) ||
(table_list->table->s->table_category == TABLE_CATEGORY_PERFORMANCE) ||
!(is_update_query(prelocking_ctx->sql_command) ||
@@ -4715,9 +4719,38 @@ restart:
tbl->reginfo.lock_type= tables->lock_type;
}
}
+#ifdef WITH_WSREP
+ if (wsrep_replicate_myisam &&
+ (*start) &&
+ (*start)->table &&
+ (*start)->table->file->ht->db_type == DB_TYPE_MYISAM &&
+ thd->get_command() != COM_STMT_PREPARE &&
+ ((thd->lex->sql_command == SQLCOM_INSERT ||
+ thd->lex->sql_command == SQLCOM_INSERT_SELECT ||
+ thd->lex->sql_command == SQLCOM_REPLACE ||
+ thd->lex->sql_command == SQLCOM_REPLACE_SELECT ||
+ thd->lex->sql_command == SQLCOM_UPDATE ||
+ thd->lex->sql_command == SQLCOM_UPDATE_MULTI ||
+ thd->lex->sql_command == SQLCOM_LOAD ||
+ thd->lex->sql_command == SQLCOM_DELETE)))
+ {
+ WSREP_TO_ISOLATION_BEGIN(NULL, NULL, (*start));
+ }
+ error:
+#endif
err:
THD_STAGE_INFO(thd, stage_after_opening_tables);
+
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "exit open_tables()");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
+ thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
+
free_root(&new_frm_mem, MYF(0)); // Free pre-alloced block
if (error && *table_to_open)
@@ -5183,7 +5216,18 @@ end:
trans_rollback_stmt(thd);
close_thread_tables(thd);
}
+
THD_STAGE_INFO(thd, stage_after_opening_tables);
+
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ thd_proc_info(thd, "End opening table");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
+ thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
+
DBUG_RETURN(table);
}
@@ -9051,7 +9095,19 @@ bool mysql_notify_thread_having_shared_lock(THD *thd, THD *in_use,
(e.g. see partitioning code).
*/
if (!thd_table->needs_reopen())
+#ifdef WITH_WSREP
+ {
+ signalled|= mysql_lock_abort_for_thread(thd, thd_table);
+ if (thd && WSREP(thd) && wsrep_thd_is_BF((void *)thd, true))
+ {
+ WSREP_DEBUG("remove_table_from_cache: %llu",
+ (unsigned long long) thd->real_id);
+ wsrep_abort_thd((void *)thd, (void *)in_use, FALSE);
+ }
+ }
+#else
signalled|= mysql_lock_abort_for_thread(thd, thd_table);
+#endif
}
mysql_mutex_unlock(&in_use->LOCK_thd_data);
}
diff --git a/sql/sql_builtin.cc.in b/sql/sql_builtin.cc.in
index 63850650ac9..2de475b0a76 100644
--- a/sql/sql_builtin.cc.in
+++ b/sql/sql_builtin.cc.in
@@ -25,7 +25,11 @@ extern
#endif
builtin_maria_plugin
@mysql_mandatory_plugins@ @mysql_optional_plugins@
- builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin;
+ builtin_maria_binlog_plugin,
+#ifdef WITH_WSREP
+ builtin_wsrep_plugin@mysql_plugin_defs@,
+#endif /* WITH_WSREP */
+ builtin_maria_mysql_password_plugin;
struct st_maria_plugin *mysql_optional_plugins[]=
{
@@ -35,5 +39,8 @@ struct st_maria_plugin *mysql_optional_plugins[]=
struct st_maria_plugin *mysql_mandatory_plugins[]=
{
builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin,
+#ifdef WITH_WSREP
+ builtin_wsrep_plugin@mysql_plugin_defs@,
+#endif /* WITH_WSREP */
@mysql_mandatory_plugins@ 0
};
diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc
index e1efb1e85d6..2e9e90346fb 100644
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
@@ -1945,6 +1945,13 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
(int)flags.autocommit));
memcpy((uchar *)(sql + (tot_length - QUERY_CACHE_FLAGS_SIZE)),
(uchar*) &flags, QUERY_CACHE_FLAGS_SIZE);
+
+#ifdef WITH_WSREP
+ bool once_more;
+ once_more= true;
+lookup:
+#endif /* WITH_WSREP */
+
query_block = (Query_cache_block *) my_hash_search(&queries, (uchar*) sql,
tot_length);
/* Quick abort on unlocked data */
@@ -1957,6 +1964,19 @@ def_week_frmt: %lu, in_trans: %d, autocommit: %d",
}
DBUG_PRINT("qcache", ("Query in query hash 0x%lx", (ulong)query_block));
+#ifdef WITH_WSREP
+ if (once_more && WSREP_CLIENT(thd) && wsrep_must_sync_wait(thd))
+ {
+ unlock();
+ if (wsrep_sync_wait(thd))
+ goto err;
+ if (try_lock(thd, Query_cache::TIMEOUT))
+ goto err;
+ once_more= false;
+ goto lookup;
+ }
+#endif /* WITH_WSREP */
+
/* Now lock and test that nothing changed while blocks was unlocked */
BLOCK_LOCK_RD(query_block);
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index da1a0e43ac1..f0543becc0c 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -63,6 +63,10 @@
#include "sql_parse.h" // is_update_query
#include "sql_callback.h"
#include "lock.h"
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#include "wsrep_thd.h"
+#endif
#include "sql_connect.h"
/*
@@ -813,6 +817,180 @@ char *thd_get_error_context_description(THD *thd, char *buffer,
return buffer;
}
+#ifdef WITH_WSREP
+extern int wsrep_on(void *thd)
+{
+ return (int)(WSREP(((THD*)thd)));
+}
+extern "C" bool wsrep_thd_is_wsrep_on(THD *thd)
+{
+ return thd->variables.wsrep_on;
+}
+
+extern "C" bool wsrep_consistency_check(void *thd)
+{
+ return ((THD*)thd)->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING;
+}
+
+extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode)
+{
+ thd->wsrep_exec_mode= mode;
+}
+extern "C" void wsrep_thd_set_query_state(
+ THD *thd, enum wsrep_query_state state)
+{
+ thd->wsrep_query_state= state;
+}
+extern "C" void wsrep_thd_set_conflict_state(
+ THD *thd, enum wsrep_conflict_state state)
+{
+ if (WSREP(thd)) thd->wsrep_conflict_state= state;
+}
+
+
+extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd)
+{
+ return thd->wsrep_exec_mode;
+}
+
+/*
+  Map thd->wsrep_exec_mode to a short printable name.
+  Returns "void" for a NULL thd or a mode not listed below.
+*/
+extern "C" const char *wsrep_thd_exec_mode_str(THD *thd)
+{
+  if (!thd)
+    return "void";
+  if (thd->wsrep_exec_mode == LOCAL_STATE)
+    return "local";
+  if (thd->wsrep_exec_mode == REPL_RECV)
+    return "applier";
+  if (thd->wsrep_exec_mode == TOTAL_ORDER)
+    return "total order";
+  if (thd->wsrep_exec_mode == LOCAL_COMMIT)
+    return "local commit";
+  return "void";
+}
+
+extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd)
+{
+ return thd->wsrep_query_state;
+}
+
+/*
+  Map thd->wsrep_query_state to a short printable name.
+  Returns "void" for a NULL thd or a state not listed below.
+*/
+extern "C" const char *wsrep_thd_query_state_str(THD *thd)
+{
+  if (!thd)
+    return "void";
+  if (thd->wsrep_query_state == QUERY_IDLE)
+    return "idle";
+  if (thd->wsrep_query_state == QUERY_EXEC)
+    return "executing";
+  if (thd->wsrep_query_state == QUERY_COMMITTING)
+    return "committing";
+  if (thd->wsrep_query_state == QUERY_EXITING)
+    return "exiting";
+  if (thd->wsrep_query_state == QUERY_ROLLINGBACK)
+    return "rolling back";
+  return "void";
+}
+
+extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd)
+{
+ return thd->wsrep_conflict_state;
+}
+/*
+  Map thd->wsrep_conflict_state to a short printable name.
+  Returns "void" for a NULL thd or a state not listed below.
+*/
+extern "C" const char *wsrep_thd_conflict_state_str(THD *thd)
+{
+  if (!thd)
+    return "void";
+  if (thd->wsrep_conflict_state == NO_CONFLICT)
+    return "no conflict";
+  if (thd->wsrep_conflict_state == MUST_ABORT)
+    return "must abort";
+  if (thd->wsrep_conflict_state == ABORTING)
+    return "aborting";
+  if (thd->wsrep_conflict_state == MUST_REPLAY)
+    return "must replay";
+  if (thd->wsrep_conflict_state == REPLAYING)
+    return "replaying";
+  if (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT)
+    return "retrying";
+  if (thd->wsrep_conflict_state == CERT_FAILURE)
+    return "cert failure";
+  return "void";
+}
+
+extern "C" wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd)
+{
+ return &thd->wsrep_ws_handle;
+}
+
+extern "C" void wsrep_thd_LOCK(THD *thd)
+{
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+}
+extern "C" void wsrep_thd_UNLOCK(THD *thd)
+{
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+}
+extern "C" time_t wsrep_thd_query_start(THD *thd)
+{
+ return thd->query_start();
+}
+extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd)
+{
+ return thd->wsrep_rand;
+}
+extern "C" my_thread_id wsrep_thd_thread_id(THD *thd)
+{
+ return thd->thread_id;
+}
+extern "C" wsrep_seqno_t wsrep_thd_trx_seqno(THD *thd)
+{
+ return (thd) ? thd->wsrep_trx_meta.gtid.seqno : WSREP_SEQNO_UNDEFINED;
+}
+extern "C" query_id_t wsrep_thd_query_id(THD *thd)
+{
+ return thd->query_id;
+}
+extern "C" char *wsrep_thd_query(THD *thd)
+{
+ return (thd) ? thd->query() : NULL;
+}
+extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd)
+{
+ return thd->wsrep_last_query_id;
+}
+extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id)
+{
+ thd->wsrep_last_query_id= id;
+}
+/*
+  Wake up a thread.
+
+  With signal set, thd->awake(KILL_QUERY) is called while holding
+  thd->LOCK_thd_data. Without it, thd is not touched and COND_wsrep_replaying
+  is broadcast under LOCK_wsrep_replaying instead.
+*/
+extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
+{
+  if (!signal)
+  {
+    mysql_mutex_lock(&LOCK_wsrep_replaying);
+    mysql_cond_broadcast(&COND_wsrep_replaying);
+    mysql_mutex_unlock(&LOCK_wsrep_replaying);
+    return;
+  }
+
+  mysql_mutex_lock(&thd->LOCK_thd_data);
+  thd->awake(KILL_QUERY);
+  mysql_mutex_unlock(&thd->LOCK_thd_data);
+}
+extern "C" int wsrep_thd_retry_counter(THD *thd)
+{
+ return(thd->wsrep_retry_counter);
+}
+extern "C" bool wsrep_thd_skip_append_keys(THD *thd)
+{
+ return thd->wsrep_skip_append_keys;
+}
+
+/*
+  Compare the wsrep transaction seqnos of two THDs handed in as void*.
+
+  @return 1 if thd1's seqno is lower than thd2's, 0 otherwise
+*/
+extern int
+wsrep_trx_order_before(void *thd1, void *thd2)
+{
+  const bool lower_seqno=
+    wsrep_thd_trx_seqno((THD*)thd1) < wsrep_thd_trx_seqno((THD*)thd2);
+
+  /* Braces are required: WSREP_DEBUG is a macro, not a plain call. */
+  if (lower_seqno)
+  {
+    WSREP_DEBUG("BF conflict, order: %lld %lld\n",
+                (long long)wsrep_thd_trx_seqno((THD*)thd1),
+                (long long)wsrep_thd_trx_seqno((THD*)thd2));
+  }
+  else
+  {
+    WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n",
+                (long long)wsrep_thd_trx_seqno((THD*)thd1),
+                (long long)wsrep_thd_trx_seqno((THD*)thd2));
+  }
+
+  return lower_seqno ? 1 : 0;
+}
+/*
+  Tell whether the given THD (passed as void*) has wsrep_conflict_state
+  MUST_ABORT or ABORTING.
+
+  @return 1 if so, 0 otherwise (including for a NULL pointer)
+*/
+extern "C" int
+wsrep_trx_is_aborting(void *thd_ptr)
+{
+  THD *const thd= (THD *)thd_ptr;
+
+  if (!thd)
+    return 0;
+
+  return (thd->wsrep_conflict_state == MUST_ABORT ||
+          thd->wsrep_conflict_state == ABORTING) ? 1 : 0;
+}
+#endif
#if MARIA_PLUGIN_INTERFACE_VERSION < 0x0200
/**
@@ -856,7 +1034,11 @@ bool Drop_table_error_handler::handle_condition(THD *thd,
}
+#ifdef WITH_WSREP
+THD::THD(bool is_applier)
+#else
THD::THD()
+#endif
:Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION,
/* statement id */ 0),
rli_fake(0), rgi_fake(0), rgi_slave(NULL),
@@ -892,8 +1074,20 @@ THD::THD()
debug_sync_control(0),
#endif /* defined(ENABLED_DEBUG_SYNC) */
wait_for_commit_ptr(0),
- main_da(0, false, false),
+ main_da(0, false, false),
m_stmt_da(&main_da)
+#ifdef WITH_WSREP
+ ,
+ wsrep_applier(is_applier),
+ wsrep_applier_closing(FALSE),
+ wsrep_client_thread(0),
+ wsrep_po_handle(WSREP_PO_INITIALIZER),
+ wsrep_po_cnt(0),
+ wsrep_po_in_trans(FALSE),
+ wsrep_apply_format(0),
+ wsrep_apply_toi(false),
+ wsrep_skip_append_keys(false)
+#endif
{
ulong tmp;
@@ -1003,6 +1197,23 @@ THD::THD()
m_command=COM_CONNECT;
*scramble= '\0';
+#ifdef WITH_WSREP
+ mysql_mutex_init(key_LOCK_wsrep_thd, &LOCK_wsrep_thd, MY_MUTEX_INIT_FAST);
+ wsrep_ws_handle.trx_id = WSREP_UNDEFINED_TRX_ID;
+ wsrep_ws_handle.opaque = NULL;
+ wsrep_retry_counter = 0;
+ wsrep_PA_safe = true;
+ wsrep_retry_query = NULL;
+ wsrep_retry_query_len = 0;
+ wsrep_retry_command = COM_CONNECT;
+ wsrep_consistency_check = NO_CONSISTENCY_CHECK;
+ wsrep_status_vars = 0;
+ wsrep_mysql_replicated = 0;
+ wsrep_TOI_pre_query = NULL;
+ wsrep_TOI_pre_query_len = 0;
+ wsrep_sync_wait_gtid = WSREP_GTID_UNDEFINED;
+ wsrep_affected_rows = 0;
+#endif
/* Call to init() below requires fully initialized Open_tables_state. */
reset_open_tables_state(this);
@@ -1042,6 +1253,13 @@ THD::THD()
my_rnd_init(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id);
substitute_null_with_insert_id = FALSE;
thr_lock_info_init(&lock_info); /* safety: will be reset after start */
+#ifdef WITH_WSREP
+ lock_info.mysql_thd= (void *)this;
+ lock_info.in_lock_tables= false;
+#ifdef WSREP_PROC_INFO
+ wsrep_info[sizeof(wsrep_info) - 1] = '\0'; /* make sure it is 0-terminated */
+#endif /* WSREP_PROC_INFO */
+#endif /* WITH_WSREP */
m_token_array= NULL;
if (max_digest_length > 0)
@@ -1396,6 +1614,32 @@ void THD::init(void)
last_commit_gtid.seq_no= 0;
status_in_global= 0;
+#ifdef WITH_WSREP
+ wsrep_exec_mode= wsrep_applier ? REPL_RECV : LOCAL_STATE;
+ wsrep_conflict_state= NO_CONFLICT;
+ wsrep_query_state= QUERY_IDLE;
+ wsrep_last_query_id= 0;
+ wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED;
+ wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED;
+ wsrep_converted_lock_session= false;
+ wsrep_retry_counter= 0;
+ wsrep_rgi= NULL;
+ wsrep_PA_safe= true;
+ wsrep_consistency_check = NO_CONSISTENCY_CHECK;
+ wsrep_mysql_replicated = 0;
+ wsrep_TOI_pre_query = NULL;
+ wsrep_TOI_pre_query_len = 0;
+ wsrep_sync_wait_gtid = WSREP_GTID_UNDEFINED;
+ wsrep_affected_rows = 0;
+
+ /*
+ @@wsrep_causal_reads is now being handled via wsrep_sync_wait, update it
+ appropriately.
+ */
+ if (variables.wsrep_causal_reads)
+ variables.wsrep_sync_wait|= WSREP_SYNC_WAIT_BEFORE_READ;
+#endif /* WITH_WSREP */
+
if (variables.sql_log_bin)
variables.option_bits|= OPTION_BIN_LOG;
else
@@ -1591,6 +1835,13 @@ THD::~THD()
mysql_mutex_lock(&LOCK_thd_data);
mysql_mutex_unlock(&LOCK_thd_data);
+#ifdef WITH_WSREP
+ mysql_mutex_lock(&LOCK_wsrep_thd);
+ mysql_mutex_unlock(&LOCK_wsrep_thd);
+ mysql_mutex_destroy(&LOCK_wsrep_thd);
+ if (wsrep_rgi) delete wsrep_rgi;
+ wsrep_free_status(this);
+#endif
/* Close connection */
#ifndef EMBEDDED_LIBRARY
if (net.vio)
@@ -1772,6 +2023,9 @@ void THD::awake(killed_state state_to_set)
/* Interrupt target waiting inside a storage engine. */
if (state_to_set != NOT_KILLED)
+#ifdef WITH_WSREP
+ /* TODO: prevent applier close here */
+#endif /* WITH_WSREP */
ha_kill_query(this, thd_kill_level(this));
/* Broadcast a condition to kick the target if it is waiting on it. */
@@ -1920,7 +2174,19 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
thread can see those instances (e.g. see partitioning code).
*/
if (!thd_table->needs_reopen())
+#ifdef WITH_WSREP
+ {
signalled|= mysql_lock_abort_for_thread(this, thd_table);
+ if (WSREP(this) && wsrep_thd_is_BF((void *)this, FALSE))
+ {
+ WSREP_DEBUG("remove_table_from_cache: %llu",
+ (unsigned long long) this->real_id);
+ wsrep_abort_thd((void *)this, (void *)in_use, FALSE);
+ }
+ }
+#else
+ signalled|= mysql_lock_abort_for_thread(this, thd_table);
+#endif
}
}
mysql_mutex_unlock(&in_use->LOCK_thd_data);
@@ -2103,12 +2369,25 @@ void THD::cleanup_after_query()
/* reset table map for multi-table update */
table_map_for_update= 0;
m_binlog_invoker= INVOKER_NONE;
+#ifdef WITH_WSREP
+ if (TOTAL_ORDER == wsrep_exec_mode)
+ {
+ wsrep_exec_mode = LOCAL_STATE;
+ }
+ //wsrep_trx_seqno = 0;
+#endif /* WITH_WSREP */
#ifndef EMBEDDED_LIBRARY
if (rgi_slave)
rgi_slave->cleanup_after_query();
#endif
+#ifdef WITH_WSREP
+ wsrep_sync_wait_gtid= WSREP_GTID_UNDEFINED;
+ if (!in_active_multi_stmt_transaction())
+ wsrep_affected_rows= 0;
+#endif /* WITH_WSREP */
+
DBUG_VOID_RETURN;
}
@@ -2508,6 +2787,13 @@ bool sql_exchange::escaped_given(void)
bool select_send::send_result_set_metadata(List<Item> &list, uint flags)
{
bool res;
+#ifdef WITH_WSREP
+ if (WSREP(thd) && thd->wsrep_retry_query)
+ {
+ WSREP_DEBUG("skipping select metadata");
+ return FALSE;
+ }
+#endif /* WITH_WSREP */
if (!(res= thd->protocol->send_result_set_metadata(&list, flags)))
is_result_set_started= 1;
return res;
@@ -4485,8 +4771,13 @@ extern "C" int thd_non_transactional_update(const MYSQL_THD thd)
extern "C" int thd_binlog_format(const MYSQL_THD thd)
{
+#ifdef WITH_WSREP /* Reports the binlog format in effect for thd, or
+ BINLOG_FORMAT_UNSPEC when nothing is being logged.
+ In a wsrep build, logging counts as active when either
+ mysql_bin_log is open or WSREP(thd) holds together with
+ wsrep_emulate_bin_log; OPTION_BIN_LOG must be set in
+ both builds. */
+ if (((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()) &&
+ (thd->variables.option_bits & OPTION_BIN_LOG))
+#else
if (mysql_bin_log.is_open() && (thd->variables.option_bits & OPTION_BIN_LOG))
- return (int) thd->variables.binlog_format;
+#endif
+ return (int) WSREP_FORMAT(thd->variables.binlog_format); /* the session
+ format is passed through WSREP_FORMAT() in both builds;
+ the macro is defined outside this view -- presumably it
+ can override the format for wsrep, TODO confirm */
else
return BINLOG_FORMAT_UNSPEC;
}
@@ -4965,7 +5256,11 @@ void THD::get_definer(LEX_USER *definer, bool role)
{
binlog_invoker(role);
#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+#ifdef WITH_WSREP
+ if ((wsrep_applier || slave_thread) && has_invoker())
+#else
if (slave_thread && has_invoker())
+#endif
{
definer->user = invoker_user;
definer->host= invoker_host;
@@ -5315,7 +5610,7 @@ int THD::decide_logging_format(TABLE_LIST *tables)
binlog by filtering rules.
*/
if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
- !(variables.binlog_format == BINLOG_FORMAT_STMT &&
+ !(WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT &&
!binlog_filter->db_ok(db)))
{
/*
@@ -5552,7 +5847,7 @@ int THD::decide_logging_format(TABLE_LIST *tables)
*/
my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
}
- else if (variables.binlog_format == BINLOG_FORMAT_ROW &&
+ else if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW &&
sqlcom_can_generate_row_events(this))
{
/*
@@ -5581,7 +5876,7 @@ int THD::decide_logging_format(TABLE_LIST *tables)
else
{
/* binlog_format = STATEMENT */
- if (variables.binlog_format == BINLOG_FORMAT_STMT)
+ if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT)
{
if (lex->is_stmt_row_injection())
{
@@ -5598,7 +5893,14 @@ int THD::decide_logging_format(TABLE_LIST *tables)
5. Error: Cannot modify table that uses a storage engine
limited to row-logging when binlog_format = STATEMENT
*/
+#ifdef WITH_WSREP
+ if (!WSREP(this) || wsrep_exec_mode == LOCAL_STATE)
+ {
+#endif /* WITH_WSREP */
my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
}
else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0)
{
@@ -5710,7 +6012,7 @@ int THD::decide_logging_format(TABLE_LIST *tables)
"and binlog_filter->db_ok(db) = %d",
mysql_bin_log.is_open(),
(variables.option_bits & OPTION_BIN_LOG),
- variables.binlog_format,
+ WSREP_FORMAT(variables.binlog_format),
binlog_filter->db_ok(db)));
#endif
@@ -5947,7 +6249,13 @@ int THD::binlog_write_row(TABLE* table, bool is_trans,
MY_BITMAP const* cols, size_t colcnt,
uchar const *record)
{
+#ifdef WITH_WSREP
+ DBUG_ASSERT(is_current_stmt_binlog_format_row() &&
+ ((WSREP(this) && wsrep_emulate_bin_log) ||
+ mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
+#endif
/*
Pack records into format for transfer. We are allocating more
@@ -5981,7 +6289,13 @@ int THD::binlog_update_row(TABLE* table, bool is_trans,
const uchar *before_record,
const uchar *after_record)
{
+#ifdef WITH_WSREP
+ DBUG_ASSERT(is_current_stmt_binlog_format_row() &&
+ ((WSREP(this) && wsrep_emulate_bin_log)
+ || mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
+#endif
size_t const before_maxlen = max_row_length(table, before_record);
size_t const after_maxlen = max_row_length(table, after_record);
@@ -6030,7 +6344,13 @@ int THD::binlog_delete_row(TABLE* table, bool is_trans,
MY_BITMAP const* cols, size_t colcnt,
uchar const *record)
{
+#ifdef WITH_WSREP
+ DBUG_ASSERT(is_current_stmt_binlog_format_row() &&
+ ((WSREP(this) && wsrep_emulate_bin_log)
+ || mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
+#endif
/*
Pack records into format for transfer. We are allocating more
@@ -6065,7 +6385,11 @@ int THD::binlog_remove_pending_rows_event(bool clear_maps,
{
DBUG_ENTER("THD::binlog_remove_pending_rows_event");
+#ifdef WITH_WSREP
+ if (!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()))
+#else
if (!mysql_bin_log.is_open())
+#endif
DBUG_RETURN(0);
/* Ensure that all events in a GTID group are in the same cache */
@@ -6088,7 +6412,11 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional)
mode: it might be the case that we left row-based mode before
flushing anything (e.g., if we have explicitly locked tables).
*/
+#ifdef WITH_WSREP
+ if (!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()))
+#else
if (!mysql_bin_log.is_open())
+#endif
DBUG_RETURN(0);
/* Ensure that all events in a GTID group are in the same cache */
@@ -6350,7 +6678,12 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
DBUG_ENTER("THD::binlog_query");
DBUG_PRINT("enter", ("qtype: %s query: '%-.*s'",
show_query_type(qtype), (int) query_len, query_arg));
+#ifdef WITH_WSREP
+ DBUG_ASSERT(query_arg && (WSREP_EMULATE_BINLOG(this)
+ || mysql_bin_log.is_open()));
+#else
DBUG_ASSERT(query_arg && mysql_bin_log.is_open());
+#endif
/* If this is withing a BEGIN ... COMMIT group, don't log it */
if (variables.option_bits & OPTION_GTID_BEGIN)
@@ -6368,7 +6701,6 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
*/
DBUG_RETURN(0);
}
-
/*
If we are not in prelocked mode, mysql_unlock_tables() will be
called after this binlog_query(), so we have to flush the pending
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 6d2c9ef89fd..beef22a8140 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -58,6 +58,20 @@ void set_thd_stage_info(void *thd,
#include "my_apc.h"
#include "rpl_gtid.h"
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+struct wsrep_thd_shadow { /* Plain holder for a handful of per-connection
+ values. The name suggests a saved ("shadow") copy of
+ THD state that is restored later; the code that
+ fills and consumes it is outside this view --
+ TODO confirm. */
+ ulonglong options; /* presumably THD::variables.option_bits -- confirm */
+ uint server_status; /* presumably THD::server_status -- confirm */
+ enum wsrep_exec_mode wsrep_exec_mode; /* same enum type as the THD member of that name */
+ Vio *vio; /* presumably the connection's net.vio -- confirm */
+ ulong tx_isolation; /* presumably the transaction isolation level -- confirm */
+ char *db; /* database name buffer; paired with db_length below */
+ size_t db_length; /* length belonging to db */
+ my_hrtime_t user_time; /* presumably THD::user_time -- confirm */
+ longlong row_count_func; /* presumably the ROW_COUNT() value -- confirm */
+};
+#endif
class Reprepare_observer;
class Relay_log_info;
struct rpl_group_info;
@@ -640,6 +654,14 @@ typedef struct system_variables
ulong wt_timeout_short, wt_deadlock_search_depth_short;
ulong wt_timeout_long, wt_deadlock_search_depth_long;
+#ifdef WITH_WSREP
+ my_bool wsrep_on;
+ my_bool wsrep_causal_reads;
+ my_bool wsrep_dirty_reads;
+ uint wsrep_sync_wait;
+ ulong wsrep_retry_autocommit;
+ ulong wsrep_OSU_method;
+#endif
double long_query_time_double;
my_bool pseudo_slave_mode;
@@ -2816,7 +2838,11 @@ public:
/* Debug Sync facility. See debug_sync.cc. */
struct st_debug_sync_control *debug_sync_control;
#endif /* defined(ENABLED_DEBUG_SYNC) */
+#ifdef WITH_WSREP
+ THD(bool is_applier = false);
+#else
THD();
+#endif
~THD();
void init(void);
@@ -3345,7 +3371,7 @@ public:
tests fail and so force them to propagate the
lex->binlog_row_based_if_mixed upwards to the caller.
*/
- if ((variables.binlog_format == BINLOG_FORMAT_MIXED) &&
+ if ((WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_MIXED) &&
(in_sub_stmt == 0))
set_current_stmt_binlog_format_row();
@@ -3397,7 +3423,7 @@ public:
show_system_thread(system_thread)));
if (in_sub_stmt == 0)
{
- if (variables.binlog_format == BINLOG_FORMAT_ROW)
+ if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW)
set_current_stmt_binlog_format_row();
else if (temporary_tables == NULL)
set_current_stmt_binlog_format_stmt();
@@ -3798,6 +3824,50 @@ public:
return (temporary_tables ||
(rgi_slave && unlikely(rgi_have_temporary_tables())));
}
+
+#ifdef WITH_WSREP
+ const bool wsrep_applier; /* dedicated slave applier thread */
+ bool wsrep_applier_closing; /* applier marked to close */
+ bool wsrep_client_thread; /* to identify client threads*/
+ enum wsrep_exec_mode wsrep_exec_mode;
+ query_id_t wsrep_last_query_id;
+ enum wsrep_query_state wsrep_query_state;
+ enum wsrep_conflict_state wsrep_conflict_state;
+ mysql_mutex_t LOCK_wsrep_thd;
+ // changed from wsrep_seqno_t to wsrep_trx_meta_t in wsrep API rev 75
+ // wsrep_seqno_t wsrep_trx_seqno;
+ wsrep_trx_meta_t wsrep_trx_meta;
+ uint32 wsrep_rand;
+ rpl_group_info* wsrep_rgi;
+ bool wsrep_converted_lock_session;
+ wsrep_ws_handle_t wsrep_ws_handle;
+#ifdef WSREP_PROC_INFO
+ char wsrep_info[128]; /* string for dynamic proc info */
+#endif /* WSREP_PROC_INFO */
+ ulong wsrep_retry_counter; // of autocommit
+ bool wsrep_PA_safe;
+ char* wsrep_retry_query;
+ size_t wsrep_retry_query_len;
+ enum enum_server_command wsrep_retry_command;
+ enum wsrep_consistency_check_mode
+ wsrep_consistency_check;
+ wsrep_stats_var* wsrep_status_vars;
+ int wsrep_mysql_replicated;
+ const char* wsrep_TOI_pre_query; /* a query to apply before
+ the actual TOI query */
+ size_t wsrep_TOI_pre_query_len;
+ wsrep_po_handle_t wsrep_po_handle;
+ size_t wsrep_po_cnt;
+ my_bool wsrep_po_in_trans;
+#ifdef GTID_SUPPORT
+ rpl_sid wsrep_po_sid;
+#endif /* GTID_SUPPORT */
+ void* wsrep_apply_format;
+ bool wsrep_apply_toi; /* applier processing in TOI */
+ bool wsrep_skip_append_keys;
+ wsrep_gtid_t wsrep_sync_wait_gtid;
+ ulong wsrep_affected_rows;
+#endif /* WITH_WSREP */
};
@@ -4113,6 +4183,8 @@ class select_insert :public select_result_interceptor {
virtual int send_data(List<Item> &items);
virtual void store_values(List<Item> &values);
virtual bool can_rollback_data() { return 0; }
+ bool prepare_eof();
+ bool send_ok_packet();
bool send_eof();
virtual void abort_result_set();
/* not implemented: select_insert is never re-used in prepared statements */
diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc
index 63f9bfae47a..695d1095125 100644
--- a/sql/sql_connect.cc
+++ b/sql/sql_connect.cc
@@ -43,6 +43,9 @@ HASH global_index_stats;
extern mysql_mutex_t LOCK_global_user_client_stats;
extern mysql_mutex_t LOCK_global_table_stats;
extern mysql_mutex_t LOCK_global_index_stats;
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif
/*
Get structure for logging connection data for the current user
@@ -1170,6 +1173,17 @@ exit:
void end_connection(THD *thd)
{
NET *net= &thd->net;
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ wsrep_status_t rcode= wsrep->free_connection(wsrep, thd->thread_id);
+ if (rcode) {
+ WSREP_WARN("wsrep failed to free connection context: %lu, code: %d",
+ thd->thread_id, rcode);
+ }
+ }
+ thd->wsrep_client_thread= 0;
+#endif
plugin_thdvar_cleanup(thd);
if (thd->user_connect)
@@ -1305,6 +1319,9 @@ bool thd_prepare_connection(THD *thd)
(char *) thd->security_ctx->host_or_ip);
prepare_new_connection_state(thd);
+#ifdef WITH_WSREP
+ thd->wsrep_client_thread= 1;
+#endif /* WITH_WSREP */
return FALSE;
}
@@ -1378,7 +1395,15 @@ void do_handle_one_connection(THD *thd_arg)
break;
}
end_connection(thd);
-
+
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_EXITING;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif
end_thread:
close_connection(thd);
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 6b0135d92d9..ad5bc23a31b 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -643,7 +643,11 @@ cleanup:
/* See similar binlogging code in sql_update.cc, for comments */
if ((error < 0) || thd->transaction.stmt.modified_non_trans_table)
{
+#ifdef WITH_WSREP
+ if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()))
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (error < 0)
@@ -1094,7 +1098,11 @@ void multi_delete::abort_result_set()
/*
there is only side effects; to binlog with the error
*/
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
/* possible error of writing binary log is ignored deliberately */
@@ -1270,7 +1278,11 @@ bool multi_delete::send_eof()
}
if ((local_error == 0) || thd->transaction.stmt.modified_non_trans_table)
{
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (local_error == 0)
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index ef4b4703455..2e69dc89800 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -1018,7 +1018,11 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
thd->transaction.stmt.modified_non_trans_table ||
was_insert_delayed)
{
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (error <= 0)
@@ -3245,6 +3249,12 @@ bool Delayed_insert::handle_inserts(void)
mysql_cond_broadcast(&cond_client); // If waiting clients
}
}
+#ifdef WITH_WSREP
+ if (WSREP((&thd)))
+ thd_proc_info(&thd, "insert done");
+ else
+#endif /* WITH_WSREP */
+ thd_proc_info(&thd, 0);
mysql_mutex_unlock(&mutex);
/*
@@ -3687,19 +3697,26 @@ void select_insert::store_values(List<Item> &values)
TRG_EVENT_INSERT);
}
-bool select_insert::send_eof()
+bool select_insert::prepare_eof()
{
int error;
bool const trans_table= table->file->has_transactions();
- ulonglong id, row_count;
bool changed;
killed_state killed_status= thd->killed;
- DBUG_ENTER("select_insert::send_eof");
+
+ DBUG_ENTER("select_insert::prepare_eof");
DBUG_PRINT("enter", ("trans_table=%d, table_type='%s'",
trans_table, table->file->table_type()));
+#ifdef WITH_WSREP
+ error= (thd->wsrep_conflict_state == MUST_ABORT ||
+ thd->wsrep_conflict_state == CERT_FAILURE) ? -1 :
+ (thd->locked_tables_mode <= LTM_LOCK_TABLES ?
+ table->file->ha_end_bulk_insert() : 0);
+#else
error= (thd->locked_tables_mode <= LTM_LOCK_TABLES ?
table->file->ha_end_bulk_insert() : 0);
+#endif /* WITH_WSREP */
if (!error && thd->is_error())
error= thd->get_stmt_da()->sql_errno();
@@ -3727,7 +3744,11 @@ bool select_insert::send_eof()
events are in the transaction cache and will be written when
ha_autocommit_or_rollback() is issued below.
*/
+#ifdef WITH_WSREP
+ if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) &&
+#else
if (mysql_bin_log.is_open() &&
+#endif
(!error || thd->transaction.stmt.modified_non_trans_table))
{
int errcode= 0;
@@ -3740,7 +3761,7 @@ bool select_insert::send_eof()
trans_table, FALSE, FALSE, errcode))
{
table->file->ha_release_auto_increment();
- DBUG_RETURN(1);
+ DBUG_RETURN(true);
}
}
table->file->ha_release_auto_increment();
@@ -3748,27 +3769,49 @@ bool select_insert::send_eof()
if (error)
{
table->file->print_error(error,MYF(0));
- DBUG_RETURN(1);
+ DBUG_RETURN(true);
}
- char buff[160];
+
+ DBUG_RETURN(false);
+}
+
+/**
+ Format the ER_INSERT_INFO status message and send the OK packet for the
+ statement via ::my_ok().
+
+ The affected-row count is copied + deleted + (touched if the client set
+ CLIENT_FOUND_ROWS, otherwise updated). The reported insert id is the first
+ successful insert id of the current statement if there is one, else the
+ previous statement's id when LAST_INSERT_ID(expr) was used, else the
+ auto-increment value of the last inserted row (0 if nothing was copied).
+
+ row_count and id are ulonglong, as in the pre-split send_eof(), so that
+ they are not narrowed on platforms where ulong is only 32 bits wide.
+
+ @return false always
+*/
+bool select_insert::send_ok_packet() {
+ char message[160]; /* status message */
+ ulonglong row_count; /* rows affected */
+ ulonglong id; /* last insert-id */
+
+ DBUG_ENTER("select_insert::send_ok_packet");
+
if (info.ignore)
- sprintf(buff, ER(ER_INSERT_INFO), (ulong) info.records,
- (ulong) (info.records - info.copied),
- (long) thd->get_stmt_da()->current_statement_warn_count());
+ my_snprintf(message, sizeof(message), ER(ER_INSERT_INFO),
+ (ulong) info.records, (ulong) (info.records - info.copied),
+ (long) thd->get_stmt_da()->current_statement_warn_count());
else
- sprintf(buff, ER(ER_INSERT_INFO), (ulong) info.records,
- (ulong) (info.deleted+info.updated),
- (long) thd->get_stmt_da()->current_statement_warn_count());
+ my_snprintf(message, sizeof(message), ER(ER_INSERT_INFO),
+ (ulong) info.records, (ulong) (info.deleted + info.updated),
+ (long) thd->get_stmt_da()->current_statement_warn_count());
+
row_count= info.copied + info.deleted +
- ((thd->client_capabilities & CLIENT_FOUND_ROWS) ?
- info.touched : info.updated);
+ ((thd->client_capabilities & CLIENT_FOUND_ROWS) ?
+ info.touched : info.updated);
+
id= (thd->first_successful_insert_id_in_cur_stmt > 0) ?
thd->first_successful_insert_id_in_cur_stmt :
(thd->arg_of_last_insert_id_function ?
thd->first_successful_insert_id_in_prev_stmt :
(info.copied ? autoinc_value_of_last_inserted_row : 0));
- ::my_ok(thd, row_count, id, buff);
- DBUG_RETURN(0);
+
+ ::my_ok(thd, row_count, id, message);
+
+ DBUG_RETURN(false);
+}
+
+/**
+ Finish the statement: run prepare_eof() and, only if it reported no
+ error, send the OK packet with send_ok_packet().
+
+ @return true if either step reported failure, false otherwise
+*/
+bool select_insert::send_eof()
+{
+ DBUG_ENTER("select_insert::send_eof");
+ bool failed= prepare_eof();
+ if (!failed)
+ failed= send_ok_packet();
+ DBUG_RETURN(failed);
+}
void select_insert::abort_result_set() {
@@ -3812,7 +3855,11 @@ void select_insert::abort_result_set() {
if (!can_rollback_data())
thd->transaction.all.modified_non_trans_table= TRUE;
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
/* error of writing binary log is ignored */
@@ -4242,7 +4289,11 @@ select_create::binlog_show_create_table(TABLE **tables, uint count)
WITH_DB_NAME);
DBUG_ASSERT(result == 0); /* show_create_table() always return 0 */
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif /* WITH_WSREP */
{
int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
result= thd->binlog_query(THD::STMT_QUERY_TYPE,
@@ -4252,6 +4303,9 @@ select_create::binlog_show_create_table(TABLE **tables, uint count)
/* suppress_use */ FALSE,
errcode);
}
+#ifdef WITH_WSREP
+ ha_wsrep_fake_trx_id(thd);
+#endif
return result;
}
@@ -4264,13 +4318,13 @@ void select_create::store_values(List<Item> &values)
bool select_create::send_eof()
{
- if (select_insert::send_eof())
+ DBUG_ENTER("select_create::send_eof");
+ if (prepare_eof())
{
abort_result_set();
- return 1;
+ DBUG_RETURN(true);
}
- exit_done= 1; // Avoid double calls
/*
Do an implicit commit at end of statement for non-temporary
tables. This can fail, but we should unlock the table
@@ -4281,13 +4335,33 @@ bool select_create::send_eof()
trans_commit_stmt(thd);
if (!(thd->variables.option_bits & OPTION_GTID_BEGIN))
trans_commit_implicit(thd);
+#ifdef WITH_WSREP
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state != NO_CONFLICT)
+ {
+ WSREP_DEBUG("select_create commit failed, thd: %lu err: %d %s",
+ thd->thread_id, thd->wsrep_conflict_state, thd->query());
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ abort_result_set();
+ DBUG_RETURN(true);
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+#endif /* WITH_WSREP */
}
else if (!thd->is_current_stmt_binlog_format_row())
table->s->table_creation_was_logged= 1;
+ /*
+ exit_done must only be set after last potential call to
+ abort_result_set().
+ */
+ exit_done= 1; // Avoid double calls
+
table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
+ send_ok_packet();
+
if (m_plock)
{
MYSQL_LOCK *lock= *m_plock;
@@ -4308,12 +4382,12 @@ bool select_create::send_eof()
create_info->
pos_in_locked_tables,
table, lock))
- return 0; // ok
+ DBUG_RETURN(false); // ok
/* Fail. Continue without locking the table */
}
mysql_unlock_tables(thd, lock);
}
- return 0;
+ DBUG_RETURN(false);
}
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index 19568bdd42c..5f22a657dfe 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -1622,6 +1622,17 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
}
else
{
+#ifdef WITH_WSREP
+ if (version == 99997 && thd->wsrep_exec_mode == LOCAL_STATE)
+ {
+ WSREP_DEBUG("consistency check: %s", thd->query());
+ thd->wsrep_consistency_check= CONSISTENCY_CHECK_DECLARED;
+ lip->yySkipn(5);
+ lip->set_echo(TRUE);
+ state=MY_LEX_START;
+ break; /* Do not treat contents as a comment. */
+ }
+#endif /* WITH_WSREP */
/*
Patch and skip the conditional comment to avoid it
being propagated infinitely (eg. to a slave).
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index efcb218eb4c..9f0b6e25a5c 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -50,7 +50,7 @@
#include "sql_connect.h" // decrease_user_connections,
// check_mqh,
// reset_mqh
-#include "sql_rename.h" // mysql_rename_table
+#include "sql_rename.h" // mysql_rename_tables
#include "sql_tablespace.h" // mysql_alter_tablespace
#include "hostname.h" // hostname_cache_refresh
#include "sql_acl.h" // *_ACL, check_grant, is_acl_user,
@@ -104,6 +104,13 @@
#include "../storage/maria/ha_maria.h"
#endif
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#include "wsrep_thd.h"
+#include "wsrep_binlog.h"
+static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
+ Parser_state *parser_state);
+#endif /* WITH_WSREP */
/**
@defgroup Runtime_Environment Runtime Environment
@{
@@ -124,7 +131,7 @@ static void sql_kill(THD *thd, longlong id, killed_state state, killed_type type
static void sql_kill_user(THD *thd, LEX_USER *user, killed_state state);
static bool lock_tables_precheck(THD *thd, TABLE_LIST *tables);
static bool execute_show_status(THD *, TABLE_LIST *);
-static bool execute_rename_table(THD *, TABLE_LIST *, TABLE_LIST *);
+static bool check_rename_table(THD *, TABLE_LIST *, TABLE_LIST *);
const char *any_db="*any*"; // Special symbol for check_access
@@ -879,11 +886,26 @@ bool do_command(THD *thd)
{
bool return_value;
char *packet= 0;
+#ifdef WITH_WSREP
+ ulong packet_length= 0; // just to avoid (false positive) compiler warning
+#else
ulong packet_length;
+#endif /* WITH_WSREP */
NET *net= &thd->net;
enum enum_server_command command;
DBUG_ENTER("do_command");
-
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_IDLE;
+ if (thd->wsrep_conflict_state==MUST_ABORT)
+ {
+ wsrep_client_rollback(thd);
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif /* WITH_WSREP */
/*
indicator of uninitialized lex => normal flow of errors handling
(see my_message_sql)
@@ -930,12 +952,46 @@ bool do_command(THD *thd)
packet_length= my_net_read_packet(net, 1);
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+
+ /* These THDs were aborted, or are being aborted, while idle */
+ if (thd->wsrep_conflict_state == ABORTING)
+ {
+ while (thd->wsrep_conflict_state == ABORTING) {
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ my_sleep(1000);
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ }
+ thd->store_globals();
+ }
+ else if (thd->wsrep_conflict_state == ABORTED)
+ {
+ thd->store_globals();
+ }
+
+ thd->wsrep_query_state= QUERY_EXEC;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif /* WITH_WSREP */
if (packet_length == packet_error)
{
DBUG_PRINT("info",("Got error %d reading command from socket %s",
net->error,
vio_description(net->vio)));
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state == MUST_ABORT)
+ {
+ DBUG_PRINT("wsrep",("aborted for wsrep rollback: %lu", thd->real_id));
+ wsrep_client_rollback(thd);
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif /* WITH_WSREP */
/* Instrument this broken statement as "statement/com/error" */
thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi,
com_statement_info[COM_END].
@@ -991,12 +1047,79 @@ bool do_command(THD *thd)
vio_description(net->vio), command,
command_name[command].str));
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ /*
+ * Bail out if the DB snapshot has not been installed. However, COM_QUERY
+ * is let through so that "SET" and "SHOW" statements keep working; other
+ * statements are trapped later in mysql_execute_command().
+ */
+ if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready &&
+ command != COM_QUERY &&
+ command != COM_PING &&
+ command != COM_QUIT &&
+ command != COM_PROCESS_INFO &&
+ command != COM_PROCESS_KILL &&
+ command != COM_SET_OPTION &&
+ command != COM_SHUTDOWN &&
+ command != COM_SLEEP &&
+ command != COM_STATISTICS &&
+ command != COM_TIME &&
+ command != COM_STMT_PREPARE &&
+ command != COM_STMT_SEND_LONG_DATA &&
+ command != COM_STMT_EXECUTE &&
+ command != COM_STMT_RESET &&
+ command != COM_STMT_CLOSE &&
+ command != COM_END
+ ) {
+ my_message(ER_UNKNOWN_COM_ERROR,
+ "WSREP has not yet prepared node for application use",
+ MYF(0));
+ thd->protocol->end_statement();
+
+ /* Performance Schema Interface instrumentation end */
+ MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da());
+ thd->m_statement_psi= NULL;
+ thd->m_digest= NULL;
+
+ return_value= FALSE;
+ goto out;
+ }
+ }
+#endif /* WITH_WSREP */
/* Restore read timeout value */
my_net_set_read_timeout(net, thd->variables.net_read_timeout);
DBUG_ASSERT(packet_length);
DBUG_ASSERT(!thd->apc_target.is_enabled());
return_value= dispatch_command(command, thd, packet+1, (uint) (packet_length-1));
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT)
+ {
+ WSREP_DEBUG("Retry autocommit for: %s\n", thd->wsrep_retry_query);
+ CHARSET_INFO *current_charset = thd->variables.character_set_client;
+ if (!is_supported_parser_charset(current_charset))
+ {
+ /* Do not use non-supported parser character sets */
+ WSREP_WARN("Current client character set is non-supported parser "
+ "character set: %s", current_charset->csname);
+ thd->variables.character_set_client = &my_charset_latin1;
+ WSREP_WARN("For retry temporally setting character set to : %s",
+ my_charset_latin1.csname);
+ }
+ return_value= dispatch_command(command, thd, thd->wsrep_retry_query,
+ thd->wsrep_retry_query_len);
+ thd->variables.character_set_client = current_charset;
+ }
+ }
+ if (thd->wsrep_retry_query && thd->wsrep_conflict_state != REPLAYING)
+ {
+ my_free(thd->wsrep_retry_query);
+ thd->wsrep_retry_query = NULL;
+ thd->wsrep_retry_query_len = 0;
+ thd->wsrep_retry_command = COM_CONNECT;
+ }
+#endif /* WITH_WSREP */
DBUG_ASSERT(!thd->apc_target.is_enabled());
out:
@@ -1071,6 +1194,21 @@ static my_bool deny_updates_if_read_only_option(THD *thd,
DBUG_RETURN(FALSE);
}
+#ifdef WITH_WSREP
+/**
+ Save a private, NUL-terminated copy of the current statement text in
+ thd->wsrep_retry_query (together with its length and the current
+ command), releasing any previously saved copy first.
+
+ If the buffer cannot be allocated, a warning is logged and
+ thd->wsrep_retry_query is left NULL with wsrep_retry_query_len set to 0
+ instead of writing through a NULL pointer.
+ NOTE(review): callers that retry from this copy should check
+ wsrep_retry_query for NULL -- they are outside this view.
+
+ @param thd connection whose query is copied
+*/
+static void wsrep_copy_query(THD *thd)
+{
+ thd->wsrep_retry_command = thd->get_command();
+ thd->wsrep_retry_query_len = thd->query_length();
+ if (thd->wsrep_retry_query) {
+ my_free(thd->wsrep_retry_query);
+ }
+ thd->wsrep_retry_query = (char *)my_malloc(
+ thd->wsrep_retry_query_len + 1, MYF(0));
+ if (!thd->wsrep_retry_query)
+ {
+ /* MYF(0) means my_malloc() may return NULL; do not dereference it. */
+ WSREP_WARN("failed to allocate %llu bytes for retry query copy",
+ (unsigned long long) (thd->wsrep_retry_query_len + 1));
+ thd->wsrep_retry_query_len = 0;
+ return;
+ }
+ strncpy(thd->wsrep_retry_query, thd->query(), thd->wsrep_retry_query_len);
+ thd->wsrep_retry_query[thd->wsrep_retry_query_len] = '\0';
+}
+#endif /* WITH_WSREP */
+
/**
Perform one connection-level (COM_XXXX) command.
@@ -1100,6 +1238,42 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
DBUG_ENTER("dispatch_command");
DBUG_PRINT("info", ("command: %d", command));
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ if (!thd->in_multi_stmt_transaction_mode())
+ {
+ thd->wsrep_PA_safe= true;
+ }
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_EXEC;
+ if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT)
+ {
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ }
+ if (thd->wsrep_conflict_state== MUST_ABORT)
+ {
+ wsrep_client_rollback(thd);
+ }
+ if (thd->wsrep_conflict_state== ABORTED)
+ {
+ my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction");
+ WSREP_DEBUG("Deadlock error for: %s", thd->query());
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ thd->killed = NOT_KILLED;
+ thd->mysys_var->abort = 0;
+ thd->wsrep_conflict_state = NO_CONFLICT;
+ thd->wsrep_retry_counter = 0;
+ /*
+ Increment threads running to compensate dec_thread_running() called
+ after dispatch_end label.
+ */
+ inc_thread_running();
+ goto dispatch_end;
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+#endif /* WITH_WSREP */
#if defined(ENABLED_PROFILING)
thd->profiling.start_new_query();
#endif
@@ -1302,7 +1476,11 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
if (parser_state.init(thd, thd->query(), thd->query_length()))
break;
+#ifdef WITH_WSREP
+ wsrep_mysql_parse(thd, thd->query(), thd->query_length(), &parser_state);
+#else
mysql_parse(thd, thd->query(), thd->query_length(), &parser_state);
+#endif /* WITH_WSREP */
while (!thd->killed && (parser_state.m_lip.found_semicolon != NULL) &&
! thd->is_error())
@@ -1379,10 +1557,19 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
Count each statement from the client.
*/
statistic_increment(thd->status_var.questions, &LOCK_status);
+#ifdef WITH_WSREP
+ if (!WSREP(thd))
+ thd->set_time(); /* Reset the query start time. */
+#else
thd->set_time(); /* Reset the query start time. */
+#endif /* WITH_WSREP */
parser_state.reset(beginning_of_next_stmt, length);
/* TODO: set thd->lex->sql_command to SQLCOM_END here */
+#ifdef WITH_WSREP
+ wsrep_mysql_parse(thd, beginning_of_next_stmt, length, &parser_state);
+#else
mysql_parse(thd, beginning_of_next_stmt, length, &parser_state);
+#endif /* WITH_WSREP */
}
DBUG_PRINT("info",("query ready"));
@@ -1722,6 +1909,26 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
break;
}
+#ifdef WITH_WSREP
+ dispatch_end:
+
+ if (WSREP(thd)) {
+ /* wsrep BF abort in query exec phase */
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if ((thd->wsrep_conflict_state != REPLAYING) &&
+ (thd->wsrep_conflict_state != RETRY_AUTOCOMMIT))
+ {
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ thd->update_server_status();
+ thd->protocol->end_statement();
+ query_cache_end_of_result(thd);
+ }
+ else
+ {
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+ } else { /* if (WSREP(thd))... */
+#endif /* WITH_WSREP */
DBUG_ASSERT(thd->derived_tables == NULL &&
(thd->open_tables == NULL ||
(thd->locked_tables_mode == LTM_LOCK_TABLES)));
@@ -1731,6 +1938,9 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
thd->update_server_status();
thd->protocol->end_statement();
query_cache_end_of_result(thd);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
if (!thd->is_error() && !thd->killed_errno())
mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_RESULT, 0, 0);
@@ -2184,6 +2394,13 @@ err:
return TRUE;
}
+#ifdef WITH_WSREP
+/**
+ Tell whether a statement is flagged as a status command.
+
+ @param command SQL command code; asserted to lie in [0, SQLCOM_END]
+ @return true if sql_command_flags[command] has CF_STATUS_COMMAND set
+*/
+static bool wsrep_is_show_query(enum enum_sql_command command)
+{
+ DBUG_ASSERT(command >= 0 && command <= SQLCOM_END);
+ if (sql_command_flags[command] & CF_STATUS_COMMAND)
+ return true;
+ return false;
+}
+#endif /* WITH_WSREP */
/**
Execute command saved in thd and lex->sql_command.
@@ -2389,7 +2606,51 @@ mysql_execute_command(THD *thd)
#ifdef HAVE_REPLICATION
} /* endif unlikely slave */
#endif
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ /*
+ change LOCK TABLE WRITE to transaction
+ */
+ if (lex->sql_command== SQLCOM_LOCK_TABLES && wsrep_convert_LOCK_to_trx)
+ {
+ for (TABLE_LIST *table= all_tables; table; table= table->next_global)
+ {
+ if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
+ {
+ lex->sql_command= SQLCOM_BEGIN;
+ thd->wsrep_converted_lock_session= true;
+ break;
+ }
+ }
+ }
+ if (lex->sql_command== SQLCOM_UNLOCK_TABLES &&
+ thd->wsrep_converted_lock_session)
+ {
+ thd->wsrep_converted_lock_session= false;
+ lex->sql_command= SQLCOM_COMMIT;
+ lex->tx_release= TVL_NO;
+ }
+
+ /*
+ Bail out if DB snapshot has not been installed. SET and SHOW commands,
+ however, are always allowed.
+ We additionally allow all other commands that do not change data in
+ case wsrep_dirty_reads is enabled.
+ */
+ if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready &&
+ lex->sql_command != SQLCOM_SET_OPTION &&
+ !(thd->variables.wsrep_dirty_reads &&
+ !is_update_query(lex->sql_command)) &&
+ !wsrep_is_show_query(lex->sql_command))
+ {
+ my_message(ER_UNKNOWN_COM_ERROR,
+ "WSREP has not yet prepared node for application use",
+ MYF(0));
+ goto error;
+ }
+ }
+#endif /* WITH_WSREP */
status_var_increment(thd->status_var.com_stat[lex->sql_command]);
thd->progress.report_to_client= MY_TEST(sql_command_flags[lex->sql_command] &
CF_REPORT_PROGRESS);
@@ -2431,7 +2692,13 @@ mysql_execute_command(THD *thd)
{
/* Commit the normal transaction if one is active. */
if (trans_commit_implicit(thd))
+ {
+ thd->mdl_context.release_transactional_locks();
+#ifdef WITH_WSREP
+ WSREP_DEBUG("implicit commit failed, MDL released: %lu", thd->thread_id);
+#endif /* WITH_WSREP */
goto error;
+ }
/* Release metadata locks acquired in this transaction. */
thd->mdl_context.release_transactional_locks();
}
@@ -2521,6 +2788,10 @@ mysql_execute_command(THD *thd)
/* fall through */
case SQLCOM_SHOW_STATUS_PROC:
case SQLCOM_SHOW_STATUS_FUNC:
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+#endif /* WITH_WSREP */
+
case SQLCOM_SHOW_DATABASES:
case SQLCOM_SHOW_TABLES:
case SQLCOM_SHOW_TRIGGERS:
@@ -2529,17 +2800,27 @@ mysql_execute_command(THD *thd)
case SQLCOM_SHOW_PLUGINS:
case SQLCOM_SHOW_FIELDS:
case SQLCOM_SHOW_KEYS:
+#ifndef WITH_WSREP
case SQLCOM_SHOW_VARIABLES:
case SQLCOM_SHOW_CHARSETS:
case SQLCOM_SHOW_COLLATIONS:
case SQLCOM_SHOW_STORAGE_ENGINES:
case SQLCOM_SHOW_PROFILE:
+#endif /* WITH_WSREP */
case SQLCOM_SHOW_CLIENT_STATS:
case SQLCOM_SHOW_USER_STATS:
case SQLCOM_SHOW_TABLE_STATS:
case SQLCOM_SHOW_INDEX_STATS:
case SQLCOM_SELECT:
- {
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+ case SQLCOM_SHOW_VARIABLES:
+ case SQLCOM_SHOW_CHARSETS:
+ case SQLCOM_SHOW_COLLATIONS:
+ case SQLCOM_SHOW_STORAGE_ENGINES:
+ case SQLCOM_SHOW_PROFILE:
+#endif /* WITH_WSREP */
+ {
thd->status_var.last_query_cost= 0.0;
/*
@@ -2916,7 +3197,7 @@ case SQLCOM_PREPARE:
*/
if (thd->query_name_consts &&
mysql_bin_log.is_open() &&
- thd->variables.binlog_format == BINLOG_FORMAT_STMT &&
+ WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT &&
!mysql_bin_log.is_query_in_union(thd, thd->query_id))
{
List_iterator_fast<Item> it(select_lex->item_list);
@@ -3001,6 +3282,7 @@ case SQLCOM_PREPARE:
/*
select_create is currently not re-execution friendly and
needs to be created for every execution of a PS/SP.
+ Note: In wsrep-patch, CTAS is handled like a regular transaction.
*/
if ((result= new select_create(create_table,
&create_info,
@@ -3035,6 +3317,15 @@ case SQLCOM_PREPARE:
}
else
{
+#ifdef WITH_WSREP
+ /* in STATEMENT format, we probably have to replicate also temporary
+ tables, like mysql replication does
+ */
+ if (!thd->is_current_stmt_binlog_format_row() ||
+ !(create_info.options & HA_LEX_CREATE_TMP_TABLE))
+ WSREP_TO_ISOLATION_BEGIN(create_table->db, create_table->table_name,
+ NULL)
+#endif /* WITH_WSREP */
/* Regular CREATE TABLE */
res= mysql_create_table(thd, create_table,
&create_info, &alter_info);
@@ -3072,6 +3363,7 @@ end_with_restore_list:
DBUG_ASSERT(first_table == all_tables && first_table != 0);
if (check_one_table_access(thd, INDEX_ACL, all_tables))
goto error; /* purecov: inspected */
+ WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL)
/*
Currently CREATE INDEX or DROP INDEX cause a full table rebuild
and thus classify as slow administrative statements just like
@@ -3194,7 +3486,12 @@ end_with_restore_list:
#endif /* HAVE_REPLICATION */
case SQLCOM_RENAME_TABLE:
{
- if (execute_rename_table(thd, first_table, all_tables))
+ if (check_rename_table(thd, first_table, all_tables))
+ goto error;
+
+ WSREP_TO_ISOLATION_BEGIN(0, 0, first_table)
+
+ if (mysql_rename_tables(thd, first_table, 0))
goto error;
break;
}
@@ -3221,6 +3518,10 @@ end_with_restore_list:
goto error;
#else
{
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+#endif /* WITH_WSREP */
+
/*
Access check:
SHOW CREATE TABLE require any privileges on the table level (ie
@@ -3283,6 +3584,10 @@ end_with_restore_list:
case SQLCOM_CHECKSUM:
{
DBUG_ASSERT(first_table == all_tables && first_table != 0);
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+#endif /* WITH_WSREP */
+
if (check_table_access(thd, SELECT_ACL, all_tables,
FALSE, UINT_MAX, FALSE))
goto error; /* purecov: inspected */
@@ -3291,6 +3596,10 @@ end_with_restore_list:
break;
}
case SQLCOM_UPDATE:
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) &&
+ wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) goto error;
+#endif /* WITH_WSREP */
{
ha_rows found= 0, updated= 0;
DBUG_ASSERT(first_table == all_tables && first_table != 0);
@@ -3330,6 +3639,10 @@ end_with_restore_list:
/* if we switched from normal update, rights are checked */
if (up_result != 2)
{
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) &&
+ wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) goto error;
+#endif /* WITH_WSREP */
if ((res= multi_update_precheck(thd, all_tables)))
break;
}
@@ -3399,6 +3712,10 @@ end_with_restore_list:
break;
}
case SQLCOM_REPLACE:
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) &&
+ wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) goto error;
+#endif /* WITH_WSREP */
#ifndef DBUG_OFF
if (mysql_bin_log.is_open())
{
@@ -3435,6 +3752,10 @@ end_with_restore_list:
#endif
/* fall through */
case SQLCOM_INSERT:
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) &&
+ wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) goto error;
+#endif /* WITH_WSREP */
{
DBUG_ASSERT(first_table == all_tables && first_table != 0);
@@ -3488,12 +3809,24 @@ end_with_restore_list:
}
case SQLCOM_REPLACE_SELECT:
case SQLCOM_INSERT_SELECT:
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) &&
+ wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) goto error;
+#endif /* WITH_WSREP */
{
select_result *sel_result;
bool explain= MY_TEST(lex->describe);
DBUG_ASSERT(first_table == all_tables && first_table != 0);
if ((res= insert_precheck(thd, all_tables)))
break;
+#ifdef WITH_WSREP
+ if (thd->wsrep_consistency_check == CONSISTENCY_CHECK_DECLARED)
+ {
+ thd->wsrep_consistency_check = CONSISTENCY_CHECK_RUNNING;
+ WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL);
+ }
+
+#endif
/*
INSERT...SELECT...ON DUPLICATE KEY UPDATE/REPLACE SELECT/
INSERT...IGNORE...SELECT can be unsafe, unless ORDER BY PRIMARY KEY
@@ -3594,6 +3927,10 @@ end_with_restore_list:
break;
}
case SQLCOM_DELETE:
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) &&
+ wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) goto error;
+#endif /* WITH_WSREP */
{
select_result *sel_result=lex->result;
DBUG_ASSERT(first_table == all_tables && first_table != 0);
@@ -3614,6 +3951,10 @@ end_with_restore_list:
break;
}
case SQLCOM_DELETE_MULTI:
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) &&
+ wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) goto error;
+#endif /* WITH_WSREP */
{
DBUG_ASSERT(first_table == all_tables && first_table != 0);
TABLE_LIST *aux_tables= thd->lex->auxiliary_table_list.first;
@@ -3689,6 +4030,18 @@ end_with_restore_list:
/* So that DROP TEMPORARY TABLE gets to binlog at commit/rollback */
thd->variables.option_bits|= OPTION_KEEP_LOG;
}
+#ifdef WITH_WSREP
+ for (TABLE_LIST *table= all_tables; table; table= table->next_global)
+ {
+ if (!lex->drop_temporary &&
+ (!thd->is_current_stmt_binlog_format_row() ||
+ !find_temporary_table(thd, table)))
+ {
+ WSREP_TO_ISOLATION_BEGIN(NULL, NULL, all_tables);
+ break;
+ }
+ }
+#endif /* WITH_WSREP */
/*
If we are a slave, we should add IF EXISTS if the query executed
on the master without an error. This will help a slave to
@@ -3742,7 +4095,6 @@ end_with_restore_list:
if (!mysql_change_db(thd, &db_str, FALSE))
my_ok(thd);
-
break;
}
@@ -3893,6 +4245,7 @@ end_with_restore_list:
#endif
if (check_access(thd, CREATE_ACL, lex->name.str, NULL, NULL, 1, 0))
break;
+ WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL)
res= mysql_create_db(thd, lex->name.str, &create_info, 0);
break;
}
@@ -3924,6 +4277,7 @@ end_with_restore_list:
#endif
if (check_access(thd, DROP_ACL, lex->name.str, NULL, NULL, 1, 0))
break;
+ WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL)
res= mysql_rm_db(thd, lex->name.str, lex->check_exists, 0);
break;
}
@@ -3955,6 +4309,7 @@ end_with_restore_list:
res= 1;
break;
}
+ WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL)
res= mysql_upgrade_db(thd, db);
if (!res)
my_ok(thd);
@@ -3990,6 +4345,7 @@ end_with_restore_list:
#endif
if (check_access(thd, ALTER_ACL, db->str, NULL, NULL, 1, 0))
break;
+ WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL)
res= mysql_alter_db(thd, db->str, &create_info);
break;
}
@@ -4003,6 +4359,11 @@ end_with_restore_list:
db_name.str= db_name_buff;
db_name.length= lex->name.length;
strmov(db_name.str, lex->name.str);
+
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+#endif /* WITH_WSREP */
+
if (check_db_name(&db_name))
{
my_error(ER_WRONG_DB_NAME, MYF(0), db_name.str);
@@ -4028,6 +4389,7 @@ end_with_restore_list:
if (res)
break;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
switch (lex->sql_command) {
case SQLCOM_CREATE_EVENT:
{
@@ -4058,10 +4420,14 @@ end_with_restore_list:
/* lex->unit.cleanup() is called outside, no need to call it here */
break;
case SQLCOM_SHOW_CREATE_EVENT:
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+#endif /* WITH_WSREP */
res= Events::show_create_event(thd, lex->spname->m_db,
lex->spname->m_name);
break;
case SQLCOM_DROP_EVENT:
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res= Events::drop_event(thd,
lex->spname->m_db, lex->spname->m_name,
lex->check_exists)))
@@ -4076,6 +4442,7 @@ end_with_restore_list:
if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 0))
break;
#ifdef HAVE_DLOPEN
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res = mysql_create_function(thd, &lex->udf)))
my_ok(thd);
#else
@@ -4091,6 +4458,7 @@ end_with_restore_list:
if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 1) &&
check_global_access(thd,CREATE_USER_ACL))
break;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
/* Conditionally writes to binlog */
if (!(res= mysql_create_user(thd, lex->users_list,
lex->sql_command == SQLCOM_CREATE_ROLE)))
@@ -4104,6 +4472,7 @@ end_with_restore_list:
check_global_access(thd,CREATE_USER_ACL))
break;
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res= mysql_drop_user(thd, lex->users_list,
lex->sql_command == SQLCOM_DROP_ROLE)))
my_ok(thd);
@@ -4115,6 +4484,7 @@ end_with_restore_list:
check_global_access(thd,CREATE_USER_ACL))
break;
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res= mysql_rename_user(thd, lex->users_list)))
my_ok(thd);
break;
@@ -4126,6 +4496,7 @@ end_with_restore_list:
break;
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res = mysql_revoke_all(thd, lex->users_list)))
my_ok(thd);
break;
@@ -4208,6 +4579,7 @@ end_with_restore_list:
lex->type == TYPE_ENUM_PROCEDURE, 0))
goto error;
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_routine_grant(thd, all_tables,
lex->type == TYPE_ENUM_PROCEDURE,
lex->users_list, grants,
@@ -4221,6 +4593,7 @@ end_with_restore_list:
all_tables, FALSE, UINT_MAX, FALSE))
goto error;
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_table_grant(thd, all_tables, lex->users_list,
lex->columns, lex->grant,
lex->sql_command == SQLCOM_REVOKE);
@@ -4236,6 +4609,7 @@ end_with_restore_list:
}
else
{
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
/* Conditionally writes to binlog */
res= mysql_grant(thd, select_lex->db, lex->users_list, lex->grant,
lex->sql_command == SQLCOM_REVOKE,
@@ -4261,6 +4635,7 @@ end_with_restore_list:
case SQLCOM_REVOKE_ROLE:
case SQLCOM_GRANT_ROLE:
{
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (!(res= mysql_grant_role(thd, lex->users_list,
lex->sql_command != SQLCOM_GRANT_ROLE)))
my_ok(thd);
@@ -4294,6 +4669,38 @@ end_with_restore_list:
break;
}
+#ifdef WITH_WSREP
+ if (lex->type & (
+ REFRESH_GRANT |
+ REFRESH_HOSTS |
+#ifdef HAVE_OPENSSL
+ REFRESH_DES_KEY_FILE |
+#endif
+ /*
+ Write all flush log statements except
+ FLUSH LOGS
+ FLUSH BINARY LOGS
+ Check reload_acl_and_cache for why.
+ */
+ REFRESH_RELAY_LOG |
+ REFRESH_SLOW_LOG |
+ REFRESH_GENERAL_LOG |
+ REFRESH_ENGINE_LOG |
+ REFRESH_ERROR_LOG |
+#ifdef HAVE_QUERY_CACHE
+ REFRESH_QUERY_CACHE_FREE |
+#endif /* HAVE_QUERY_CACHE */
+ REFRESH_STATUS |
+ REFRESH_USER_RESOURCES |
+ REFRESH_TABLE_STATS |
+ REFRESH_INDEX_STATS |
+ REFRESH_USER_STATS |
+ REFRESH_CLIENT_STATS))
+ {
+ WSREP_TO_ISOLATION_BEGIN_WRTCHK(WSREP_MYSQL_DB, NULL, NULL)
+ }
+#endif /* WITH_WSREP*/
+
#ifdef HAVE_REPLICATION
if (lex->type & REFRESH_READ_LOCK)
{
@@ -4307,12 +4714,32 @@ end_with_restore_list:
goto error;
}
#endif
+
/*
reload_acl_and_cache() will tell us if we are allowed to write to the
binlog or not.
*/
if (!reload_acl_and_cache(thd, lex->type, first_table, &write_to_binlog))
{
+#ifdef WITH_WSREP
+ if ((lex->type & REFRESH_TABLES) && !(lex->type & (REFRESH_FOR_EXPORT|REFRESH_READ_LOCK)))
+ {
+ /*
+ This is done after reload_acl_and_cache() because LOCK TABLES is
+ not replicated in Galera, and the upgrade of those locks is checked
+ inside reload_acl_and_cache().
+ Hence, isolation is begun only after (and if) we were able to
+ upgrade the locks.
+ */
+ if (first_table)
+ {
+ WSREP_TO_ISOLATION_BEGIN_WRTCHK(NULL, NULL, first_table);
+ }
+ else
+ {
+ WSREP_TO_ISOLATION_BEGIN_WRTCHK(WSREP_MYSQL_DB, NULL, NULL);
+ }
+ }
+#endif /* WITH_WSREP */
/*
We WANT to write and we CAN write.
! we write after unlocking the table.
@@ -4414,15 +4841,29 @@ end_with_restore_list:
able to open it (with SQLCOM_HA_OPEN) in the first place.
*/
unit->set_limit(select_lex);
+#ifdef WITH_WSREP
+ { char* tmp_info= NULL;
+ if (WSREP(thd)) tmp_info = (char *)thd_proc_info(thd, "mysql_ha_read()");
+#endif /* WITH_WSREP */
res= mysql_ha_read(thd, first_table, lex->ha_read_mode, lex->ident.str,
lex->insert_list, lex->ha_rkey_mode, select_lex->where,
unit->select_limit_cnt, unit->offset_limit_cnt);
+#ifdef WITH_WSREP
+ if (WSREP(thd)) thd_proc_info(thd, tmp_info);
+ }
+#endif /* WITH_WSREP */
break;
case SQLCOM_BEGIN:
DBUG_PRINT("info", ("Executing SQLCOM_BEGIN thd: %p", thd));
if (trans_begin(thd, lex->start_transaction_opt))
+ {
+ thd->mdl_context.release_transactional_locks();
+#ifdef WITH_WSREP
+ WSREP_DEBUG("BEGIN failed, MDL released: %lu", thd->thread_id);
+#endif /* WITH_WSREP */
goto error;
+ }
my_ok(thd);
break;
case SQLCOM_COMMIT:
@@ -4436,7 +4877,13 @@ end_with_restore_list:
(thd->variables.completion_type == 2 &&
lex->tx_release != TVL_NO));
if (trans_commit(thd))
+ {
+ thd->mdl_context.release_transactional_locks();
+#ifdef WITH_WSREP
+ WSREP_DEBUG("COMMIT failed, MDL released: %lu", thd->thread_id);
+#endif /* WITH_WSREP */
goto error;
+ }
thd->mdl_context.release_transactional_locks();
/* Begin transaction with the same isolation level. */
if (tx_chain)
@@ -4456,7 +4903,20 @@ end_with_restore_list:
thd->killed= KILL_CONNECTION;
thd->print_aborted_warning(3, "RELEASE");
}
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+
+ if (thd->wsrep_conflict_state == NO_CONFLICT ||
+ thd->wsrep_conflict_state == REPLAYING)
+ {
+ my_ok(thd);
+ }
+ } else {
+#endif /* WITH_WSREP */
my_ok(thd);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
break;
}
case SQLCOM_ROLLBACK:
@@ -4471,7 +4931,13 @@ end_with_restore_list:
lex->tx_release != TVL_NO));
if (trans_rollback(thd))
+ {
+ thd->mdl_context.release_transactional_locks();
+#ifdef WITH_WSREP
+ WSREP_DEBUG("rollback failed, MDL released: %lu", thd->thread_id);
+#endif /* WITH_WSREP */
goto error;
+ }
thd->mdl_context.release_transactional_locks();
/* Begin transaction with the same isolation level. */
if (tx_chain)
@@ -4488,8 +4954,18 @@ end_with_restore_list:
/* Disconnect the current client connection. */
if (tx_release)
thd->killed= KILL_CONNECTION;
- my_ok(thd);
- break;
+#ifdef WITH_WSREP
+ if (WSREP(thd)) {
+ if (thd->wsrep_conflict_state == NO_CONFLICT) {
+ my_ok(thd);
+ }
+ } else {
+#endif /* WITH_WSREP */
+ my_ok(thd);
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
+ break;
}
case SQLCOM_RELEASE_SAVEPOINT:
if (trans_release_savepoint(thd, lex->ident))
@@ -4557,6 +5033,7 @@ end_with_restore_list:
if (sp_process_definer(thd))
goto create_sp_error;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= (sp_result= sp_create_routine(thd, lex->sphead->m_type, lex->sphead));
switch (sp_result) {
case SP_OK: {
@@ -4767,6 +5244,7 @@ create_sp_error:
already puts on CREATE FUNCTION.
*/
/* Conditionally writes to binlog */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
sp_result= sp_update_routine(thd, type, lex->spname, &lex->sp_chistics);
switch (sp_result)
{
@@ -4838,6 +5316,7 @@ create_sp_error:
if (check_routine_access(thd, ALTER_PROC_ACL, db, name,
lex->sql_command == SQLCOM_DROP_PROCEDURE, 0))
goto error;
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
/* Conditionally writes to binlog */
sp_result= sp_drop_routine(thd, type, lex->spname);
@@ -4902,12 +5381,18 @@ create_sp_error:
}
case SQLCOM_SHOW_CREATE_PROC:
{
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+#endif /* WITH_WSREP */
if (sp_show_create_routine(thd, TYPE_ENUM_PROCEDURE, lex->spname))
goto error;
break;
}
case SQLCOM_SHOW_CREATE_FUNC:
{
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+#endif /* WITH_WSREP */
if (sp_show_create_routine(thd, TYPE_ENUM_FUNCTION, lex->spname))
goto error;
break;
@@ -4920,6 +5405,9 @@ create_sp_error:
stored_procedure_type type= (lex->sql_command == SQLCOM_SHOW_PROC_CODE ?
TYPE_ENUM_PROCEDURE : TYPE_ENUM_FUNCTION);
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+#endif /* WITH_WSREP */
if (sp_cache_routine(thd, type, lex->spname, FALSE, &sp))
goto error;
if (!sp || sp->show_routine_code(thd))
@@ -4941,6 +5429,9 @@ create_sp_error:
if (check_ident_length(&lex->spname->m_name))
goto error;
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) goto error;
+#endif /* WITH_WSREP */
if (show_create_trigger(thd, lex->spname))
goto error; /* Error has been already logged. */
@@ -4952,6 +5443,7 @@ create_sp_error:
Note: SQLCOM_CREATE_VIEW also handles 'ALTER VIEW' commands
as specified through the thd->lex->create_view_mode flag.
*/
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_create_view(thd, first_table, thd->lex->create_view_mode);
break;
}
@@ -4960,12 +5452,14 @@ create_sp_error:
if (check_table_access(thd, DROP_ACL, all_tables, FALSE, UINT_MAX, FALSE))
goto error;
/* Conditionally writes to binlog. */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_drop_view(thd, first_table, thd->lex->drop_mode);
break;
}
case SQLCOM_CREATE_TRIGGER:
{
/* Conditionally writes to binlog. */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_create_or_drop_trigger(thd, all_tables, 1);
break;
@@ -4973,6 +5467,7 @@ create_sp_error:
case SQLCOM_DROP_TRIGGER:
{
/* Conditionally writes to binlog. */
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
res= mysql_create_or_drop_trigger(thd, all_tables, 0);
break;
}
@@ -4993,7 +5488,13 @@ create_sp_error:
break;
case SQLCOM_XA_COMMIT:
if (trans_xa_commit(thd))
+ {
+ thd->mdl_context.release_transactional_locks();
+#ifdef WITH_WSREP
+ WSREP_DEBUG("XA commit failed, MDL released: %lu", thd->thread_id);
+#endif /* WITH_WSREP */
goto error;
+ }
thd->mdl_context.release_transactional_locks();
/*
We've just done a commit, reset transaction
@@ -5005,7 +5506,13 @@ create_sp_error:
break;
case SQLCOM_XA_ROLLBACK:
if (trans_xa_rollback(thd))
+ {
+ thd->mdl_context.release_transactional_locks();
+#ifdef WITH_WSREP
+ WSREP_DEBUG("XA rollback failed, MDL released: %lu", thd->thread_id);
+#endif /* WITH_WSREP */
goto error;
+ }
thd->mdl_context.release_transactional_locks();
/*
We've just done a rollback, reset transaction
@@ -5025,11 +5532,13 @@ create_sp_error:
my_ok(thd);
break;
case SQLCOM_INSTALL_PLUGIN:
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (! (res= mysql_install_plugin(thd, &thd->lex->comment,
&thd->lex->ident)))
my_ok(thd);
break;
case SQLCOM_UNINSTALL_PLUGIN:
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment,
&thd->lex->ident)))
my_ok(thd);
@@ -5174,6 +5683,9 @@ finish:
/* Free tables */
close_thread_tables(thd);
+#ifdef WITH_WSREP
+ thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK;
+#endif /* WITH_WSREP */
#ifndef DBUG_OFF
if (lex->sql_command != SQLCOM_SET_OPTION && ! thd->in_sub_stmt)
@@ -5227,6 +5739,22 @@ finish:
{
thd->mdl_context.release_statement_locks();
}
+ WSREP_TO_ISOLATION_END;
+
+#ifdef WITH_WSREP
+ /*
+ Force release of transactional locks if not in active MST and wsrep is on.
+ */
+ if (WSREP(thd) &&
+ ! thd->in_sub_stmt &&
+ ! thd->in_active_multi_stmt_transaction() &&
+ thd->mdl_context.has_transactional_locks())
+ {
+ WSREP_DEBUG("Forcing release of transactional locks for thd %lu",
+ thd->thread_id);
+ thd->mdl_context.release_transactional_locks();
+ }
+#endif /* WITH_WSREP */
DBUG_RETURN(res || thd->is_error());
}
@@ -5309,6 +5837,9 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables)
status_var_increment(thd->status_var.empty_queries);
else
status_var_add(thd->status_var.rows_sent, thd->get_sent_row_count());
+#ifdef WITH_WSREP
+ if (lex->sql_command == SQLCOM_SHOW_STATUS) wsrep_free_status(thd);
+#endif /* WITH_WSREP */
return res;
}
@@ -5338,8 +5869,8 @@ static bool execute_show_status(THD *thd, TABLE_LIST *all_tables)
}
-static bool execute_rename_table(THD *thd, TABLE_LIST *first_table,
- TABLE_LIST *all_tables)
+static bool check_rename_table(THD *thd, TABLE_LIST *first_table,
+ TABLE_LIST *all_tables)
{
DBUG_ASSERT(first_table == all_tables && first_table != 0);
TABLE_LIST *table;
@@ -5353,7 +5884,7 @@ static bool execute_rename_table(THD *thd, TABLE_LIST *first_table,
&table->next_local->grant.privilege,
&table->next_local->grant.m_internal,
0, 0))
- return 1;
+ return true;
TABLE_LIST old_list, new_list;
/*
we do not need initialize old_list and new_list because we will
@@ -5366,10 +5897,10 @@ static bool execute_rename_table(THD *thd, TABLE_LIST *first_table,
INSERT_ACL | CREATE_ACL) &&
check_grant(thd, INSERT_ACL | CREATE_ACL, &new_list, FALSE, 1,
FALSE)))
- return 1;
+ return true;
}
- return mysql_rename_tables(thd, first_table, 0);
+ return false;
}
@@ -6275,6 +6806,24 @@ void THD::reset_for_next_command()
thd->auto_inc_intervals_in_cur_stmt_for_binlog.empty();
thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0;
+#ifdef WITH_WSREP
+ /*
+ Autoinc variables should be adjusted only for locally executed
+ transactions. Appliers and replayers either process ROW events or
+ get autoinc variable values from Query_log_event. A MySQL slave
+ thread may be processing STATEMENT format events, but it should
+ use the autoinc values passed in binlog events, not the values
+ forced by the cluster.
+ */
+ if (WSREP(thd) && thd->wsrep_exec_mode == LOCAL_STATE &&
+ !thd->slave_thread && wsrep_auto_increment_control)
+ {
+ thd->variables.auto_increment_offset=
+ global_system_variables.auto_increment_offset;
+ thd->variables.auto_increment_increment=
+ global_system_variables.auto_increment_increment;
+ }
+#endif /* WITH_WSREP */
thd->query_start_used= 0;
thd->query_start_sec_part_used= 0;
thd->is_fatal_error= thd->time_zone_used= 0;
@@ -6480,6 +7029,109 @@ void mysql_init_multi_delete(LEX *lex)
lex->query_tables_last= &lex->query_tables;
}
+#ifdef WITH_WSREP
+/**
+  Parse and execute a statement via mysql_parse(), re-running autocommit
+  statements whose transaction was aborted by wsrep.
+
+  After every mysql_parse() call, and only when WSREP(thd) is set, the
+  conflict state is examined while holding thd->LOCK_wsrep_thd:
+    - MUST_ABORT              : wsrep_client_rollback() is called;
+    - MUST_REPLAY             : wsrep_replay_transaction() is called;
+    - ABORTED / CERT_FAILURE  : the statement is either scheduled for
+                                another round (state RETRY_AUTOCOMMIT) or
+                                ER_LOCK_DEADLOCK is raised.
+  The loop repeats for as long as the state is RETRY_AUTOCOMMIT.
+
+  @param thd           Current thread
+  @param rawbuf        Beginning of the query text
+  @param length        Length of the query text
+  @param parser_state  Parser state; reset to (rawbuf, length) before a retry
+*/
+static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
+                              Parser_state *parser_state)
+{
+  /*
+    A retry is only considered for a statement that started outside
+    multi-statement transaction mode, with no pending conflict, on a
+    thread that is not a wsrep applier.
+  */
+  bool is_autocommit=
+    !thd->in_multi_stmt_transaction_mode() &&
+    thd->wsrep_conflict_state == NO_CONFLICT &&
+    !thd->wsrep_applier;
+
+  do
+  {
+    if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT)
+    {
+      /* Second or later round: clear the retry marker before re-running. */
+      thd->wsrep_conflict_state= NO_CONFLICT;
+      /* Performance Schema Interface instrumentation, begin */
+      thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi,
+                              com_statement_info[thd->get_command()].m_key);
+      MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(),
+                               thd->query_length());
+    }
+    mysql_parse(thd, rawbuf, length, parser_state);
+
+    if (WSREP(thd)) {
+      /* wsrep BF abort in query exec phase */
+      mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+      if (thd->wsrep_conflict_state == MUST_ABORT) {
+        wsrep_client_rollback(thd);
+
+        WSREP_DEBUG("abort in exec query state, avoiding autocommit");
+      }
+
+      if (thd->wsrep_conflict_state == MUST_REPLAY)
+      {
+        wsrep_replay_transaction(thd);
+      }
+
+      /* setting error code for BF aborted trxs */
+      if (thd->wsrep_conflict_state == ABORTED ||
+          thd->wsrep_conflict_state == CERT_FAILURE)
+      {
+        mysql_reset_thd_for_next_command(thd);
+        thd->killed= NOT_KILLED;
+        if (is_autocommit &&
+            thd->lex->sql_command != SQLCOM_SELECT &&
+            (thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit))
+        {
+          WSREP_DEBUG("wsrep retrying AC query: %s",
+                      (thd->query()) ? thd->query() : "void");
+
+          /* Performance Schema Interface instrumentation, end */
+          MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da());
+          thd->m_statement_psi= NULL;
+          thd->m_digest= NULL;
+          close_thread_tables(thd);
+
+          thd->wsrep_conflict_state= RETRY_AUTOCOMMIT;
+          thd->wsrep_retry_counter++;            // grow
+          wsrep_copy_query(thd);
+          thd->set_time();
+          parser_state->reset(rawbuf, length);
+        }
+        else
+        {
+          /*
+            thd->query() is guarded against NULL exactly as in the retry
+            branch above; a NULL argument for %s is not valid.
+          */
+          WSREP_DEBUG("%s, thd: %lu is_AC: %d, retry: %lu - %lu SQL: %s",
+                      (thd->wsrep_conflict_state == ABORTED) ?
+                      "BF Aborted" : "cert failure",
+                      thd->thread_id, is_autocommit, thd->wsrep_retry_counter,
+                      thd->variables.wsrep_retry_autocommit,
+                      (thd->query()) ? thd->query() : "void");
+          my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction");
+          thd->killed= NOT_KILLED;
+          thd->wsrep_conflict_state= NO_CONFLICT;
+          /*
+            The counter is reset unconditionally: the state has just been
+            set to NO_CONFLICT, so the former "!= REPLAYING" test placed
+            here could never be false.
+          */
+          thd->wsrep_retry_counter= 0;           // reset
+        }
+      }
+      else
+      {
+        set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok
+      }
+      mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+    }
+
+    /* If retry is requested clean up explain structure */
+    if (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT && thd->lex->explain)
+      delete_explain_query(thd->lex);
+
+  } while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT);
+
+  /*
+    Free the retry copy of the query, if there is one, and reset the
+    related bookkeeping fields.
+    NOTE(review): the copy is presumably made by wsrep_copy_query() --
+    confirm against its definition.
+  */
+  if (thd->wsrep_retry_query)
+  {
+    WSREP_DEBUG("releasing retry_query: conf %d sent %d kill %d errno %d SQL %s",
+                thd->wsrep_conflict_state,
+                thd->get_stmt_da()->is_sent(),
+                thd->killed,
+                thd->get_stmt_da()->is_error() ? thd->get_stmt_da()->sql_errno() : 0,
+                thd->wsrep_retry_query);
+    my_free(thd->wsrep_retry_query);
+    thd->wsrep_retry_query = NULL;
+    thd->wsrep_retry_query_len = 0;
+    thd->wsrep_retry_command = COM_CONNECT;
+  }
+}
+#endif /* WITH_WSREP */
/*
When you modify mysql_parse(), you may need to modify
@@ -6534,6 +7186,7 @@ void mysql_parse(THD *thd, char *rawbuf, uint length,
MYSQL_REFINE_STATEMENT(thd->m_statement_psi,
sql_statement_info[thd->lex->sql_command].
m_key);
+
#ifndef NO_EMBEDDED_ACCESS_CHECKS
if (mqh_used && thd->user_connect &&
check_mqh(thd, lex->sql_command))
@@ -6608,6 +7261,12 @@ void mysql_parse(THD *thd, char *rawbuf, uint length,
sql_statement_info[SQLCOM_SELECT].m_key);
status_var_increment(thd->status_var.com_stat[SQLCOM_SELECT]);
thd->update_stats();
+#ifdef WITH_WSREP
+ if (WSREP_CLIENT(thd))
+ {
+ thd->wsrep_sync_wait_gtid= WSREP_GTID_UNDEFINED;
+ }
+#endif /* WITH_WSREP */
}
DBUG_VOID_RETURN;
}
@@ -7506,8 +8165,14 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ
faster and do a harder kill than KILL_SYSTEM_THREAD;
*/
+#ifdef WITH_WSREP
+ if (((thd->security_ctx->master_access & SUPER_ACL) ||
+ thd->security_ctx->user_matches(tmp->security_ctx)) &&
+ !wsrep_thd_is_BF((void *)tmp, true))
+#else
if ((thd->security_ctx->master_access & SUPER_ACL) ||
thd->security_ctx->user_matches(tmp->security_ctx))
+#endif /* WITH_WSREP */
{
tmp->awake(kill_signal);
error=0;
@@ -8384,7 +9049,6 @@ LEX_USER *create_definer(THD *thd, LEX_STRING *user_name, LEX_STRING *host_name)
return definer;
}
-
/**
Check that byte length of a string does not exceed some limit.
diff --git a/sql/sql_parse.h b/sql/sql_parse.h
index fa414911093..368bba91c20 100644
--- a/sql/sql_parse.h
+++ b/sql/sql_parse.h
@@ -208,6 +208,31 @@ inline bool is_supported_parser_charset(CHARSET_INFO *cs)
{
return MY_TEST(cs->mbminlen == 1);
}
+#ifdef WITH_WSREP
+/*
+  Helper macros wrapping wsrep_to_isolation_begin() and
+  wsrep_to_isolation_end().
+
+  All of them expect a variable named thd to be visible at the point of
+  use. The two BEGIN variants additionally expect a label named error in
+  the enclosing function, because they jump to it when
+  wsrep_to_isolation_begin() returns non-zero.
+
+  WSREP_MYSQL_DB is the database name "mysql" cast to char *, for use as
+  the db_ argument.
+
+  WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) calls
+  wsrep_to_isolation_begin() only when WSREP(thd) is true.
+
+  NOTE: each macro expands to a bare if statement, and the BEGIN variants
+  already end with a semicolon. Do not use one as the unbraced body of an
+  if that has an else branch: the else would bind to the if hidden inside
+  the macro.
+*/
+#define WSREP_MYSQL_DB (char *)"mysql"
+#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) \
+ if (WSREP(thd) && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) goto error;
+/*
+  WSREP_TO_ISOLATION_END calls wsrep_to_isolation_end(thd) when WSREP(thd)
+  is true, or when thd is non-NULL and thd->wsrep_exec_mode is TOTAL_ORDER.
+*/
+#define WSREP_TO_ISOLATION_END \
+ if (WSREP(thd) || (thd && thd->wsrep_exec_mode==TOTAL_ORDER)) \
+ wsrep_to_isolation_end(thd);
+
+/*
+ Same as WSREP_TO_ISOLATION_BEGIN, but wsrep_to_isolation_begin() is
+ skipped when thd->lex->no_write_to_binlog is set, i.e. for statements
+ that use LOCAL or NO_WRITE_TO_BINLOG.
+*/
+#define WSREP_TO_ISOLATION_BEGIN_WRTCHK(db_, table_, table_list_) \
+ if (WSREP(thd) && !thd->lex->no_write_to_binlog \
+ && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) goto error;
+
+#else
+/*
+  Without WITH_WSREP the three macros expand to nothing, so call sites
+  need no #ifdef of their own.
+*/
+#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_)
+#define WSREP_TO_ISOLATION_END
+#define WSREP_TO_ISOLATION_BEGIN_WRTCHK(db_, table_, table_list_)
+
+#endif /* WITH_WSREP */
#endif /* SQL_PARSE_INCLUDED */
diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc
index 9b471f97521..2a76c8d6671 100644
--- a/sql/sql_partition_admin.cc
+++ b/sql/sql_partition_admin.cc
@@ -530,6 +530,21 @@ bool Sql_cmd_alter_table_exchange_partition::
&alter_prelocking_strategy))
DBUG_RETURN(true);
+#ifdef WITH_WSREP
+  /* Forward declaration */
+  TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl);
+
+  /*
+    Begin wsrep isolation for the partitioned table before the exchange.
+    The call is made only when WSREP(thd) is true, which is the same guard
+    used by ALTER TABLE ... TRUNCATE PARTITION and by the
+    WSREP_TO_ISOLATION_BEGIN macro.
+    It is skipped when the statement is binlogged in row format and
+    find_temporary_table() finds table_list among the temporary tables.
+  */
+  if (WSREP(thd) &&
+      (!thd->is_current_stmt_binlog_format_row() ||
+       /* TODO: Do we really need to check for temp tables in this case? */
+       !find_temporary_table(thd, table_list)) &&
+      wsrep_to_isolation_begin(thd, table_list->db, table_list->table_name,
+                               NULL))
+  {
+    WSREP_WARN("ALTER TABLE EXCHANGE PARTITION isolation failure");
+    DBUG_RETURN(TRUE);
+  }
+#endif /* WITH_WSREP */
+
+
part_table= table_list->table;
swap_table= swap_table_list->table;
@@ -763,6 +778,20 @@ bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd)
if (check_one_table_access(thd, DROP_ACL, first_table))
DBUG_RETURN(TRUE);
+#ifdef WITH_WSREP
+ /* Forward declaration */
+ TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl);
+
+ if (WSREP(thd) && (!thd->is_current_stmt_binlog_format_row() ||
+ !find_temporary_table(thd, first_table)) &&
+ wsrep_to_isolation_begin(
+ thd, first_table->db, first_table->table_name, NULL)
+ )
+ {
+ WSREP_WARN("ALTER TABLE TRUNCATE PARTITION isolation failure");
+ DBUG_RETURN(TRUE);
+ }
+#endif /* WITH_WSREP */
if (open_tables(thd, &first_table, &table_counter, 0))
DBUG_RETURN(true);
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index e34409b3532..df24f398e97 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -3005,11 +3005,17 @@ void plugin_thdvar_init(THD *thd)
thd->variables.dynamic_variables_size= 0;
thd->variables.dynamic_variables_ptr= 0;
+#ifdef WITH_WSREP
+ if (!WSREP(thd) || !thd->wsrep_applier) {
+#endif
mysql_mutex_lock(&LOCK_plugin);
thd->variables.table_plugin=
intern_plugin_lock(NULL, global_system_variables.table_plugin);
intern_plugin_unlock(NULL, old_table_plugin);
mysql_mutex_unlock(&LOCK_plugin);
+#ifdef WITH_WSREP
+ }
+#endif
DBUG_VOID_RETURN;
}
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index 1a02a2ae84c..1bb465d220b 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -3549,6 +3549,10 @@ Prepared_statement::set_parameters(String *expanded_query,
return res;
}
+#ifdef WITH_WSREP
+/* forward declaration */
+void wsrep_replay_transaction(THD *thd);
+#endif /* WITH_WSREP */
/**
Execute a prepared statement. Re-prepare it a limited number
@@ -3628,6 +3632,24 @@ reexecute:
error= execute(expanded_query, open_cursor) || thd->is_error();
thd->m_reprepare_observer= NULL;
+#ifdef WITH_WSREP
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ switch (thd->wsrep_conflict_state)
+ {
+ case CERT_FAILURE:
+ WSREP_DEBUG("PS execute fail for CERT_FAILURE: thd: %ld err: %d",
+ thd->thread_id, thd->get_stmt_da()->sql_errno() );
+ thd->wsrep_conflict_state = NO_CONFLICT;
+ break;
+
+ case MUST_REPLAY:
+ (void) wsrep_replay_transaction(thd);
+ /* fallthrough */
+
+ default: break;
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+#endif /* WITH_WSREP */
if ((sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) &&
error && !thd->is_fatal_error && !thd->killed &&
diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc
index f9c5757f721..704e2f84437 100644
--- a/sql/sql_reload.cc
+++ b/sql/sql_reload.cc
@@ -30,6 +30,7 @@
#include "debug_sync.h"
#include "des_key_file.h"
+
static void disable_checkpoints(THD *thd);
/**
@@ -154,6 +155,12 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options,
{
if (mysql_bin_log.rotate_and_purge(true))
*write_to_binlog= -1;
+
+ if (WSREP_ON)
+ {
+ /* Wait for last binlog checkpoint event to be logged. */
+ mysql_bin_log.wait_for_last_checkpoint_event();
+ }
}
}
if (options & REFRESH_RELAY_LOG)
@@ -253,7 +260,18 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options,
}
if (options & REFRESH_CHECKPOINT)
disable_checkpoints(thd);
- }
+#ifdef WITH_WSREP
+ /*
+ We need to do it second time after wsrep appliers were blocked in
+ make_global_read_lock_block_commit(thd) above since they could have
+ modified the tables too.
+
+ NOTE(review): the last argument here is a literal TRUE, whereas the other
+ close_cached_tables() call in this function passes a lock-wait timeout
+ (thd->variables.lock_wait_timeout) in that position. Confirm that a
+ value of 1 is really what is intended for that parameter.
+ */
+ if (WSREP(thd) &&
+ close_cached_tables(thd, tables, (options & REFRESH_FAST) ?
+ FALSE : TRUE, TRUE))
+ result= 1;
+#endif /* WITH_WSREP */
+ }
else
{
if (thd && thd->locked_tables_mode)
@@ -297,6 +315,16 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options,
}
}
+#ifdef WITH_WSREP
+ if (thd && thd->wsrep_applier)
+ {
+ /*
+ In case of applier thread, do not wait for table share(s) to be
+ removed from table definition cache.
+ */
+ options|= REFRESH_FAST;
+ }
+#endif
if (close_cached_tables(thd, tables,
((options & REFRESH_FAST) ? FALSE : TRUE),
(thd ? thd->variables.lock_wait_timeout :
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index a2acf52d44e..f1bbe58a8b8 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -2996,6 +2996,15 @@ int start_slave(THD* thd , Master_info* mi, bool net_report)
err:
mi->unlock_slave_threads();
+#ifdef WITH_WSREP
+ /*
+ Set the thread's proc info on the way out: a marker string when wsrep is
+ active for this thread, 0 otherwise (and always 0 without WITH_WSREP).
+
+ NOTE(review): the hunk header places this code in start_slave(), yet the
+ marker string reads "exit stop_slave()". This looks like a copy-paste
+ slip; confirm before relying on the string in processlist output.
+ */
+ if (WSREP(thd))
+ thd_proc_info(thd, "exit stop_slave()");
+ else
+ thd_proc_info(thd, 0);
+#else /* WITH_WSREP */
+ thd_proc_info(thd, 0);
+#endif /* WITH_WSREP */
+
if (slave_errno)
{
if (net_report)
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index a3d834b0e42..1918cbab720 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -59,6 +59,10 @@
#include "debug_sync.h"
#include "keycaches.h"
+#if !defined(MYSQL_MAX_VARIABLE_VALUE_LEN)
+#define MYSQL_MAX_VARIABLE_VALUE_LEN 1024
+#endif // !defined(MYSQL_MAX_VARIABLE_VALUE_LEN)
+
#ifdef WITH_PARTITION_STORAGE_ENGINE
#include "ha_partition.h"
#endif
@@ -118,8 +122,6 @@ static void get_cs_converted_string_value(THD *thd,
static int show_create_view(THD *thd, TABLE_LIST *table, String *buff);
-static void append_algorithm(TABLE_LIST *table, String *buff);
-
static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table);
/**
@@ -2099,32 +2101,30 @@ static void store_key_options(THD *thd, String *packet, TABLE *table,
}
}
-
-void
-view_store_options(THD *thd, TABLE_LIST *table, String *buff)
-{
- append_algorithm(table, buff);
- append_definer(thd, buff, &table->definer.user, &table->definer.host);
- if (table->view_suid)
- buff->append(STRING_WITH_LEN("SQL SECURITY DEFINER "));
- else
- buff->append(STRING_WITH_LEN("SQL SECURITY INVOKER "));
-}
-
-
/*
- Append DEFINER clause to the given buffer.
+ Append ALGORITHM clause to the given buffer.
SYNOPSIS
- append_definer()
- thd [in] thread handle
- buffer [inout] buffer to hold DEFINER clause
- definer_user [in] user name part of definer
- definer_host [in] host name part of definer
+ append_algorithm()
+ table [in] table list
+ buff [inout] buffer to hold the ALGORITHM clause
+ check_inherit [in] if true, do nothing if algorithm is INHERIT
*/
-static void append_algorithm(TABLE_LIST *table, String *buff)
+static void append_algorithm(TABLE_LIST *table, String *buff,
+ bool check_inherit)
{
+ int16 algorithm= (int16) table->algorithm;
+
+ DBUG_ENTER("append_algorithm");
+
+ /*
+ Handle a special case when ALGORITHM is not specified, in which case we
+ simply return.
+ */
+ if (check_inherit && (algorithm == VIEW_ALGORITHM_INHERIT))
+ DBUG_VOID_RETURN;
+
buff->append(STRING_WITH_LEN("ALGORITHM="));
switch ((int16)table->algorithm) {
case VIEW_ALGORITHM_UNDEFINED:
@@ -2139,6 +2139,8 @@ static void append_algorithm(TABLE_LIST *table, String *buff)
default:
DBUG_ASSERT(0); // never should happen
}
+
+ DBUG_VOID_RETURN;
}
/*
@@ -2157,7 +2159,7 @@ void append_definer(THD *thd, String *buffer, const LEX_STRING *definer_user,
{
buffer->append(STRING_WITH_LEN("DEFINER="));
append_identifier(thd, buffer, definer_user->str, definer_user->length);
- if (definer_host->str[0])
+ if (definer_host->str && definer_host->str[0])
{
buffer->append('@');
append_identifier(thd, buffer, definer_host->str, definer_host->length);
@@ -2165,6 +2167,23 @@ void append_definer(THD *thd, String *buffer, const LEX_STRING *definer_user,
buffer->append(' ');
}
+/*
+  Append a view's option clauses to the given buffer.
+
+  SYNOPSIS
+    view_store_options4()
+    thd            [in]     thread handle, passed on to append_definer()
+    table          [in]     view whose options are written
+    buff           [inout]  buffer receiving the clauses
+    check_inherit  [in]     forwarded to append_algorithm(); if true, the
+                            ALGORITHM clause is omitted when the algorithm
+                            is INHERIT
+
+  The clauses are appended in this order: ALGORITHM (via append_algorithm()),
+  DEFINER (via append_definer()), then "SQL SECURITY DEFINER " or
+  "SQL SECURITY INVOKER " depending on table->view_suid.
+*/
+void
+view_store_options4(THD *thd, TABLE_LIST *table, String *buff,
+ bool check_inherit)
+{
+ append_algorithm(table, buff, check_inherit);
+ append_definer(thd, buff, &table->definer.user, &table->definer.host);
+ if (table->view_suid)
+ buff->append(STRING_WITH_LEN("SQL SECURITY DEFINER "));
+ else
+ buff->append(STRING_WITH_LEN("SQL SECURITY INVOKER "));
+}
+
+/*
+  Three-argument form kept for existing callers: same as
+  view_store_options4() with check_inherit= false, so the ALGORITHM clause
+  is always written.
+*/
+void
+view_store_options(THD *thd, TABLE_LIST *table, String *buff)
+{
+ view_store_options4(thd, table, buff, false);
+}
static int show_create_view(THD *thd, TABLE_LIST *table, String *buff)
{
@@ -3019,11 +3038,39 @@ static bool show_status_array(THD *thd, const char *wild,
*prefix_end++= '_';
len=name_buffer + sizeof(name_buffer) - prefix_end;
+#ifdef WITH_WSREP
+ bool is_wsrep_var= FALSE;
+ /*
+ This is a workaround for lp:1306875 (PBX) to skip switching of wsrep
+ status variable name's first letter to uppercase. This is an optimization
+ for status variables defined under wsrep plugin.
+ TODO: remove once lp:1306875 has been addressed.
+ */
+ if (*prefix && !my_strcasecmp(system_charset_info, prefix, "wsrep"))
+ {
+ is_wsrep_var= TRUE;
+ }
+#endif /* WITH_WSREP */
+
for (; variables->name; variables++)
{
bool wild_checked= 0;
strnmov(prefix_end, variables->name, len);
name_buffer[sizeof(name_buffer)-1]=0; /* Safety */
+
+#ifdef WITH_WSREP
+    /*
+      If the prefix is empty, we are looking at status variables defined
+      directly under mysqld.cc. Do not capitalize wsrep status variable
+      names until lp:1306875 has been fixed.
+
+      The flag is recomputed for every variable in this case, so that one
+      wsrep name does not switch off capitalization for all variables that
+      follow it in the same array. With a non-empty prefix the value decided
+      before the loop is left untouched.
+      TODO: remove once lp:1306875 has been addressed.
+    */
+    if (!(*prefix))
+      is_wsrep_var= !strncasecmp(name_buffer, "wsrep", strlen("wsrep"));
+#endif /* WITH_WSREP */
+
if (ucase_names)
my_caseup_str(system_charset_info, name_buffer);
else
@@ -3032,8 +3079,13 @@ static bool show_status_array(THD *thd, const char *wild,
DBUG_ASSERT(name_buffer[0] >= 'a');
DBUG_ASSERT(name_buffer[0] <= 'z');
+#ifdef WITH_WSREP
+ // TODO: remove once lp:1306875 has been addressed.
+ if (status_var && (is_wsrep_var == FALSE))
+#else
/* traditionally status variables have a first letter uppercased */
if (status_var)
+#endif /* WITH_WSREP */
name_buffer[0]-= 'a' - 'A';
}
@@ -8934,7 +8986,8 @@ ST_FIELD_INFO variables_fields_info[]=
{
{"VARIABLE_NAME", 64, MYSQL_TYPE_STRING, 0, 0, "Variable_name",
SKIP_OPEN_TABLE},
- {"VARIABLE_VALUE", 1024, MYSQL_TYPE_STRING, 0, 1, "Value", SKIP_OPEN_TABLE},
+ {"VARIABLE_VALUE", MYSQL_MAX_VARIABLE_VALUE_LEN, MYSQL_TYPE_STRING, 0, 1,
+ "Value", SKIP_OPEN_TABLE},
{0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
};
diff --git a/sql/sql_show.h b/sql/sql_show.h
index 9ca60557cc0..c1c6388216d 100644
--- a/sql/sql_show.h
+++ b/sql/sql_show.h
@@ -109,6 +109,8 @@ void free_status_vars();
void reset_status_vars();
bool show_create_trigger(THD *thd, const sp_name *trg_name);
void view_store_options(THD *thd, TABLE_LIST *table, String *buff);
+void view_store_options4(THD *thd, TABLE_LIST *table, String *buff,
+ bool check_inherit);
void init_fill_schema_files_row(TABLE* table);
bool schema_table_store_record(THD *thd, TABLE *table);
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 2eff8fd5e2f..e591b8a1eb7 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -5306,8 +5306,64 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table,
bool do_logging= FALSE;
uint not_used;
int create_res;
+ uint save_thd_create_info_options;
DBUG_ENTER("mysql_create_like_table");
+#ifdef WITH_WSREP
+ /*
+ wsrep handling of CREATE TABLE ... LIKE for non-applier threads.
+ Three cases are distinguished below:
+ - the new table is TEMPORARY: nothing is replicated, only a debug message;
+ - the source is not a temporary table of this thread: plain isolation;
+ - the source is a temporary table of this thread: its CREATE statement is
+ generated and attached as wsrep_TOI_pre_query for the isolation call.
+ */
+ if (WSREP(thd) && !thd->wsrep_applier)
+ {
+ TABLE *tmp_table;
+ bool is_tmp_table= FALSE;
+
+ /* Look for the source table among this thread's temporary tables. */
+ for (tmp_table= thd->temporary_tables; tmp_table; tmp_table=tmp_table->next)
+ {
+ if (!strcmp(src_table->db, tmp_table->s->db.str) &&
+ !strcmp(src_table->table_name, tmp_table->s->table_name.str))
+ {
+ is_tmp_table= TRUE;
+ break;
+ }
+ }
+ if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
+ {
+ /* CREATE TEMPORARY TABLE LIKE must be skipped from replication */
+ WSREP_DEBUG("CREATE TEMPORARY TABLE LIKE... skipped replication\n %s",
+ thd->query());
+ }
+ else if (!is_tmp_table)
+ {
+ /* this is straight CREATE TABLE LIKE... with no tmp tables */
+ WSREP_TO_ISOLATION_BEGIN(table->db, table->table_name, NULL);
+ }
+ else
+ {
+ /* here we have CREATE TABLE LIKE <temporary table>
+ the temporary table definition will be needed in slaves to
+ enable the create to succeed
+ */
+ TABLE_LIST tbl;
+ bzero((void*) &tbl, sizeof(tbl));
+ tbl.db= src_table->db;
+ tbl.table_name= tbl.alias= src_table->table_name;
+ tbl.table= tmp_table;
+ char buf[2048];
+ String query(buf, sizeof(buf), system_charset_info);
+ query.length(0); // Have to zero it since constructor doesn't
+
+ (void) show_create_table(thd, &tbl, &query, NULL, WITH_DB_NAME);
+ /* NOTE(review): query.ptr() is printed with %s; confirm it is NUL-terminated here. */
+ WSREP_DEBUG("TMP TABLE: %s", query.ptr());
+
+ /*
+ The pre-query points into the local String above, so it is only set
+ for the duration of the isolation call and reset right after it.
+ */
+ thd->wsrep_TOI_pre_query= query.ptr();
+ thd->wsrep_TOI_pre_query_len= query.length();
+
+ WSREP_TO_ISOLATION_BEGIN(table->db, table->table_name, NULL);
+
+ thd->wsrep_TOI_pre_query= NULL;
+ thd->wsrep_TOI_pre_query_len= 0;
+ }
+ }
+#endif
+
/*
We the open source table to get its description in HA_CREATE_INFO
and Alter_info objects. This also acquires a shared metadata lock
@@ -5321,7 +5377,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table,
*/
/* Copy temporarily the statement flags to thd for lock_table_names() */
- uint save_thd_create_info_options= thd->lex->create_info.options;
+ save_thd_create_info_options= thd->lex->create_info.options;
thd->lex->create_info.options|= create_info->options;
res= open_tables(thd, &thd->lex->query_tables, &not_used, 0);
thd->lex->create_info.options= save_thd_create_info_options;
@@ -5595,6 +5651,13 @@ err:
res= 1;
}
DBUG_RETURN(res);
+
+#ifdef WITH_WSREP
+  /*
+    Failure exit for the wsrep isolation step (presumably the jump target of
+    WSREP_TO_ISOLATION_BEGIN; its _WRTCHK sibling does "goto error").
+    Clear both the pre-query pointer and its length, as the normal path
+    does, so that no stale length is left behind with a NULL pointer.
+  */
+ error:
+  thd->wsrep_TOI_pre_query= NULL;
+  thd->wsrep_TOI_pre_query_len= 0;
+  DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
+
}
@@ -8166,6 +8229,10 @@ simple_rename_or_index_change(THD *thd, TABLE_LIST *table_list,
: HA_EXTRA_FORCE_REOPEN;
DBUG_ENTER("simple_rename_or_index_change");
+#ifdef WITH_WSREP
+ bool do_log_write(true);
+#endif /* WITH_WSREP */
+
if (keys_onoff != Alter_info::LEAVE_AS_IS)
{
if (wait_while_table_is_used(thd, table, extra_func))
@@ -8225,7 +8292,14 @@ simple_rename_or_index_change(THD *thd, TABLE_LIST *table_list,
if (!error)
{
- error= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
+#ifdef WITH_WSREP
+ /* The binlog write is skipped only if wsrep is active and do_log_write is false. */
+ if (!WSREP(thd) || do_log_write) {
+#endif /* WITH_WSREP */
+ error= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
+
if (!error)
my_ok(thd);
}
@@ -8352,6 +8426,17 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
DEBUG_SYNC(thd, "alter_opened_table");
+#ifdef WITH_WSREP
+ DBUG_EXECUTE_IF("sync.alter_opened_table",
+ {
+ const char act[]=
+ "now "
+ "wait_for signal.alter_opened_table";
+ DBUG_ASSERT(!debug_sync_set_action(thd,
+ STRING_WITH_LEN(act)));
+ };);
+#endif // WITH_WSREP
+
if (error)
DBUG_RETURN(true);
diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc
index b17775abb7c..327b5bb0260 100644
--- a/sql/sql_trigger.cc
+++ b/sql/sql_trigger.cc
@@ -434,8 +434,14 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create)
binlogged, so they share the same danger, so trust_function_creators
applies to them too.
*/
+#ifdef WITH_WSREP
+ if (!trust_function_creators &&
+ (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) &&
+ !(thd->security_ctx->master_access & SUPER_ACL))
+#else
if (!trust_function_creators && mysql_bin_log.is_open() &&
!(thd->security_ctx->master_access & SUPER_ACL))
+#endif /* WITH_WSREP */
{
my_error(ER_BINLOG_CREATE_ROUTINE_NEED_SUPER, MYF(0));
DBUG_RETURN(TRUE);
@@ -2440,3 +2446,59 @@ bool load_table_name_for_trigger(THD *thd,
DBUG_RETURN(FALSE);
}
+#ifdef WITH_WSREP
+/*
+  Build the text of the current CREATE TRIGGER statement with an explicit
+  DEFINER clause and pass it to wsrep_to_buf_helper().
+
+  The text is "CREATE ", followed by the output of append_definer(), followed
+  by the whitespace-trimmed statement definition delimited by
+  lex->stmt_definition_begin and lex->stmt_definition_end.
+
+  @param thd      current thread; thd->lex->definer is filled in with a
+                  default definer when it is unset and this is not a slave
+                  thread
+  @param buf      passed through to wsrep_to_buf_helper()
+  @param buf_len  passed through to wsrep_to_buf_helper()
+
+  @return 1 if the default definer could not be created or the definer could
+          not be resolved, otherwise the result of wsrep_to_buf_helper()
+*/
+int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len)
+{
+ LEX *lex= thd->lex;
+ String stmt_query;
+
+ LEX_STRING definer_user;
+ LEX_STRING definer_host;
+
+ /* No explicit definer: create a default one, except on slave threads. */
+ if (!lex->definer)
+ {
+ if (!thd->slave_thread)
+ {
+ if (!(lex->definer= create_default_definer(thd, false)))
+ return 1;
+ }
+ }
+
+ if (lex->definer)
+ {
+ /* SUID trigger. */
+ LEX_USER *d= get_current_user(thd, lex->definer);
+
+ if (!d)
+ return 1;
+
+ definer_user= d->user;
+ definer_host= d->host;
+ }
+ else
+ {
+ /* non-SUID trigger. */
+
+ definer_user.str= 0;
+ definer_user.length= 0;
+
+ definer_host.str= 0;
+ definer_host.length= 0;
+ }
+
+ stmt_query.append(STRING_WITH_LEN("CREATE "));
+
+ /*
+ NOTE(review): append_definer() is called in the non-SUID case as well,
+ where definer_user.str is 0; confirm that writing a DEFINER clause with
+ an empty user is intended there.
+ */
+ append_definer(thd, &stmt_query, &definer_user, &definer_host);
+
+ LEX_STRING stmt_definition;
+ stmt_definition.str= (char*) thd->lex->stmt_definition_begin;
+ stmt_definition.length= thd->lex->stmt_definition_end
+ - thd->lex->stmt_definition_begin;
+ trim_whitespace(thd->charset(), & stmt_definition);
+
+ stmt_query.append(stmt_definition.str, stmt_definition.length);
+
+ return wsrep_to_buf_helper(thd, stmt_query.c_ptr(), stmt_query.length(),
+ buf, buf_len);
+}
+#endif /* WITH_WSREP */
diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc
index 05869b70c8f..16c2a5027e3 100644
--- a/sql/sql_truncate.cc
+++ b/sql/sql_truncate.cc
@@ -24,6 +24,9 @@
#include "sql_acl.h" // DROP_ACL
#include "sql_parse.h" // check_one_table_access()
#include "sql_truncate.h"
+#ifdef WITH_WSREP
+#include "wsrep_mysqld.h"
+#endif /* WITH_WSREP */
#include "sql_show.h" //append_identifier()
@@ -483,6 +486,12 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref)
{
bool hton_can_recreate;
+#ifdef WITH_WSREP
+ if (WSREP(thd) && wsrep_to_isolation_begin(thd,
+ table_ref->db,
+ table_ref->table_name, NULL))
+ DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
if (lock_table(thd, table_ref, &hton_can_recreate))
DBUG_RETURN(TRUE);
@@ -565,7 +574,6 @@ bool Sql_cmd_truncate_table::execute(THD *thd)
if (! (res= truncate_table(thd, first_table)))
my_ok(thd);
-
DBUG_RETURN(res);
}
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 9ca96bd702e..1a136e5158b 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -983,7 +983,11 @@ int mysql_update(THD *thd,
*/
if ((error < 0) || thd->transaction.stmt.modified_non_trans_table)
{
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (error < 0)
@@ -2255,7 +2259,11 @@ void multi_update::abort_result_set()
The query has to binlog because there's a modified non-transactional table
either from the query's list or via a stored routine: bug#13270,23333
*/
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
/*
THD::killed status might not have been set ON at time of an error
@@ -2527,7 +2535,11 @@ bool multi_update::send_eof()
if (local_error == 0 || thd->transaction.stmt.modified_non_trans_table)
{
+#ifdef WITH_WSREP
+ if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())
+#else
if (mysql_bin_log.is_open())
+#endif
{
int errcode= 0;
if (local_error == 0)
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 95a683165e2..5a0c18b5109 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -7151,7 +7151,7 @@ alter:
}
view_tail
{}
- | ALTER definer_opt EVENT_SYM sp_name
+ | ALTER definer_opt remember_name EVENT_SYM sp_name
{
/*
It is safe to use Lex->spname because
@@ -7163,9 +7163,12 @@ alter:
if (!(Lex->event_parse_data= Event_parse_data::new_instance(thd)))
MYSQL_YYABORT;
- Lex->event_parse_data->identifier= $4;
+ Lex->event_parse_data->identifier= $5;
Lex->sql_command= SQLCOM_ALTER_EVENT;
+#ifdef WITH_WSREP
+ Lex->stmt_definition_begin= $3;
+#endif
}
ev_alter_on_schedule_completion
opt_ev_rename_to
@@ -7173,7 +7176,7 @@ alter:
opt_ev_comment
opt_ev_sql_stmt
{
- if (!($6 || $7 || $8 || $9 || $10))
+ if (!($7 || $8 || $9 || $10 || $11))
{
my_parse_error(ER(ER_SYNTAX_ERROR));
MYSQL_YYABORT;
@@ -7183,6 +7186,9 @@ alter:
can overwrite it
*/
Lex->sql_command= SQLCOM_ALTER_EVENT;
+#ifdef WITH_WSREP
+ Lex->stmt_definition_end= (char*)YYLIP->get_cpp_ptr();
+#endif
}
| ALTER TABLESPACE alter_tablespace_info
{
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 8d164ea86f5..000a424faeb 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -446,6 +446,27 @@ static bool binlog_format_check(sys_var *self, THD *thd, set_var *var)
if (check_has_super(self, thd, var))
return true;
+#ifdef WITH_WSREP
+ /*
+ MariaDB Galera does not support STATEMENT or MIXED binlog format currently.
+ */
+ if (WSREP(thd) &&
+ var->save_result.ulonglong_value != BINLOG_FORMAT_ROW)
+ {
+ // Attach a warning to the statement (this is not the error log).
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
+ "MariaDB Galera does not support binlog format: %s",
+ binlog_format_names[var->save_result.ulonglong_value]);
+
+ // A GLOBAL change is additionally reported through WSREP_ERROR and
+ // rejected; other scopes only get the warning and continue below.
+ if (var->type == OPT_GLOBAL)
+ {
+ WSREP_ERROR("MariaDB Galera does not support binlog format: %s",
+ binlog_format_names[var->save_result.ulonglong_value]);
+ return true;
+ }
+ }
+#endif
+
if (var->type == OPT_GLOBAL)
return false;
@@ -3363,6 +3384,10 @@ static bool fix_autocommit(sys_var *self, THD *thd, enum_var_type type)
if (trans_commit_stmt(thd) || trans_commit(thd))
{
thd->variables.option_bits&= ~OPTION_AUTOCOMMIT;
+ thd->mdl_context.release_transactional_locks();
+#ifdef WITH_WSREP
+ WSREP_DEBUG("autocommit, MDL TRX lock released: %lu", thd->thread_id);
+#endif /* WITH_WSREP */
return true;
}
/*
@@ -4474,6 +4499,278 @@ static Sys_var_tz Sys_time_zone(
SESSION_VAR(time_zone), NO_CMD_LINE,
DEFAULT(&default_tz), NO_MUTEX_GUARD, IN_BINLOG);
+#ifdef WITH_WSREP
+#include "wsrep_var.h"
+#include "wsrep_sst.h"
+#include "wsrep_binlog.h"
+
+static Sys_var_charptr Sys_wsrep_provider(
+ "wsrep_provider", "Path to replication provider library",
+ PREALLOCATED GLOBAL_VAR(wsrep_provider), CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER),
+ IN_FS_CHARSET, DEFAULT(WSREP_NONE),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_provider_check), ON_UPDATE(wsrep_provider_update));
+
+static Sys_var_charptr Sys_wsrep_provider_options(
+ "wsrep_provider_options", "provider specific options",
+ PREALLOCATED GLOBAL_VAR(wsrep_provider_options),
+ CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER_OPTIONS),
+ IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_provider_options_check),
+ ON_UPDATE(wsrep_provider_options_update));
+
+static Sys_var_charptr Sys_wsrep_data_home_dir(
+ "wsrep_data_home_dir", "home directory for wsrep provider",
+ READ_ONLY GLOBAL_VAR(wsrep_data_home_dir), CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(mysql_real_data_home),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG);
+
+static Sys_var_charptr Sys_wsrep_cluster_name(
+ "wsrep_cluster_name", "Name for the cluster",
+ PREALLOCATED GLOBAL_VAR(wsrep_cluster_name), CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(WSREP_CLUSTER_NAME),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_cluster_name_check),
+ ON_UPDATE(wsrep_cluster_name_update));
+
+static PolyLock_mutex PLock_wsrep_slave_threads(&LOCK_wsrep_slave_threads);
+static Sys_var_charptr Sys_wsrep_cluster_address (
+ "wsrep_cluster_address", "Address to initially connect to cluster",
+ PREALLOCATED GLOBAL_VAR(wsrep_cluster_address),
+ CMD_LINE(REQUIRED_ARG, OPT_WSREP_CLUSTER_ADDRESS),
+ IN_FS_CHARSET, DEFAULT(""),
+ &PLock_wsrep_slave_threads, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_cluster_address_check),
+ ON_UPDATE(wsrep_cluster_address_update));
+
+static Sys_var_charptr Sys_wsrep_node_name (
+ "wsrep_node_name", "Node name",
+ PREALLOCATED GLOBAL_VAR(wsrep_node_name), CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(glob_hostname), NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ wsrep_node_name_check, wsrep_node_name_update);
+
+static Sys_var_charptr Sys_wsrep_node_address (
+ "wsrep_node_address", "Node address",
+ PREALLOCATED GLOBAL_VAR(wsrep_node_address), CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(""),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_node_address_check),
+ ON_UPDATE(wsrep_node_address_update));
+
+static Sys_var_charptr Sys_wsrep_node_incoming_address(
+ "wsrep_node_incoming_address", "Client connection address",
+ PREALLOCATED GLOBAL_VAR(wsrep_node_incoming_address),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(WSREP_NODE_INCOMING_AUTO),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG);
+
+static Sys_var_ulong Sys_wsrep_slave_threads(
+ "wsrep_slave_threads", "Number of slave appliers to launch",
+ GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1),
+ &PLock_wsrep_slave_threads, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_slave_threads_check),
+ ON_UPDATE(wsrep_slave_threads_update));
+
+static Sys_var_charptr Sys_wsrep_dbug_option(
+ "wsrep_dbug_option", "DBUG options to provider library",
+ GLOBAL_VAR(wsrep_dbug_option),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(""),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG);
+
+static Sys_var_mybool Sys_wsrep_debug(
+ "wsrep_debug", "To enable debug level logging",
+ GLOBAL_VAR(wsrep_debug), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_convert_LOCK_to_trx(
+ "wsrep_convert_LOCK_to_trx", "To convert locking sessions "
+ "into transactions",
+ GLOBAL_VAR(wsrep_convert_LOCK_to_trx),
+ CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_ulong Sys_wsrep_retry_autocommit(
+ "wsrep_retry_autocommit", "Max number of times to retry "
+ "a failed autocommit statement",
+ SESSION_VAR(wsrep_retry_autocommit), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, 10000), DEFAULT(1), BLOCK_SIZE(1));
+
+static Sys_var_mybool Sys_wsrep_auto_increment_control(
+ "wsrep_auto_increment_control", "To automatically control the "
+ "assignment of autoincrement variables",
+ GLOBAL_VAR(wsrep_auto_increment_control),
+ CMD_LINE(OPT_ARG), DEFAULT(TRUE));
+
+/*
+  wsrep_drupal_282555_workaround: global boolean, off by default.
+  The help text is split over two literals; the first one must end with a
+  space, otherwise the concatenation reads "workaround forbad ...".
+*/
+static Sys_var_mybool Sys_wsrep_drupal_282555_workaround(
+       "wsrep_drupal_282555_workaround", "To use a workaround for "
+       "bad autoincrement value",
+       GLOBAL_VAR(wsrep_drupal_282555_workaround),
+       CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+/*
+  wsrep_sst_method: global string variable with check and update hooks.
+  The object is named with the "Sys_" prefix like every other variable
+  object declared in this block.
+*/
+static Sys_var_charptr Sys_wsrep_sst_method(
+       "wsrep_sst_method", "State snapshot transfer method",
+       GLOBAL_VAR(wsrep_sst_method),CMD_LINE(REQUIRED_ARG),
+       IN_FS_CHARSET, DEFAULT(WSREP_SST_DEFAULT), NO_MUTEX_GUARD, NOT_IN_BINLOG,
+       ON_CHECK(wsrep_sst_method_check),
+       ON_UPDATE(wsrep_sst_method_update));
+
+static Sys_var_charptr Sys_wsrep_sst_receive_address(
+ "wsrep_sst_receive_address", "Address where node is waiting for "
+ "SST contact",
+ GLOBAL_VAR(wsrep_sst_receive_address),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(WSREP_SST_ADDRESS_AUTO), NO_MUTEX_GUARD,
+ NOT_IN_BINLOG,
+ ON_CHECK(wsrep_sst_receive_address_check),
+ ON_UPDATE(wsrep_sst_receive_address_update));
+
+static Sys_var_charptr Sys_wsrep_sst_auth(
+ "wsrep_sst_auth", "Authentication for SST connection",
+ PREALLOCATED GLOBAL_VAR(wsrep_sst_auth), CMD_LINE(REQUIRED_ARG, OPT_WSREP_SST_AUTH),
+ IN_FS_CHARSET, DEFAULT(NULL), NO_MUTEX_GUARD,
+ NOT_IN_BINLOG,
+ ON_CHECK(wsrep_sst_auth_check),
+ ON_UPDATE(wsrep_sst_auth_update));
+
+static Sys_var_charptr Sys_wsrep_sst_donor(
+ "wsrep_sst_donor", "preferred donor node for the SST",
+ GLOBAL_VAR(wsrep_sst_donor),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_sst_donor_check),
+ ON_UPDATE(wsrep_sst_donor_update));
+
+static Sys_var_mybool Sys_wsrep_sst_donor_rejects_queries(
+ "wsrep_sst_donor_rejects_queries", "Reject client queries "
+ "when donating state snapshot transfer",
+ GLOBAL_VAR(wsrep_sst_donor_rejects_queries),
+ CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_on (
+ "wsrep_on", "To enable wsrep replication ",
+ SESSION_VAR(wsrep_on),
+ CMD_LINE(OPT_ARG), DEFAULT(TRUE),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(wsrep_on_update));
+
+static Sys_var_charptr Sys_wsrep_start_position (
+ "wsrep_start_position", "global transaction position to start from ",
+ PREALLOCATED GLOBAL_VAR(wsrep_start_position),
+ CMD_LINE(REQUIRED_ARG, OPT_WSREP_START_POSITION),
+ IN_FS_CHARSET, DEFAULT(WSREP_START_POSITION_ZERO),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_start_position_check),
+ ON_UPDATE(wsrep_start_position_update));
+
+static Sys_var_ulong Sys_wsrep_max_ws_size (
+ "wsrep_max_ws_size", "Max write set size (bytes)",
+ GLOBAL_VAR(wsrep_max_ws_size), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(1024, WSREP_MAX_WS_SIZE), DEFAULT(WSREP_MAX_WS_SIZE),
+ BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(wsrep_max_ws_size_update));
+
+static Sys_var_ulong Sys_wsrep_max_ws_rows (
+ "wsrep_max_ws_rows", "Max number of rows in write set",
+ GLOBAL_VAR(wsrep_max_ws_rows), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, 1048576), DEFAULT(0), BLOCK_SIZE(1));
+
+static Sys_var_charptr Sys_wsrep_notify_cmd(
+ "wsrep_notify_cmd", "",
+ GLOBAL_VAR(wsrep_notify_cmd),CMD_LINE(REQUIRED_ARG),
+ IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG);
+
+static Sys_var_mybool Sys_wsrep_certify_nonPK(
+ "wsrep_certify_nonPK", "Certify tables with no primary key",
+ GLOBAL_VAR(wsrep_certify_nonPK),
+ CMD_LINE(OPT_ARG), DEFAULT(TRUE));
+
+static Sys_var_mybool Sys_wsrep_causal_reads(
+ "wsrep_causal_reads", "(DEPRECATED) Setting this variable is equivalent "
+ "to setting wsrep_sync_wait READ flag",
+ SESSION_VAR(wsrep_causal_reads), CMD_LINE(OPT_ARG), DEFAULT(FALSE),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(wsrep_causal_reads_update));
+
+static Sys_var_uint Sys_wsrep_sync_wait(
+ "wsrep_sync_wait", "Ensure \"synchronous\" read view before executing "
+ "an operation of the type specified by bitmask: 1 - READ(includes "
+ "SELECT, SHOW and BEGIN/START TRANSACTION); 2 - UPDATE and DELETE; 4 - "
+ "INSERT and REPLACE",
+ SESSION_VAR(wsrep_sync_wait), CMD_LINE(OPT_ARG),
+ VALID_RANGE(WSREP_SYNC_WAIT_NONE, WSREP_SYNC_WAIT_MAX),
+ DEFAULT(WSREP_SYNC_WAIT_NONE), BLOCK_SIZE(1),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(wsrep_sync_wait_update));
+
+static const char *wsrep_OSU_method_names[]= { "TOI", "RSU", NullS };
+static Sys_var_enum Sys_wsrep_OSU_method(
+ "wsrep_OSU_method", "Method for Online Schema Upgrade",
+ SESSION_VAR(wsrep_OSU_method), CMD_LINE(OPT_ARG),
+ wsrep_OSU_method_names, DEFAULT(WSREP_OSU_TOI),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(0));
+
+static PolyLock_mutex PLock_wsrep_desync(&LOCK_wsrep_desync);
+static Sys_var_mybool Sys_wsrep_desync (
+ "wsrep_desync", "To desynchronize the node from the cluster",
+ GLOBAL_VAR(wsrep_desync),
+ CMD_LINE(OPT_ARG), DEFAULT(FALSE),
+ &PLock_wsrep_desync, NOT_IN_BINLOG,
+ ON_CHECK(wsrep_desync_check),
+ ON_UPDATE(wsrep_desync_update));
+
+/*
+  wsrep_forced_binlog_format: global enum over wsrep_binlog_format_names,
+  defaulting to BINLOG_FORMAT_UNSPEC.
+
+  NOTE(review): the command-line option id given here is OPT_BINLOG_FORMAT,
+  while the other wsrep variables with an explicit id use their own
+  OPT_WSREP_* value. Confirm that sharing the id is intentional.
+*/
+static Sys_var_enum Sys_wsrep_forced_binlog_format(
+ "wsrep_forced_binlog_format", "binlog format to take effect over user's choice",
+ GLOBAL_VAR(wsrep_forced_binlog_format),
+ CMD_LINE(REQUIRED_ARG, OPT_BINLOG_FORMAT),
+ wsrep_binlog_format_names, DEFAULT(BINLOG_FORMAT_UNSPEC),
+ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(0));
+
+static Sys_var_mybool Sys_wsrep_recover_datadir(
+ "wsrep_recover", "Recover database state after crash and exit",
+ READ_ONLY GLOBAL_VAR(wsrep_recovery),
+ CMD_LINE(OPT_ARG, OPT_WSREP_RECOVER), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_replicate_myisam(
+ "wsrep_replicate_myisam", "To enable myisam replication",
+ GLOBAL_VAR(wsrep_replicate_myisam), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_log_conflicts(
+ "wsrep_log_conflicts", "To log multi-master conflicts",
+ GLOBAL_VAR(wsrep_log_conflicts), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_ulong Sys_wsrep_mysql_replication_bundle(
+ "wsrep_mysql_replication_bundle", "mysql replication group commit ",
+ GLOBAL_VAR(wsrep_mysql_replication_bundle), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, 1000), DEFAULT(0), BLOCK_SIZE(1));
+
+static Sys_var_mybool Sys_wsrep_load_data_splitting(
+ "wsrep_load_data_splitting", "To commit LOAD DATA "
+ "transaction after every 10K rows inserted",
+ GLOBAL_VAR(wsrep_load_data_splitting),
+ CMD_LINE(OPT_ARG), DEFAULT(TRUE));
+
+static Sys_var_mybool Sys_wsrep_slave_FK_checks(
+ "wsrep_slave_FK_checks", "Should slave thread do "
+ "foreign key constraint checks",
+ GLOBAL_VAR(wsrep_slave_FK_checks),
+ CMD_LINE(OPT_ARG), DEFAULT(TRUE));
+
+static Sys_var_mybool Sys_wsrep_slave_UK_checks(
+ "wsrep_slave_UK_checks", "Should slave thread do "
+ "secondary index uniqueness checks",
+ GLOBAL_VAR(wsrep_slave_UK_checks),
+ CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_restart_slave(
+ "wsrep_restart_slave", "Should MySQL slave be restarted automatically, when node joins back to cluster",
+ GLOBAL_VAR(wsrep_restart_slave), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+
+static Sys_var_mybool Sys_wsrep_dirty_reads(
+ "wsrep_dirty_reads",
+ "Allow reads even when the node is not in the primary component.",
+ SESSION_VAR(wsrep_dirty_reads), CMD_LINE(OPT_ARG),
+ DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG);
+
+#endif /* WITH_WSREP */
+
static bool fix_host_cache_size(sys_var *, THD *, enum_var_type)
{
hostname_cache_resize((uint) host_cache_size);
@@ -4830,3 +5127,13 @@ static Sys_var_mybool Sys_pseudo_slave_mode(
SESSION_ONLY(pseudo_slave_mode), NO_CMD_LINE, DEFAULT(FALSE),
NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_pseudo_slave_mode));
+#ifdef HAVE_MMAP
+static Sys_var_ulong Sys_log_tc_size(
+ "log_tc_size",
+ "Size of transaction coordinator log.",
+ READ_ONLY GLOBAL_VAR(opt_tc_log_size),
+ CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(my_getpagesize() * 3, ULONG_MAX),
+ DEFAULT(my_getpagesize() * 6),
+ BLOCK_SIZE(my_getpagesize()));
+#endif
diff --git a/sql/table.cc b/sql/table.cc
index 975d9d53882..7b43944e008 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -39,6 +39,9 @@
#include "sql_statistics.h"
#include "discover.h"
#include "mdl.h" // MDL_wait_for_graph_visitor
+#ifdef WITH_WSREP
+#include "ha_partition.h"
+#endif /* WITH_WSREP */
/* INFORMATION_SCHEMA name */
LEX_STRING INFORMATION_SCHEMA_NAME= {C_STRING_WITH_LEN("information_schema")};
diff --git a/sql/transaction.cc b/sql/transaction.cc
index a70c075e142..f478b4a18f7 100644
--- a/sql/transaction.cc
+++ b/sql/transaction.cc
@@ -98,6 +98,9 @@ static bool xa_trans_force_rollback(THD *thd)
by ha_rollback()/THD::transaction::cleanup().
*/
thd->transaction.xid_state.rm_error= 0;
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
if (ha_rollback_trans(thd, true))
{
my_error(ER_XAER_RMERR, MYF(0));
@@ -136,10 +139,16 @@ bool trans_begin(THD *thd, uint flags)
(thd->variables.option_bits & OPTION_TABLE_LOCK))
{
thd->variables.option_bits&= ~OPTION_TABLE_LOCK;
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
thd->server_status&=
~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY);
DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS"));
res= MY_TEST(ha_commit_trans(thd, TRUE));
+#ifdef WITH_WSREP
+ wsrep_post_commit(thd, TRUE);
+#endif /* WITH_WSREP */
}
thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG);
@@ -184,6 +193,12 @@ bool trans_begin(THD *thd, uint flags)
thd->tx_read_only= false;
}
+#ifdef WITH_WSREP
+ thd->wsrep_PA_safe= true;
+ if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd))
+ DBUG_RETURN(TRUE);
+#endif /* WITH_WSREP */
+
thd->variables.option_bits|= OPTION_BEGIN;
thd->server_status|= SERVER_STATUS_IN_TRANS;
if (thd->tx_read_only)
@@ -215,10 +230,16 @@ bool trans_commit(THD *thd)
if (trans_check(thd))
DBUG_RETURN(TRUE);
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
thd->server_status&=
~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY);
DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS"));
res= ha_commit_trans(thd, TRUE);
+#ifdef WITH_WSREP
+ wsrep_post_commit(thd, TRUE);
+#endif /* WITH_WSREP */
/*
if res is non-zero, then ha_commit_trans has rolled back the
transaction, so the hooks for rollback will be called.
@@ -264,10 +285,16 @@ bool trans_commit_implicit(THD *thd)
/* Safety if one did "drop table" on locked tables */
if (!thd->locked_tables_mode)
thd->variables.option_bits&= ~OPTION_TABLE_LOCK;
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
thd->server_status&=
~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY);
DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS"));
res= MY_TEST(ha_commit_trans(thd, TRUE));
+#ifdef WITH_WSREP
+ wsrep_post_commit(thd, TRUE);
+#endif /* WITH_WSREP */
}
thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG);
@@ -300,9 +327,15 @@ bool trans_rollback(THD *thd)
int res;
DBUG_ENTER("trans_rollback");
+#ifdef WITH_WSREP
+ thd->wsrep_PA_safe= true;
+#endif /* WITH_WSREP */
if (trans_check(thd))
DBUG_RETURN(TRUE);
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
thd->server_status&=
~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY);
DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS"));
@@ -393,11 +426,17 @@ bool trans_commit_stmt(THD *thd)
if (thd->transaction.stmt.ha_list)
{
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, FALSE);
+#endif /* WITH_WSREP */
res= ha_commit_trans(thd, FALSE);
if (! thd->in_active_multi_stmt_transaction())
{
thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation;
thd->tx_read_only= thd->variables.tx_read_only;
+#ifdef WITH_WSREP
+ wsrep_post_commit(thd, FALSE);
+#endif /* WITH_WSREP */
}
}
@@ -438,6 +477,9 @@ bool trans_rollback_stmt(THD *thd)
if (thd->transaction.stmt.ha_list)
{
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, FALSE);
+#endif /* WITH_WSREP */
ha_rollback_trans(thd, FALSE);
if (! thd->in_active_multi_stmt_transaction())
{
@@ -815,9 +857,15 @@ bool trans_xa_commit(THD *thd)
}
else if (xa_state == XA_IDLE && thd->lex->xa_opt == XA_ONE_PHASE)
{
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
int r= ha_commit_trans(thd, TRUE);
if ((res= MY_TEST(r)))
my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0));
+#ifdef WITH_WSREP
+ wsrep_post_commit(thd, TRUE);
+#endif /* WITH_WSREP */
}
else if (xa_state == XA_PREPARED && thd->lex->xa_opt == XA_NONE)
{
@@ -836,6 +884,9 @@ bool trans_xa_commit(THD *thd)
if (thd->mdl_context.acquire_lock(&mdl_request,
thd->variables.lock_wait_timeout))
{
+#ifdef WITH_WSREP
+ wsrep_register_hton(thd, TRUE);
+#endif /* WITH_WSREP */
ha_rollback_trans(thd, TRUE);
my_error(ER_XAER_RMERR, MYF(0));
}
diff --git a/sql/tztime.cc b/sql/tztime.cc
index 75b732f4436..d4321604a7a 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -2705,6 +2705,12 @@ main(int argc, char **argv)
free_defaults(default_argv);
return 1;
}
+
+#ifdef WITH_WSREP
+ // Replicate MyISAM DDL for this session, cf. lp:1161432
+ printf("SET GLOBAL wsrep_replicate_myisam= ON;\n");
+#endif /* WITH_WSREP */
+
if (argc == 1 && !opt_leap)
{
/* Argument is timezonedir */
@@ -2752,6 +2758,11 @@ main(int argc, char **argv)
free_root(&tz_storage, MYF(0));
}
+#ifdef WITH_WSREP
+ // Reset wsrep_replicate_myisam. lp:1161432
+ printf("SET GLOBAL wsrep_replicate_myisam= OFF;\n");
+#endif /* WITH_WSREP */
+
free_defaults(default_argv);
my_end(0);
return 0;
diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc
new file mode 100644
index 00000000000..73a43185162
--- /dev/null
+++ b/sql/wsrep_applier.cc
@@ -0,0 +1,400 @@
+/* Copyright (C) 2013-2015 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
+
+#include "wsrep_applier.h"
+#include "wsrep_priv.h"
+#include "wsrep_binlog.h" // wsrep_dump_rbr_buf()
+#include "wsrep_xid.h"
+
+#include "log_event.h" // class THD, EVENT_LEN_OFFSET, etc.
+#include "debug_sync.h"
+
+/*
+  Read the first event from (*arg_buf), whose size is (*arg_buf_len).
+
+  The event length is taken from the 4-byte field at EVENT_LEN_OFFSET in the
+  event header. Before it is used, it is validated against the remaining
+  buffer: if the buffer is too short to contain the length field, or the
+  declared length exceeds what is left in the buffer, an error is logged,
+  NULL is returned and the arguments are left untouched. Without this check
+  a corrupt length would make the parser read past the end of the buffer and
+  would wrap the unsigned (*arg_buf_len) around, so that the caller keeps
+  looping over memory that does not belong to the write set.
+
+  Otherwise, at the end (*arg_buf) is shifted to point just past the event
+  that was read and (*arg_buf_len) is reduced by the length of that event.
+  This happens even if the event itself could not be parsed.
+
+  @param arg_buf            in/out: current read position in the buffer
+  @param arg_buf_len        in/out: number of bytes left at (*arg_buf)
+  @param description_event  format description used to decode the event
+
+  @return the decoded event, or NULL on error (the error is logged here)
+*/
+
+static Log_event* wsrep_read_log_event(
+    char **arg_buf, size_t *arg_buf_len,
+    const Format_description_log_event *description_event)
+{
+  DBUG_ENTER("wsrep_read_log_event");
+  char *head= (*arg_buf);
+
+  /* The header must at least contain the complete event length field. */
+  if (*arg_buf_len < (size_t) (EVENT_LEN_OFFSET + 4))
+  {
+    sql_print_error("Error in wsrep_read_log_event(): "
+                    "truncated event header, remaining buffer: %lu",
+                    (ulong) *arg_buf_len);
+    DBUG_RETURN(0);
+  }
+
+  uint data_len= uint4korr(head + EVENT_LEN_OFFSET);
+
+  /* Never trust a length that points beyond the end of the buffer. */
+  if (data_len > *arg_buf_len)
+  {
+    sql_print_error("Error in wsrep_read_log_event(): "
+                    "event length %lu exceeds remaining buffer %lu",
+                    (ulong) data_len, (ulong) *arg_buf_len);
+    DBUG_RETURN(0);
+  }
+
+  char *buf= (*arg_buf);
+  const char *error= 0;
+  Log_event *res= 0;
+
+  res= Log_event::read_log_event(buf, data_len, &error, description_event,
+                                 true);
+
+  if (!res)
+  {
+    DBUG_ASSERT(error != 0);
+    sql_print_error("Error in Log_event::read_log_event(): "
+                    "'%s', data_len: %d, event_type: %d",
+                    error,data_len,head[EVENT_TYPE_OFFSET]);
+  }
+
+  /* Step over the event regardless of whether it could be decoded. */
+  (*arg_buf)+= data_len;
+  (*arg_buf_len)-= data_len;
+  DBUG_RETURN(res);
+}
+
+#include "transaction.h" // trans_commit(), trans_rollback()
+#include "rpl_rli.h" // class Relay_log_info;
+#include "sql_base.h" // close_temporary_table()
+
+/*
+  Replace the format description event cached in the THD with ev.
+  The previously cached event, if any, is destroyed; ev may be NULL to
+  simply drop the cached event.
+*/
+static inline void
+wsrep_set_apply_format(THD* thd, Format_description_log_event* ev)
+{
+  Format_description_log_event* const previous=
+    (Format_description_log_event*)thd->wsrep_apply_format;
+
+  if (previous)
+    delete previous;
+
+  thd->wsrep_apply_format= ev;
+}
+
+/*
+  Return the format description event to decode events with: the one cached
+  in the THD if there is one, otherwise the relay log's
+  description_event_for_exec.
+*/
+static inline Format_description_log_event*
+wsrep_get_apply_format(THD* thd)
+{
+  Format_description_log_event* const cached=
+    (Format_description_log_event*) thd->wsrep_apply_format;
+
+  return cached
+    ? cached
+    : thd->wsrep_rgi->rli->relay_log.description_event_for_exec;
+}
+
+/*
+ Apply every binlog event contained in a write set buffer.
+
+ Events are read one by one with wsrep_read_log_event() and executed through
+ Log_event::apply_event() on thd->wsrep_rgi. Processing stops at the first
+ event that cannot be read or whose application returns non-zero.
+
+ @param thd applier (or replaying) thread
+ @param events_buf buffer holding the concatenated events
+ @param buf_len size of events_buf in bytes
+
+ @return WSREP_CB_SUCCESS if all events were applied, WSREP_CB_FAILURE
+ otherwise
+*/
+static wsrep_cb_status_t wsrep_apply_events(THD* thd,
+ const void* events_buf,
+ size_t buf_len)
+{
+ char *buf= (char *)events_buf;
+ int rcode= 0;
+ int event= 1; /* 1-based ordinal of the event, used in warnings only */
+ Log_event_type typ;
+
+ DBUG_ENTER("wsrep_apply_events");
+
+ /* A killed thread gives up immediately unless it is replaying. */
+ if (thd->killed == KILL_CONNECTION &&
+ thd->wsrep_conflict_state != REPLAYING)
+ {
+ WSREP_INFO("applier has been aborted, skipping apply_rbr: %lld",
+ (long long) wsrep_thd_trx_seqno(thd));
+ DBUG_RETURN(WSREP_CB_FAILURE);
+ }
+
+ /* Mark the thread as executing; keep REPLAYING if that is the state. */
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_EXEC;
+ if (thd->wsrep_conflict_state!= REPLAYING)
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ if (!buf_len) WSREP_DEBUG("empty rbr buffer to apply: %lld",
+ (long long) wsrep_thd_trx_seqno(thd));
+
+ /* wsrep_read_log_event() advances buf and decrements buf_len. */
+ while(buf_len)
+ {
+ int exec_res;
+ Log_event* ev= wsrep_read_log_event(&buf, &buf_len,
+ wsrep_get_apply_format(thd));
+
+ if (!ev)
+ {
+ WSREP_ERROR("applier could not read binlog event, seqno: %lld, len: %zu",
+ (long long)wsrep_thd_trx_seqno(thd), buf_len);
+ rcode= 1;
+ goto error;
+ }
+
+ typ= ev->get_type_code();
+
+ switch (typ) {
+ case FORMAT_DESCRIPTION_EVENT:
+ /*
+ The event is not applied: it is stored in the THD (which takes over
+ its ownership) and used to decode the events that follow.
+ */
+ wsrep_set_apply_format(thd, (Format_description_log_event*)ev);
+ continue;
+#ifdef GTID_SUPPORT
+ case GTID_LOG_EVENT:
+ {
+ Gtid_log_event* gev= (Gtid_log_event*)ev;
+ if (gev->get_gno() == 0)
+ {
+ /* Skip GTID log event to make binlog to generate LTID on commit */
+ delete ev;
+ continue;
+ }
+ }
+ /* a GTID event with a non-zero gno falls through and is applied */
+#endif /* GTID_SUPPORT */
+ default:
+ break;
+ }
+
+ /* Use the original server id for logging. */
+ thd->set_server_id(ev->server_id);
+ thd->set_time(); // time the query
+ wsrep_xid_init(&thd->transaction.xid_state.xid,
+ thd->wsrep_trx_meta.gtid.uuid,
+ thd->wsrep_trx_meta.gtid.seqno);
+ thd->lex->current_select= 0;
+ /* Events without a timestamp get the current time. */
+ if (!ev->when)
+ {
+ my_hrtime_t hrtime= my_hrtime();
+ ev->when= hrtime_to_my_time(hrtime);
+ ev->when_sec_part= hrtime_sec_part(hrtime);
+ }
+
+ /* Mirror the event's skip-replication flag into the session options. */
+ thd->variables.option_bits=
+ (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) |
+ (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0);
+
+ ev->thd = thd;
+ exec_res = ev->apply_event(thd->wsrep_rgi);
+ DBUG_PRINT("info", ("exec_event result: %d", exec_res));
+
+ if (exec_res)
+ {
+ WSREP_WARN("RBR event %d %s apply warning: %d, %lld",
+ event, ev->get_type_str(), exec_res,
+ (long long) wsrep_thd_trx_seqno(thd));
+ rcode= exec_res;
+ /* stop processing for the first error */
+ delete ev;
+ goto error;
+ }
+ event++;
+
+ /*
+ NOTE(review): the message says "conflict state" but the value printed
+ is wsrep_query_state, not wsrep_conflict_state - confirm which one is
+ intended.
+ */
+ if (thd->wsrep_conflict_state!= NO_CONFLICT &&
+ thd->wsrep_conflict_state!= REPLAYING)
+ WSREP_WARN("conflict state after RBR event applying: %d, %lld",
+ thd->wsrep_query_state, (long long)wsrep_thd_trx_seqno(thd));
+
+ /*
+ NOTE(review): this early return neither disposes of ev via
+ delete_or_keep_event_post_apply() nor resets wsrep_query_state to
+ QUERY_IDLE as the code under the error label does - confirm that this
+ is intentional.
+ */
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ WSREP_WARN("RBR event apply failed, rolling back: %lld",
+ (long long) wsrep_thd_trx_seqno(thd));
+ trans_rollback(thd);
+ thd->locked_tables_list.unlock_locked_tables(thd);
+ /* Release transactional metadata locks. */
+ thd->mdl_context.release_transactional_locks();
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ DBUG_RETURN(WSREP_CB_FAILURE);
+ }
+
+ delete_or_keep_event_post_apply(thd->wsrep_rgi, typ, ev);
+ }
+
+ /* Reached both on failure (via goto) and after the last event. */
+ error:
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_query_state= QUERY_IDLE;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ assert(thd->wsrep_exec_mode== REPL_RECV);
+
+ if (thd->killed == KILL_CONNECTION)
+ WSREP_INFO("applier aborted: %lld", (long long)wsrep_thd_trx_seqno(thd));
+
+ if (rcode) DBUG_RETURN(WSREP_CB_FAILURE);
+ DBUG_RETURN(WSREP_CB_SUCCESS);
+}
+
+/*
+ wsrep apply callback: apply one write set in the context of an applier
+ thread.
+
+ @param ctx the applier THD, passed as an opaque pointer
+ @param buf write set data (binlog events)
+ @param buf_len size of buf in bytes
+ @param flags write set flags; WSREP_FLAG_ISOLATION selects TOI handling
+ @param meta transaction meta data, copied into thd->wsrep_trx_meta
+
+ @return the status returned by wsrep_apply_events()
+*/
+wsrep_cb_status_t wsrep_apply_cb(void* const ctx,
+ const void* const buf,
+ size_t const buf_len,
+ uint32_t const flags,
+ const wsrep_trx_meta_t* meta)
+{
+ THD* const thd((THD*)ctx);
+
+ // Allow tests to block the applier thread using the DBUG facilities.
+ DBUG_EXECUTE_IF("sync.wsrep_apply_cb",
+ {
+ const char act[]=
+ "now "
+ "wait_for signal.wsrep_apply_cb";
+ DBUG_ASSERT(!debug_sync_set_action(thd,
+ STRING_WITH_LEN(act)));
+ };);
+
+ thd->wsrep_trx_meta = *meta;
+
+#ifdef WSREP_PROC_INFO
+ snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+ "applying write set %lld: %p, %zu",
+ (long long)wsrep_thd_trx_seqno(thd), buf, buf_len);
+ thd_proc_info(thd, thd->wsrep_info);
+#else
+ thd_proc_info(thd, "applying write set");
+#endif /* WSREP_PROC_INFO */
+
+ /*
+ tune FK and UK checking policy: the global wsrep_slave_UK_checks and
+ wsrep_slave_FK_checks settings are translated into session option bits
+ for every write set.
+ */
+ if (wsrep_slave_UK_checks == FALSE)
+ thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS;
+ else
+ thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS;
+
+ if (wsrep_slave_FK_checks == FALSE)
+ thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS;
+ else
+ thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
+
+ if (flags & WSREP_FLAG_ISOLATION)
+ {
+ thd->wsrep_apply_toi= true;
+ /*
+ Don't run in transaction mode with TOI actions.
+ */
+ thd->variables.option_bits&= ~OPTION_BEGIN;
+ thd->server_status&= ~SERVER_STATUS_IN_TRANS;
+ }
+ wsrep_cb_status_t rcode(wsrep_apply_events(thd, buf, buf_len));
+
+#ifdef WSREP_PROC_INFO
+ snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+ "applied write set %lld", (long long)wsrep_thd_trx_seqno(thd));
+ thd_proc_info(thd, thd->wsrep_info);
+#else
+ thd_proc_info(thd, "applied write set");
+#endif /* WSREP_PROC_INFO */
+
+ /* On failure the raw write set is dumped to a file for inspection. */
+ if (WSREP_CB_SUCCESS != rcode)
+ {
+ wsrep_dump_rbr_buf(thd, buf, buf_len);
+ }
+
+ /* Close any temporary tables still attached to the applier THD. */
+ TABLE *tmp;
+ while ((tmp = thd->temporary_tables))
+ {
+ WSREP_DEBUG("Applier %lu, has temporary tables: %s.%s",
+ thd->thread_id,
+ (tmp->s) ? tmp->s->db.str : "void",
+ (tmp->s) ? tmp->s->table_name.str : "void");
+ close_temporary_table(thd, tmp, 1, 1);
+ }
+
+ return rcode;
+}
+
+/*
+  Commit the transaction applied by thd and update the thread's proc info
+  before and after.
+
+  After a successful commit the replication group info context is cleaned up
+  and, for TOI write sets, the storage engine checkpoint is advanced to the
+  GTID of the write set.
+
+  @return WSREP_CB_SUCCESS if trans_commit() succeeded, else WSREP_CB_FAILURE
+*/
+static wsrep_cb_status_t wsrep_commit(THD* const thd)
+{
+#ifdef WSREP_PROC_INFO
+  snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+           "committing %lld", (long long)wsrep_thd_trx_seqno(thd));
+  thd_proc_info(thd, thd->wsrep_info);
+#else
+  thd_proc_info(thd, "committing");
+#endif /* WSREP_PROC_INFO */
+
+  /* trans_commit() reports failure by returning true. */
+  const bool commit_failed= trans_commit(thd);
+
+  if (!commit_failed)
+  {
+    thd->wsrep_rgi->cleanup_context(thd, false);
+#ifdef GTID_SUPPORT
+    thd->variables.gtid_next.set_automatic();
+#endif /* GTID_SUPPORT */
+    if (thd->wsrep_apply_toi)
+    {
+      wsrep_set_SE_checkpoint(thd->wsrep_trx_meta.gtid.uuid,
+                              thd->wsrep_trx_meta.gtid.seqno);
+    }
+  }
+
+#ifdef WSREP_PROC_INFO
+  snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+           "committed %lld", (long long) wsrep_thd_trx_seqno(thd));
+  thd_proc_info(thd, thd->wsrep_info);
+#else
+  thd_proc_info(thd, "committed");
+#endif /* WSREP_PROC_INFO */
+
+  if (commit_failed)
+    return WSREP_CB_FAILURE;
+  return WSREP_CB_SUCCESS;
+}
+
+/*
+  Roll back the transaction applied by thd and update the thread's proc info
+  before and after.
+
+  @return WSREP_CB_SUCCESS if trans_rollback() succeeded,
+          else WSREP_CB_FAILURE
+*/
+static wsrep_cb_status_t wsrep_rollback(THD* const thd)
+{
+#ifdef WSREP_PROC_INFO
+  snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+           "rolling back %lld", (long long)wsrep_thd_trx_seqno(thd));
+  thd_proc_info(thd, thd->wsrep_info);
+#else
+  thd_proc_info(thd, "rolling back");
+#endif /* WSREP_PROC_INFO */
+
+  /* trans_rollback() reports failure by returning true. */
+  const bool rollback_failed= trans_rollback(thd);
+
+#ifdef WSREP_PROC_INFO
+  snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1,
+           "rolled back %lld", (long long)wsrep_thd_trx_seqno(thd));
+  thd_proc_info(thd, thd->wsrep_info);
+#else
+  thd_proc_info(thd, "rolled back");
+#endif /* WSREP_PROC_INFO */
+
+  if (rollback_failed)
+    return WSREP_CB_FAILURE;
+  return WSREP_CB_SUCCESS;
+}
+
+/*
+ wsrep commit callback: commit or roll back the write set previously applied
+ by this applier thread, then reset per-transaction state of the THD.
+
+ @param ctx the applier THD, passed as an opaque pointer
+ @param flags write set flags (not used here)
+ @param meta transaction meta data; its seqno must match the one stored
+ in the THD
+ @param exit in/out: set to true here when the number of applier threads
+ is being reduced (wsrep_slave_count_change < 0);
+ presumably tells the caller that this thread should exit -
+ confirm against the wsrep API
+ @param commit true to commit, false to roll back
+
+ @return status of wsrep_commit() or wsrep_rollback()
+*/
+wsrep_cb_status_t wsrep_commit_cb(void* const ctx,
+ uint32_t const flags,
+ const wsrep_trx_meta_t* meta,
+ wsrep_bool_t* const exit,
+ bool const commit)
+{
+ THD* const thd((THD*)ctx);
+
+ assert(meta->gtid.seqno == wsrep_thd_trx_seqno(thd));
+
+ wsrep_cb_status_t rcode;
+
+ if (commit)
+ rcode = wsrep_commit(thd);
+ else
+ rcode = wsrep_rollback(thd);
+
+ /* Drop per-write-set state regardless of the outcome. */
+ wsrep_set_apply_format(thd, NULL);
+ thd->mdl_context.release_transactional_locks();
+ free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC));
+ thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation;
+
+ /*
+ wsrep_slave_count_change is first tested without the mutex and then
+ tested again under LOCK_wsrep_slave_threads before it is modified.
+ */
+ if (wsrep_slave_count_change < 0 && commit && WSREP_CB_SUCCESS == rcode)
+ {
+ mysql_mutex_lock(&LOCK_wsrep_slave_threads);
+ if (wsrep_slave_count_change < 0)
+ {
+ wsrep_slave_count_change++;
+ *exit = true;
+ }
+ mysql_mutex_unlock(&LOCK_wsrep_slave_threads);
+ }
+
+ /* A thread that keeps running is put back into transaction mode. */
+ if (*exit == false && thd->wsrep_applier)
+ {
+ /* From trans_begin() */
+ thd->variables.option_bits|= OPTION_BEGIN;
+ thd->server_status|= SERVER_STATUS_IN_TRANS;
+ thd->wsrep_apply_toi= false;
+ }
+
+ return rcode;
+}
+
+
+/*
+ wsrep callback for unordered data. The data is ignored: none of the
+ arguments is used and WSREP_CB_SUCCESS is always returned.
+*/
+wsrep_cb_status_t wsrep_unordered_cb(void* const ctx,
+ const void* const data,
+ size_t const size)
+{
+ return WSREP_CB_SUCCESS;
+}
diff --git a/sql/wsrep_applier.h b/sql/wsrep_applier.h
new file mode 100644
index 00000000000..424db466e53
--- /dev/null
+++ b/sql/wsrep_applier.h
@@ -0,0 +1,39 @@
+/* Copyright 2013 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef WSREP_APPLIER_H
+#define WSREP_APPLIER_H
+
+#include <my_config.h>
+#include "../wsrep/wsrep_api.h"
+
+/* wsrep callback prototypes */
+
+/*
+ Apply the write set in buf (buf_len bytes) using the applier thread passed
+ in ctx; meta carries the transaction meta data of the write set.
+*/
+wsrep_cb_status_t wsrep_apply_cb(void *ctx,
+ const void* buf, size_t buf_len,
+ uint32_t flags,
+ const wsrep_trx_meta_t* meta);
+
+/*
+ Commit (commit == true) or roll back (commit == false) the write set applied
+ by the applier thread passed in ctx. *exit may be set to true by the
+ callback.
+*/
+wsrep_cb_status_t wsrep_commit_cb(void *ctx,
+ uint32_t flags,
+ const wsrep_trx_meta_t* meta,
+ wsrep_bool_t* exit,
+ bool commit);
+
+/* Callback for unordered data. */
+wsrep_cb_status_t wsrep_unordered_cb(void* ctx,
+ const void* data,
+ size_t size);
+
+#endif /* WSREP_APPLIER_H */
diff --git a/sql/wsrep_binlog.cc b/sql/wsrep_binlog.cc
new file mode 100644
index 00000000000..5c5ebb9f780
--- /dev/null
+++ b/sql/wsrep_binlog.cc
@@ -0,0 +1,412 @@
+/* Copyright (C) 2013 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
+
+#include "wsrep_binlog.h"
+#include "wsrep_priv.h"
+
+/*
+ Write the contents of a cache to a memory buffer.
+
+ This function is much the same as MYSQL_BIN_LOG::write_cache(),
+ except that here we write into a buffer instead of the log file.
+
+ The cache is switched to read mode, copied from the beginning into a
+ buffer that is grown with my_realloc() as needed, and then switched back
+ to write mode at its original position.
+
+ @param cache cache to read from
+ @param buf out: allocated buffer holding the data, NULL on failure and
+ when the cache is empty; on success the caller is
+ presumably expected to my_free() it - confirm against callers
+ @param buf_len out: number of bytes in *buf, 0 on failure
+
+ @return 0 on success, ER_ERROR_ON_WRITE on failure
+ */
+int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len)
+{
+ *buf= NULL;
+ *buf_len= 0;
+
+ /* Remember the write position so that it can be restored afterwards. */
+ my_off_t const saved_pos(my_b_tell(cache));
+
+ if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
+ {
+ WSREP_ERROR("failed to initialize io-cache");
+ return ER_ERROR_ON_WRITE;
+ }
+
+ uint length = my_b_bytes_in_cache(cache);
+ if (unlikely(0 == length)) length = my_b_fill(cache);
+
+ size_t total_length = 0;
+
+ /* Each iteration appends the bytes currently held in the cache buffer. */
+ if (likely(length > 0)) do
+ {
+ total_length += length;
+ /*
+ Bail out if buffer grows too large.
+ A temporary fix to avoid allocating indefinitely large buffer,
+ not a real limit on a writeset size which includes other things
+ like header and keys.
+ */
+ if (total_length > wsrep_max_ws_size)
+ {
+ WSREP_WARN("transaction size limit (%lu) exceeded: %zu",
+ wsrep_max_ws_size, total_length);
+ goto error;
+ }
+ uchar* tmp = (uchar *)my_realloc(*buf, total_length,
+ MYF(MY_ALLOW_ZERO_PTR));
+ if (!tmp)
+ {
+ WSREP_ERROR("could not (re)allocate buffer: %zu + %u",
+ *buf_len, length);
+ goto error;
+ }
+ *buf = tmp;
+
+ memcpy(*buf + *buf_len, cache->read_pos, length);
+ *buf_len = total_length;
+ /* Mark the cache buffer as consumed before refilling it. */
+ cache->read_pos = cache->read_end;
+ } while ((cache->file >= 0) && (length = my_b_fill(cache)));
+
+ if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0))
+ {
+ WSREP_WARN("failed to initialize io-cache");
+ goto cleanup;
+ }
+
+ return 0;
+
+error:
+ /* Try to restore write mode, then fall through and release the buffer. */
+ if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0))
+ {
+ WSREP_WARN("failed to initialize io-cache");
+ }
+cleanup:
+ my_free(*buf);
+ *buf= NULL;
+ *buf_len= 0;
+ return ER_ERROR_ON_WRITE;
+}
+
+#define STACK_SIZE 4096 /* 4K - for buffer preallocated on the stack:
+ * many transactions would fit in there
+ * so there is no need to reach for the heap */
+
+/* Round length up to the nearest multiple of HEAP_PAGE_SIZE. */
+static inline size_t
+heap_size(size_t length)
+{
+  size_t const pages= (length + HEAP_PAGE_SIZE - 1) / HEAP_PAGE_SIZE;
+  return pages * HEAP_PAGE_SIZE;
+}
+
+/*
+  Append one contiguous chunk of data to the write set as ordered data.
+  A warning is logged if the provider does not return WSREP_OK.
+
+  @return the status returned by the provider's append_data()
+*/
+static inline wsrep_status_t
+wsrep_append_data(wsrep_t* const wsrep,
+                  wsrep_ws_handle_t* const ws,
+                  const void* const data,
+                  size_t const len)
+{
+  struct wsrep_buf const chunk = { data, len };
+
+  wsrep_status_t const status=
+    wsrep->append_data(wsrep, ws, &chunk, 1, WSREP_DATA_ORDERED, true);
+
+  if (WSREP_OK != status)
+  {
+    WSREP_WARN("append_data() returned %d", status);
+  }
+
+  return status;
+}
+
+/*
+  Write the contents of a cache to wsrep provider.
+
+  This function is much the same as MYSQL_BIN_LOG::write_cache(), except
+  that the data is handed to the wsrep provider instead of being written to
+  the log file.
+
+  This version reads all of the cache into a single buffer and then appends
+  it to the write set at once. Small transactions are collected in a buffer
+  on the stack; once the data no longer fits there, it is moved to a heap
+  buffer that grows in multiples of HEAP_PAGE_SIZE.
+
+  @param wsrep  provider handle
+  @param thd    thread whose write set handle is appended to
+  @param cache  cache to read from; restored to write mode at its original
+                position before returning
+  @param len    out: total amount of data written, set only on success
+
+  @return WSREP_OK on success, WSREP_TRX_SIZE_EXCEEDED if the size limit is
+          exceeded or memory cannot be allocated, the append_data() status
+          if appending fails, ER_ERROR_ON_WRITE if the cache cannot be
+          switched to read mode
+ */
+static int wsrep_write_cache_once(wsrep_t* const wsrep,
+                                  THD* const thd,
+                                  IO_CACHE* const cache,
+                                  size_t* const len)
+{
+  my_off_t const saved_pos(my_b_tell(cache));
+
+  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
+  {
+    WSREP_ERROR("failed to initialize io-cache");
+    return ER_ERROR_ON_WRITE;
+  }
+
+  int err(WSREP_OK);
+
+  size_t total_length(0);
+  uchar stack_buf[STACK_SIZE]; /* to avoid dynamic allocations for few data*/
+  uchar* heap_buf(NULL);
+  uchar* buf(stack_buf);
+  size_t allocated(sizeof(stack_buf));
+  size_t used(0);
+
+  uint length(my_b_bytes_in_cache(cache));
+  if (unlikely(0 == length)) length = my_b_fill(cache);
+
+  if (likely(length > 0)) do
+  {
+    total_length += length;
+    /*
+      Bail out if buffer grows too large.
+      A temporary fix to avoid allocating indefinitely large buffer,
+      not a real limit on a writeset size which includes other things
+      like header and keys.
+    */
+    if (unlikely(total_length > wsrep_max_ws_size))
+    {
+      WSREP_WARN("transaction size limit (%lu) exceeded: %zu",
+                 wsrep_max_ws_size, total_length);
+      err = WSREP_TRX_SIZE_EXCEEDED;
+      goto cleanup;
+    }
+
+    if (total_length > allocated)
+    {
+      size_t const new_size(heap_size(total_length));
+      uchar* tmp = (uchar *)my_realloc(heap_buf, new_size,
+                                       MYF(MY_ALLOW_ZERO_PTR));
+      if (!tmp)
+      {
+        WSREP_ERROR("could not (re)allocate buffer: %zu + %u",
+                    allocated, length);
+        err = WSREP_TRX_SIZE_EXCEEDED;
+        goto cleanup;
+      }
+
+      heap_buf = tmp;
+      allocated = new_size;
+
+      /*
+        First spill from the stack to the heap: carry over what has been
+        collected so far. This must be decided before buf is repointed;
+        the previous code asserted buf == stack_buf only after assigning
+        buf = heap_buf, so the assertion could never hold.
+      */
+      if (buf == stack_buf && used > 0)
+      {
+        DBUG_ASSERT(used <= STACK_SIZE);
+        memcpy(heap_buf, stack_buf, used);
+      }
+
+      buf = heap_buf;
+    }
+
+    memcpy(buf + used, cache->read_pos, length);
+    used = total_length;
+    /* Mark the cache buffer as consumed before refilling it. */
+    cache->read_pos = cache->read_end;
+  } while ((cache->file >= 0) && (length = my_b_fill(cache)));
+
+  if (used > 0)
+    err = wsrep_append_data(wsrep, &thd->wsrep_ws_handle, buf, used);
+
+  if (WSREP_OK == err) *len = total_length;
+
+cleanup:
+  if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0))
+  {
+    WSREP_ERROR("failed to reinitialize io-cache");
+  }
+
+  /* On failure keep a copy of what was collected for later inspection. */
+  if (unlikely(WSREP_OK != err)) wsrep_dump_rbr_buf(thd, buf, used);
+
+  my_free(heap_buf);
+  return err;
+}
+
+/*
+ Write the contents of a cache to wsrep provider.
+
+ This function is much the same as MYSQL_BIN_LOG::write_cache(), except
+ that the data is handed to the wsrep provider instead of being written to
+ the log file.
+
+ This version appends the data incrementally, one cache buffer at a time,
+ as it is read from the cache. Chunks that were already appended are not
+ undone here if a later chunk fails or the size limit is exceeded.
+
+ @param len out: total amount of data written, set only on success
+
+ @return WSREP_OK on success, WSREP_TRX_ERROR if the cache cannot be
+ switched to read mode, WSREP_TRX_SIZE_EXCEEDED if the size limit is
+ exceeded, otherwise the status returned by wsrep_append_data()
+ */
+static int wsrep_write_cache_inc(wsrep_t* const wsrep,
+ THD* const thd,
+ IO_CACHE* const cache,
+ size_t* const len)
+{
+ /* Remember the write position so that it can be restored afterwards. */
+ my_off_t const saved_pos(my_b_tell(cache));
+
+ if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
+ {
+ WSREP_ERROR("failed to initialize io-cache");
+ return WSREP_TRX_ERROR;
+ }
+
+ int err(WSREP_OK);
+
+ size_t total_length(0);
+
+ uint length(my_b_bytes_in_cache(cache));
+ if (unlikely(0 == length)) length = my_b_fill(cache);
+
+ if (likely(length > 0)) do
+ {
+ total_length += length;
+ /* bail out if buffer grows too large
+ not a real limit on a writeset size which includes other things
+ like header and keys.
+ */
+ if (unlikely(total_length > wsrep_max_ws_size))
+ {
+ WSREP_WARN("transaction size limit (%lu) exceeded: %zu",
+ wsrep_max_ws_size, total_length);
+ err = WSREP_TRX_SIZE_EXCEEDED;
+ goto cleanup;
+ }
+
+ /* Hand the current cache buffer to the provider without copying it. */
+ if(WSREP_OK != (err=wsrep_append_data(wsrep, &thd->wsrep_ws_handle,
+ cache->read_pos, length)))
+ goto cleanup;
+
+ /* Mark the cache buffer as consumed before refilling it. */
+ cache->read_pos = cache->read_end;
+ } while ((cache->file >= 0) && (length = my_b_fill(cache)));
+
+ if (WSREP_OK == err) *len = total_length;
+
+cleanup:
+ if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0))
+ {
+ WSREP_ERROR("failed to reinitialize io-cache");
+ }
+
+ return err;
+}
+
+/*
+  Write the contents of a cache to wsrep provider.
+
+  Dispatches to the incremental implementation when
+  wsrep_incremental_data_collection is set, and to the single-buffer
+  implementation otherwise.
+
+  @param len  out: total amount of data written
+  @return status of the selected implementation
+ */
+int wsrep_write_cache(wsrep_t*  const wsrep,
+                      THD*      const thd,
+                      IO_CACHE* const cache,
+                      size_t*   const len)
+{
+  return wsrep_incremental_data_collection
+    ? wsrep_write_cache_inc(wsrep, thd, cache, len)
+    : wsrep_write_cache_once(wsrep, thd, cache, len);
+}
+
+/*
+  Dump a replication buffer to the file
+  <wsrep_data_home_dir>/GRA_<thread id>_<seqno>.log.
+
+  Failures are logged and otherwise ignored. The file is created even when
+  the buffer is empty.
+
+  @param thd      thread the buffer belongs to; supplies the thread id and
+                  the seqno used in the file name
+  @param rbr_buf  data to dump
+  @param buf_len  size of rbr_buf in bytes, may be 0
+*/
+void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len)
+{
+  char filename[PATH_MAX]= {0};
+  int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log",
+                    wsrep_data_home_dir, thd->thread_id,
+                    (long long)wsrep_thd_trx_seqno(thd));
+  if (len >= PATH_MAX)
+  {
+    WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len);
+    return;
+  }
+
+  FILE *of= fopen(filename, "wb");
+
+  if (of)
+  {
+    /*
+      fwrite() returns 0 when asked to write an object of size 0, which
+      must not be mistaken for a write failure: callers may pass an empty
+      buffer.
+    */
+    if (buf_len > 0 && fwrite(rbr_buf, buf_len, 1, of) != 1)
+      WSREP_ERROR("Failed to write buffer of length %llu to '%s'",
+                  (unsigned long long)buf_len, filename);
+
+    fclose(of);
+  }
+  else
+  {
+    WSREP_ERROR("Failed to open file '%s': %d (%s)",
+                filename, errno, strerror(errno));
+  }
+}
+extern handlerton *binlog_hton;
+
+/*
+  wsrep makes use of binlog's caches even when binlogging itself is not
+  activated. In that case closing a connection has to call the actual
+  binlog handlerton method, which is done here whenever the connection has
+  binlog handlerton data attached.
+
+  Todo: split binlog hton from its caches so that wsrep can use the caches
+  without referring to binlog's stuff.
+
+  @return always 0
+*/
+int wsrep_binlog_close_connection(THD* thd)
+{
+  DBUG_ENTER("wsrep_binlog_close_connection");
+
+  const bool has_binlog_data= (thd_get_ha_data(thd, binlog_hton) != NULL);
+  if (has_binlog_data)
+  {
+    binlog_hton->close_connection(binlog_hton, thd);
+  }
+
+  DBUG_RETURN(0);
+}
+
+/*
+  Forward a savepoint set request to the binlog handlerton, but only while
+  wsrep emulates the binary log; otherwise do nothing and return 0.
+*/
+int wsrep_binlog_savepoint_set(THD *thd,  void *sv)
+{
+  return wsrep_emulate_bin_log
+    ? binlog_hton->savepoint_set(binlog_hton, thd, sv)
+    : 0;
+}
+
+/*
+  Forward a rollback-to-savepoint request to the binlog handlerton, but only
+  while wsrep emulates the binary log; otherwise do nothing and return 0.
+*/
+int wsrep_binlog_savepoint_rollback(THD *thd, void *sv)
+{
+  return wsrep_emulate_bin_log
+    ? binlog_hton->savepoint_rollback(binlog_hton, thd, sv)
+    : 0;
+}
+
+/*
+ Dump the contents of a cache to the file
+ <wsrep_data_home_dir>/GRA_<thread id>_<seqno>.log without going through an
+ intermediate buffer: the cache is switched to read mode, written out one
+ cache buffer at a time and then switched back to write mode at its
+ original position.
+
+ Failures are logged and otherwise ignored.
+
+ @param thd thread the cache belongs to; supplies the thread id and the
+ seqno used in the file name
+ @param cache cache to dump
+*/
+void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache)
+{
+ char filename[PATH_MAX]= {0};
+ int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log",
+ wsrep_data_home_dir, thd->thread_id,
+ (long long)wsrep_thd_trx_seqno(thd));
+ size_t bytes_in_cache = 0;
+ // check path
+ if (len >= PATH_MAX)
+ {
+ WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len);
+ return ;
+ }
+ // init cache
+ // remember the write position so that it can be restored at cleanup
+ my_off_t const saved_pos(my_b_tell(cache));
+ if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
+ {
+ WSREP_ERROR("failed to initialize io-cache");
+ return ;
+ }
+ // open file
+ FILE* of = fopen(filename, "wb");
+ if (!of)
+ {
+ WSREP_ERROR("Failed to open file '%s': %d (%s)",
+ filename, errno, strerror(errno));
+ goto cleanup;
+ }
+ // ready to write
+ bytes_in_cache= my_b_bytes_in_cache(cache);
+ if (unlikely(bytes_in_cache == 0)) bytes_in_cache = my_b_fill(cache);
+ if (likely(bytes_in_cache > 0)) do
+ {
+ if (my_fwrite(of, cache->read_pos, bytes_in_cache,
+ MYF(MY_WME | MY_NABP)) == (size_t) -1)
+ {
+ WSREP_ERROR("Failed to write file '%s'", filename);
+ goto cleanup;
+ }
+ // mark the cache buffer as consumed before refilling it
+ cache->read_pos= cache->read_end;
+ } while ((cache->file >= 0) && (bytes_in_cache= my_b_fill(cache)));
+ // an error flag left on the cache means the dump may be incomplete
+ if(cache->error == -1)
+ {
+ WSREP_ERROR("RBR inconsistent");
+ goto cleanup;
+ }
+cleanup:
+ // init back
+ if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0))
+ {
+ WSREP_ERROR("failed to reinitialize io-cache");
+ }
+ // close file
+ if (of) fclose(of);
+}
diff --git a/sql/wsrep_binlog.h b/sql/wsrep_binlog.h
new file mode 100644
index 00000000000..c29d51caf2c
--- /dev/null
+++ b/sql/wsrep_binlog.h
@@ -0,0 +1,56 @@
+/* Copyright (C) 2013 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
+
+#ifndef WSREP_BINLOG_H
+#define WSREP_BINLOG_H
+
+#include "sql_class.h" // THD, IO_CACHE
+
+#define HEAP_PAGE_SIZE 65536 /* 64K */
+#define WSREP_MAX_WS_SIZE 2147483647 /* 2GB - 1 byte */
+
+/*
+ Write the contents of a cache to a memory buffer.
+
+ This function is much the same as MYSQL_BIN_LOG::write_cache(),
+ except that here we write into a buffer instead of the log file.
+ */
+int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len);
+
+/*
+ Write the contents of a cache to wsrep provider.
+
+ This function is much the same as MYSQL_BIN_LOG::write_cache(),
+ except that here the data is handed to the wsrep provider instead of
+ being written to the log file.
+
+ @param len total amount of data written
+ @return wsrep error status
+ */
+int wsrep_write_cache (wsrep_t* wsrep,
+ THD* thd,
+ IO_CACHE* cache,
+ size_t* len);
+
+/* Dump replication buffer to disk */
+void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len);
+
+/* Dump replication buffer to disk without intermediate buffer */
+void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache);
+
+/* Wrappers around the binlog handlerton's connection/savepoint methods */
+int wsrep_binlog_close_connection(THD* thd);
+int wsrep_binlog_savepoint_set(THD *thd, void *sv);
+int wsrep_binlog_savepoint_rollback(THD *thd, void *sv);
+
+#endif /* WSREP_BINLOG_H */
+
+#endif /* WSREP_BINLOG_H */
diff --git a/sql/wsrep_check_opts.cc b/sql/wsrep_check_opts.cc
new file mode 100644
index 00000000000..188f0696bff
--- /dev/null
+++ b/sql/wsrep_check_opts.cc
@@ -0,0 +1,396 @@
+/* Copyright 2011 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+//#include <mysqld.h>
+#include <sql_class.h>
+//#include <sql_plugin.h>
+//#include <set_var.h>
+
+#include "wsrep_mysqld.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+
+/* This file is about checking for correctness of mysql configuration options */
+
+struct opt // one configuration option to check: its name and its current string value
+{
+ const char* const name; // option name, spelled with '_' separators
+ const char* value; // starts as the default; find_opts() replaces it when the option is given
+};
+
+/* A list of options to check.
+ * At first we assume default values and then see if they are changed on CLI or
+ * in my.cnf
+ *
+ * The entries are addressed through the index enum that follows this table,
+ * so both must list the options in the same order. The { 0, 0 } entry
+ * terminates the list: find_opts() stops at the first entry whose name is 0. */
+static struct opt opts[] =
+{
+ { "wsrep_slave_threads", "1" }, // mysqld.cc
+ { "bind_address", "0.0.0.0" }, // mysqld.cc
+ { "wsrep_sst_method", "rsync" }, // mysqld.cc
+ { "wsrep_sst_receive_address","AUTO"}, // mysqld.cc
+ { "binlog_format", "ROW" }, // mysqld.cc
+ { "wsrep_provider", "none" }, // mysqld.cc
+#if 0
+ { "query_cache_type", "0" }, // mysqld.cc
+ { "query_cache_size", "0" }, // mysqld.cc
+#endif
+ { "locked_in_memory", "0" }, // mysqld.cc
+ { "wsrep_cluster_address", "0" }, // mysqld.cc
+ { "locks_unsafe_for_binlog", "0" }, // ha_innodb.cc
+ { "autoinc_lock_mode", "1" }, // ha_innodb.cc
+ { 0, 0 }
+};
+
+enum // indices into opts[]; the enumerators must stay in the same order as the opts[] entries
+{
+ WSREP_SLAVE_THREADS,
+ BIND_ADDRESS,
+ WSREP_SST_METHOD,
+ WSREP_SST_RECEIVE_ADDRESS,
+ BINLOG_FORMAT,
+ WSREP_PROVIDER,
+#if 0
+ QUERY_CACHE_TYPE,
+ QUERY_CACHE_SIZE,
+#endif
+ LOCKED_IN_MEMORY,
+ WSREP_CLUSTER_ADDRESS,
+ LOCKS_UNSAFE_FOR_BINLOG,
+ AUTOINC_LOCK_MODE
+};
+
+
+/* Owns a heap-allocated duplicate of an argv[] vector.
+ * If any allocation fails, everything allocated so far is released and argv_
+ * is left as 0, which is how callers detect the failure. */
+struct argv_copy
+{
+  int const argc_;
+  char**    argv_;
+
+  argv_copy (int const argc, const char* const argv[]) :
+    argc_ (argc),
+    argv_ (reinterpret_cast<char**>(calloc(argc_, sizeof(char*))))
+  {
+    if (!argv_) return;
+
+    int idx = 0;
+    while (idx < argc_)
+    {
+      char* const dup = strdup(argv[idx]);
+      argv_[idx] = dup;
+
+      if (!dup)
+      {
+        argv_free (); // release whatever has been allocated so far
+        return;
+      }
+
+      ++idx;
+    }
+  }
+
+  ~argv_copy () { argv_free (); }
+
+private:
+  // copying is disabled: declared but never defined
+  argv_copy (const argv_copy&);
+  argv_copy& operator= (const argv_copy&);
+
+  void argv_free()
+  {
+    if (!argv_) return;
+
+    // slots are filled front to back and calloc() zeroed the vector, so the
+    // first null slot marks the end of the allocated strings
+    int idx = 0;
+    while ((idx < argc_) && argv_[idx])
+    {
+      free (argv_[idx]);
+      ++idx;
+    }
+
+    free (argv_);
+    argv_ = 0;
+  }
+};
+
+/* Returns true if the argument starts with the long option prefix "--".
+ * The characters are compared one by one and the second one is looked at only
+ * when the first is '-', so an empty argument is never read past its
+ * terminating NUL and no unaligned or type-punned short load is needed. */
+static inline bool
+is_long_opt (const char* const arg)
+{
+  return ('-' == arg[0] && '-' == arg[1]);
+}
+
+/* Normalizes long options to have '_' instead of '-'.
+ *
+ * Only the option name is rewritten, i.e. the characters between the leading
+ * "--" and the first '=' (or the end of the argument if there is no '=');
+ * the value part is left untouched. Arguments that do not start with "--"
+ * are not modified.
+ *
+ * @param a copy of the argument vector, modified in place
+ * @return 0 on success, EINVAL if the copy holds no argument vector */
+static int
+normalize_opts (argv_copy& a)
+{
+  if (!a.argv_) return EINVAL;
+
+  for (int i = 0; i < a.argc_; ++i)
+  {
+    char* ptr = a.argv_[i];
+
+    if (!is_long_opt (ptr)) continue; // not a long option
+
+    ptr += 2;
+    const char* end = strchr(ptr, '=');
+
+    if (!end) end = ptr + strlen(ptr);
+
+    for (; ptr != end; ++ptr) if ('-' == *ptr) *ptr = '_';
+  }
+
+  return 0;
+}
+
+/* Find required options in the argument list and change their values.
+ *
+ * Every argument longer than two characters is treated as a long option and
+ * its first two characters are skipped. The option name is the text up to the
+ * first '=' (or the whole remainder if there is no '='). An argument selects
+ * the first entry of opts whose name is a suffix of that option name, so
+ * prefixed spellings (e.g. innodb_autoinc_lock_mode for autoinc_lock_mode)
+ * are recognized, while a longer option that merely contains a checked name
+ * (e.g. wsrep_provider_options vs. wsrep_provider) or a value that happens to
+ * contain one no longer overwrites the entry.
+ *
+ * The value is taken from after the '=' if there is one, otherwise from the
+ * next argument unless that one starts with '-', otherwise it is set to "".
+ * Stored values point into the strings owned by a; they are not copied.
+ *
+ * @param a    normalized copy of the argument vector
+ * @param opts option list terminated by an entry with a null name
+ * @return always 0 */
+static int
+find_opts (argv_copy& a, struct opt* const opts)
+{
+  for (int i = 0; i < a.argc_; ++i)
+  {
+    /*
+      We're interested only in long options, ensure that the arg is of
+      sufficient length.
+    */
+    if (strlen(a.argv_[i]) <= 2) continue;
+
+    const char* const ptr = a.argv_[i] + 2;
+    const char* const eq = strchr(ptr, '=');
+    size_t const name_len = eq ? static_cast<size_t>(eq - ptr) : strlen(ptr);
+
+    for (struct opt* opt = opts; 0 != opt->name; ++opt)
+    {
+      size_t const opt_len = strlen(opt->name);
+
+      /* the option name must end with opt->name */
+      if (name_len < opt_len ||
+          strncmp(ptr + (name_len - opt_len), opt->name, opt_len))
+      {
+        continue; // try next option
+      }
+
+      if (eq)
+      {
+        /* 1. the value follows the '=' */
+        opt->value = eq + 1;
+      }
+      /* 2. if no '=', try next element in the argument vector,
+       *    also check that the next element is not an option itself */
+      else if (i + 1 < a.argc_ && *(a.argv_[i + 1]) != '-')
+      {
+        ++i;
+        opt->value = a.argv_[i];
+      }
+      else opt->value = ""; // no value supplied (like boolean opt)
+
+      break; // option found, break inner loop
+    }
+  }
+
+  return 0;
+}
+
+/* Parses string for an integer. Returns 0 on success.
+ *
+ * The number may be followed by a single size suffix: k/K (x 1024),
+ * m/M (x 1024^2) or g/G (x 1024^3). An empty value, a value without any
+ * digits (e.g. just "k"), a number that strtoll() reports as out of range,
+ * or any other trailing text is rejected.
+ *
+ * @param opt  option whose value is parsed (its name goes into the error log)
+ * @param val  receives the parsed value; meaningful only when 0 is returned
+ * @param base numeric base passed to strtoll()
+ * @return 0 on success, EINVAL (after logging an error) otherwise */
+int get_long_long (const struct opt& opt, long long* const val, int const base)
+{
+  const char* const str = opt.value;
+
+  if ('\0' != *str)
+  {
+    char* endptr;
+
+    errno = 0;
+    *val = strtoll (str, &endptr, base);
+
+    /* endptr == str: no digits were consumed; ERANGE: result was clamped */
+    if (endptr != str && ERANGE != errno)
+    {
+      switch (*endptr)
+      {
+      case 'k': case 'K':
+        *val *= 1024L;
+        endptr++;
+        break;
+      case 'm': case 'M':
+        *val *= 1024L * 1024L;
+        endptr++;
+        break;
+      case 'g': case 'G':
+        *val *= 1024L * 1024L * 1024L;
+        endptr++;
+        break;
+      default:
+        break;
+      }
+
+      if ('\0' == *endptr) return 0; // the whole string was a valid integer
+    }
+  }
+
+  WSREP_ERROR ("Bad value for *%s: '%s'. Should be integer.",
+               opt.name, opt.value);
+
+  return EINVAL;
+}
+
+/* Parses a boolean option value.
+ *
+ * According to
+ * http://dev.mysql.com/doc/refman/5.1/en/dynamic-system-variables.html
+ * variables of type "boolean" can be set to 0, 1, ON or OFF, but on the
+ * command line or in an option file the numeric values are to be used.
+ *
+ * So, after skipping leading whitespace, it is '0' for FALSE, '1' or empty
+ * string (option given without a value) for TRUE; anything else is an error.
+ *
+ * @param opt option whose value is parsed (its name goes into the error log)
+ * @param val receives the parsed value; meaningful only when 0 is returned
+ * @return 0 on success, EINVAL (after logging an error) otherwise */
+int get_bool (const struct opt& opt, bool* const val)
+{
+  const char* str = opt.value;
+
+  /* skip initial whitespaces; the cast keeps isspace() well defined for bytes
+   * with the high bit set on platforms where plain char is signed */
+  while (isspace(static_cast<unsigned char>(*str))) ++str;
+
+  if ('\0' == str[0]) // no value: the option was merely mentioned
+  {
+    *val = true;
+    return 0;
+  }
+
+  if ('\0' == str[1] && ('0' == str[0] || '1' == str[0]))
+  {
+    *val = ('1' == str[0]);
+    return 0;
+  }
+
+  WSREP_ERROR ("Bad value for *%s: '%s'. Should be '0', '1' or empty string.",
+               opt.name, opt.value);
+
+  return EINVAL;
+}
+
+static int
+check_opts (int const argc, const char* const argv[], struct opt opts[])
+{
+ /* Validates the startup options relevant to wsrep and logs every problem
+ found. Returns 0 if all checks pass, ENOMEM if argv could not be copied,
+ the error code of a failed normalizing/parsing step, or EINVAL if at least
+ one unsupported setting was detected.
+
+ First, make a copy of argv to be able to manipulate it */
+ argv_copy a(argc, argv);
+
+ if (!a.argv_)
+ {
+ WSREP_ERROR ("Could not copy argv vector: not enough memory.");
+ return ENOMEM;
+ }
+
+ int err = normalize_opts (a);
+ if (err)
+ {
+ WSREP_ERROR ("Failed to normalize options.");
+ return err;
+ }
+
+ err = find_opts (a, opts);
+ if (err)
+ {
+ WSREP_ERROR ("Failed to parse options.");
+ return err;
+ }
+
+ /* At this point we have updated default values in our option list to
+ what has been specified on the command line / my.cnf
+
+ NOTE(review): values taken from the arguments point into the strings owned
+ by 'a' (see find_opts()), which are freed when this function returns, so
+ those opts[].value pointers must not be dereferenced afterwards. */
+
+ long long slave_threads;
+ err = get_long_long (opts[WSREP_SLAVE_THREADS], &slave_threads, 10);
+ if (err) return err;
+
+ int rcode = 0;
+
+ if (slave_threads > 1)
+ /* Need to check AUTOINC_LOCK_MODE and LOCKS_UNSAFE_FOR_BINLOG */
+ {
+ long long autoinc_lock_mode;
+ err = get_long_long (opts[AUTOINC_LOCK_MODE], &autoinc_lock_mode, 10);
+ if (err) return err;
+
+ bool locks_unsafe_for_binlog;
+ err = get_bool (opts[LOCKS_UNSAFE_FOR_BINLOG],&locks_unsafe_for_binlog); // only validated; the parsed value is not used by any check in this function
+ if (err) return err;
+
+ if (autoinc_lock_mode != 2)
+ {
+ WSREP_ERROR ("Parallel applying (wsrep_slave_threads > 1) requires"
+ " innodb_autoinc_lock_mode = 2.");
+ rcode = EINVAL;
+ }
+ }
+
+ bool locked_in_memory;
+ err = get_bool (opts[LOCKED_IN_MEMORY], &locked_in_memory);
+ if (err) { WSREP_ERROR("get_bool error: %s", strerror(err)); return err; }
+ if (locked_in_memory)
+ {
+ WSREP_ERROR ("Memory locking is not supported (locked_in_memory=%s)",
+ locked_in_memory ? "ON" : "OFF"); // always "ON" inside this branch
+ rcode = EINVAL;
+ }
+
+ if (!strcasecmp(opts[WSREP_SST_METHOD].value,"mysqldump"))
+ {
+ if (!strcasecmp(opts[BIND_ADDRESS].value, "127.0.0.1") ||
+ !strcasecmp(opts[BIND_ADDRESS].value, "localhost"))
+ {
+ WSREP_ERROR ("wsrep_sst_method is set to 'mysqldump' yet "
+ "mysqld bind_address is set to '%s', which makes it "
+ "impossible to receive state transfer from another "
+ "node, since mysqld won't accept such connections. "
+ "If you wish to use mysqldump state transfer method, "
+ "set bind_address to allow mysql client connections "
+ "from other cluster members (e.g. 0.0.0.0).",
+ opts[BIND_ADDRESS].value);
+ rcode = EINVAL;
+ }
+ }
+ else
+ {
+ // non-mysqldump SST requires wsrep_cluster_address on startup
+ if (strlen(opts[WSREP_CLUSTER_ADDRESS].value) == 0)
+ {
+ WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be "
+ "configured on startup.",opts[WSREP_SST_METHOD].value);
+ rcode = EINVAL;
+ }
+ }
+
+ if (strcasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value, "AUTO"))
+ {
+ if (!strncasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value,
+ "127.0.0.1", strlen("127.0.0.1")) ||
+ !strncasecmp(opts[WSREP_SST_RECEIVE_ADDRESS].value,
+ "localhost", strlen("localhost")))
+ {
+ WSREP_WARN ("wsrep_sst_receive_address is set to '%s' which "
+ "makes it impossible for another host to reach this "
+ "one. Please set it to the address which this node "
+ "can be connected at by other cluster members.",
+ opts[WSREP_SST_RECEIVE_ADDRESS].value);
+// rcode = EINVAL;
+ }
+ }
+
+ if (strcasecmp(opts[WSREP_PROVIDER].value, "none")) // binlog_format is only checked when a provider is configured
+ {
+ if (strcasecmp(opts[BINLOG_FORMAT].value, "ROW"))
+ {
+ WSREP_ERROR ("Only binlog_format = 'ROW' is currently supported. "
+ "Configured value: '%s'. Please adjust your "
+ "configuration.", opts[BINLOG_FORMAT].value);
+
+ rcode = EINVAL;
+ }
+ }
+
+ return rcode;
+}
+
+int
+wsrep_check_opts (int const argc, char* const* const argv) // entry point: forwards to check_opts() with the file-static opts[] table and returns its result
+{
+ return check_opts (argc, argv, opts);
+}
+
diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc
new file mode 100644
index 00000000000..78d189fbd61
--- /dev/null
+++ b/sql/wsrep_hton.cc
@@ -0,0 +1,615 @@
+/* Copyright 2008-2015 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <mysqld.h>
+#include "sql_base.h"
+#include "rpl_filter.h"
+#include <sql_class.h>
+#include "wsrep_mysqld.h"
+#include "wsrep_binlog.h"
+#include "wsrep_xid.h"
+#include <cstdio>
+#include <cstdlib>
+#include "debug_sync.h"
+
+extern ulonglong thd_to_trx_id(THD *thd);
+
+extern "C" int thd_binlog_format(const MYSQL_THD thd);
+// todo: share interface with ha_innodb.c
+
+enum wsrep_trx_status wsrep_run_wsrep_commit(THD *thd, handlerton *hton,
+ bool all);
+
+/*
+  Resets the wsrep bookkeeping of a THD after a local transaction
+  commit/rollback, replay or TOI. Does nothing unless WSREP(thd) holds.
+*/
+void wsrep_cleanup_transaction(THD *thd)
+{
+  if (WSREP(thd))
+  {
+    if (wsrep_emulate_bin_log)
+    {
+      thd_binlog_trx_reset(thd);
+    }
+
+    /* forget the write set handle and its replication metadata */
+    thd->wsrep_ws_handle.trx_id    = WSREP_UNDEFINED_TRX_ID;
+    thd->wsrep_trx_meta.gtid       = WSREP_GTID_UNDEFINED;
+    thd->wsrep_trx_meta.depends_on = WSREP_SEQNO_UNDEFINED;
+
+    /* back to local execution mode with no rows accounted */
+    thd->wsrep_exec_mode     = LOCAL_STATE;
+    thd->wsrep_affected_rows = 0;
+  }
+}
+
+/*
+ wsrep hton
+*/
+handlerton *wsrep_hton;
+
+
+/*
+  Adds the wsrep hton to the transaction's participants at commit time,
+  provided that a supported engine (InnoDB or TokuDB) has registered already.
+
+  Nothing is registered for TOTAL_ORDER operations, while applying TOI, or
+  for local OPTIMIZE/ANALYZE/REPAIR statements that are not written to binlog.
+
+  Both LOCAL_MODE and REPL_RECV transactions need the registration so that
+  commit runs in 2pc and the wsrep position gets properly recorded in storage
+  engines.
+
+  Note that all hton calls should immediately return for threads that are
+  in REPL_RECV mode as their states are controlled by wsrep appliers or
+  replaying code. Only threads in LOCAL_MODE should run wsrep callbacks
+  from hton methods.
+
+  @param thd  connection whose transaction is about to commit
+  @param all  true to use thd->transaction.all, false for thd->transaction.stmt
+*/
+void wsrep_register_hton(THD* thd, bool all)
+{
+  if (thd->wsrep_exec_mode == TOTAL_ORDER || thd->wsrep_apply_toi)
+  {
+    return;
+  }
+
+  if (thd->wsrep_exec_mode == LOCAL_STATE &&
+      (thd_sql_command(thd) == SQLCOM_OPTIMIZE ||
+       thd_sql_command(thd) == SQLCOM_ANALYZE ||
+       thd_sql_command(thd) == SQLCOM_REPAIR) &&
+      thd->lex->no_write_to_binlog == 1)
+  {
+    WSREP_DEBUG("Skipping wsrep_register_hton for LOCAL sql admin command : %s",
+                thd->query());
+    return;
+  }
+
+  THD_TRANS *trans;
+  if (all)
+    trans= &thd->transaction.all;
+  else
+    trans= &thd->transaction.stmt;
+
+  Ha_trx_info *ha_info= trans->ha_list;
+  while (WSREP(thd) && ha_info)
+  {
+    const bool supported_engine=
+      (ha_info->ht()->db_type == DB_TYPE_INNODB) ||
+      (ha_info->ht()->db_type == DB_TYPE_TOKUDB);
+
+    if (!supported_engine)
+    {
+      ha_info= ha_info->next();
+      continue;
+    }
+
+    trans_register_ha(thd, all, wsrep_hton);
+
+    /*
+      Follow the engine's read/write setting. As an exception, CTAS with an
+      empty result set would not be replicated unless the wsrep hton is
+      declared read/write here.
+    */
+    if (ha_info->is_trx_read_write() ||
+        (thd->lex->sql_command == SQLCOM_CREATE_TABLE &&
+         thd->wsrep_exec_mode == LOCAL_STATE))
+    {
+      thd->ha_data[wsrep_hton->slot].ha_info[all].set_trx_read_write();
+    }
+    return; // the first supported engine found is enough
+  }
+}
+
+/*
+  Finishes a local commit on the wsrep side.
+
+  In LOCAL_COMMIT mode (the transaction has got its seqno from the provider
+  and must commit) wsrep->post_commit() is called, a failure is only logged,
+  and the transaction state is cleaned up. In LOCAL_STATE mode only the
+  cleanup is performed. Nothing is done in any other mode or when WSREP(thd)
+  does not hold.
+ */
+void wsrep_post_commit(THD* thd, bool all)
+{
+  if (!WSREP(thd)) return;
+
+  if (thd->wsrep_exec_mode == LOCAL_COMMIT)
+  {
+    DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED);
+
+    if (wsrep->post_commit(wsrep, &thd->wsrep_ws_handle))
+    {
+      DBUG_PRINT("wsrep", ("set committed fail"));
+      WSREP_WARN("set committed fail: %llu %d",
+                 (long long)thd->real_id, thd->get_stmt_da()->status());
+    }
+    wsrep_cleanup_transaction(thd);
+  }
+  else if (thd->wsrep_exec_mode == LOCAL_STATE)
+  {
+    /*
+      Non-InnoDB statements may have populated events in stmt cache => cleanup
+    */
+    WSREP_DEBUG("cleanup transaction for LOCAL_STATE: %s", thd->query());
+    wsrep_cleanup_transaction(thd);
+  }
+}
+
+/*
+  hton close_connection callback.
+
+  wsrep uses binlog's caches even when binlogging itself is not activated,
+  so closing a connection has to call the actual binlog method. Threads in
+  REPL_RECV mode skip the call and return 0.
+  Todo: split binlog hton from its caches so that wsrep can use the caches
+        without referring to binlog's stuff.
+*/
+static int
+wsrep_close_connection(handlerton* hton, THD* thd)
+{
+  DBUG_ENTER("wsrep_close_connection");
+
+  if (thd->wsrep_exec_mode != REPL_RECV)
+  {
+    DBUG_RETURN(wsrep_binlog_close_connection (thd));
+  }
+
+  DBUG_RETURN(0);
+}
+
+/*
+  hton prepare callback: replicates the transaction through
+  wsrep_run_wsrep_commit() and returns its result, or returns 0 when there is
+  nothing to replicate at this point.
+
+  prepare/wsrep_run_wsrep_commit can fail in two ways
+  - certification test or an equivalent. As a result,
+    the current transaction just rolls back
+    Error codes:
+           WSREP_TRX_CERT_FAIL, WSREP_TRX_SIZE_EXCEEDED, WSREP_TRX_ERROR
+  - a post-certification failure makes this server unable to
+    commit its own WS and therefore the server must abort
+*/
+static int wsrep_prepare(handlerton *hton, THD *thd, bool all)
+{
+  DBUG_ENTER("wsrep_prepare");
+
+  if (thd->wsrep_exec_mode == REPL_RECV)
+  {
+    DBUG_RETURN(0);
+  }
+
+  DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write());
+  DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE);
+  DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED);
+
+  /* proceed only if 'all' is set or neither OPTION_NOT_AUTOCOMMIT nor
+     OPTION_BEGIN is set for the thread */
+  const bool ends_trx=
+    all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
+  if (!ends_trx)
+  {
+    DBUG_RETURN(0);
+  }
+
+  /* wsrep switched off for the session, or nothing cached to replicate */
+  if (!thd->variables.wsrep_on || wsrep_trans_cache_is_empty(thd))
+  {
+    DBUG_RETURN(0);
+  }
+
+  DBUG_RETURN (wsrep_run_wsrep_commit(thd, hton, all));
+}
+
+/*
+  hton savepoint_set callback. Forwards to wsrep_binlog_savepoint_set() and
+  returns its result; returns 0 without doing anything for threads in
+  REPL_RECV mode or when wsrep_emulate_bin_log is off.
+*/
+static int wsrep_savepoint_set(handlerton *hton, THD *thd, void *sv)
+{
+  DBUG_ENTER("wsrep_savepoint_set");
+
+  if (thd->wsrep_exec_mode == REPL_RECV || !wsrep_emulate_bin_log)
+  {
+    DBUG_RETURN(0);
+  }
+
+  const int status = wsrep_binlog_savepoint_set(thd, sv);
+  DBUG_RETURN(status);
+}
+
+/*
+  hton savepoint_rollback callback. Forwards to
+  wsrep_binlog_savepoint_rollback() and returns its result; returns 0 without
+  doing anything for threads in REPL_RECV mode or when wsrep_emulate_bin_log
+  is off.
+*/
+static int wsrep_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
+{
+  DBUG_ENTER("wsrep_savepoint_rollback");
+
+  if (thd->wsrep_exec_mode == REPL_RECV || !wsrep_emulate_bin_log)
+  {
+    DBUG_RETURN(0);
+  }
+
+  const int status = wsrep_binlog_savepoint_rollback(thd, sv);
+  DBUG_RETURN(status);
+}
+
+static int wsrep_rollback(handlerton *hton, THD *thd, bool all) // hton rollback callback; every path returns 0
+{
+ DBUG_ENTER("wsrep_rollback");
+
+ if (thd->wsrep_exec_mode == REPL_RECV) // early exit taken without holding LOCK_wsrep_thd
+ {
+ DBUG_RETURN(0);
+ }
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd); // released on each return path below
+ switch (thd->wsrep_exec_mode) // mode is read again, this time under LOCK_wsrep_thd
+ {
+ case TOTAL_ORDER:
+ case REPL_RECV:
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ WSREP_DEBUG("Avoiding wsrep rollback for failed DDL: %s", thd->query());
+ DBUG_RETURN(0);
+ default: break;
+ }
+
+ if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
+ thd->wsrep_conflict_state != MUST_REPLAY) // a transaction marked MUST_REPLAY is left untouched here
+ {
+ if (WSREP(thd) && wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle))
+ {
+ DBUG_PRINT("wsrep", ("setting rollback fail"));
+ WSREP_ERROR("settting rollback fail: thd: %llu, schema: %s, SQL: %s",
+ (long long)thd->real_id, (thd->db ? thd->db : "(null)"),
+ thd->query());
+ }
+ wsrep_cleanup_transaction(thd); // runs whether or not post_rollback() reported a failure
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ DBUG_RETURN(0);
+}
+
+int wsrep_commit(handlerton *hton, THD *thd, bool all) // hton commit callback; every path returns 0
+{
+ DBUG_ENTER("wsrep_commit");
+
+ if (thd->wsrep_exec_mode == REPL_RECV) // early exit taken without holding LOCK_wsrep_thd
+ {
+ DBUG_RETURN(0);
+ }
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd); // held until the single unlock before the final return
+ if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
+ (thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY))
+ {
+ if (thd->wsrep_exec_mode == LOCAL_COMMIT) // set by wsrep_run_wsrep_commit() after a successful pre_commit()
+ {
+ DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write());
+ /*
+ Call to wsrep->post_commit() (moved to wsrep_post_commit()) must
+ be done only after commit has done for all involved htons.
+ */
+ DBUG_PRINT("wsrep", ("commit"));
+ }
+ else
+ {
+ /*
+ Transaction didn't go through wsrep->pre_commit() so just roll back
+ possible changes to clean state.
+ */
+ if (WSREP(thd) && wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle))
+ {
+ DBUG_PRINT("wsrep", ("setting rollback fail"));
+ WSREP_ERROR("settting rollback fail: thd: %llu, schema: %s, SQL: %s",
+ (long long)thd->real_id, (thd->db ? thd->db : "(null)"),
+ thd->query());
+ }
+ wsrep_cleanup_transaction(thd); // runs whether or not post_rollback() reported a failure
+ }
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ DBUG_RETURN(0);
+}
+
+
+extern Rpl_filter* binlog_filter;
+extern my_bool opt_log_slave_updates;
+
+enum wsrep_trx_status
+wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all)
+{ /*
+ Replicates the pending write set of a local transaction through the wsrep
+ provider: the cached events are handed over with wsrep_write_cache() and the
+ commit is then requested with wsrep->pre_commit().
+
+ Returns WSREP_TRX_OK when pre_commit() succeeded (exec mode becomes
+ LOCAL_COMMIT) and also when nothing is replicated: slave thread without
+ log_slave_updates, exec mode other than LOCAL_STATE, running consistency
+ check, or empty write set.
+ Returns WSREP_TRX_CERT_FAIL when the thread was marked MUST_ABORT before
+ replication, or the attempt ended with WSREP_BF_ABORT / WSREP_TRX_FAIL (the
+ latter is also used for an undefined trx id and for WSREP_TRX_MISSING).
+ Returns WSREP_TRX_SIZE_EXCEEDED when wsrep_write_cache() failed or the
+ provider reported WSREP_SIZE_EXCEEDED, and WSREP_TRX_ERROR in the remaining
+ cases.
+ */
+ int rcode= -1;
+ size_t data_len= 0;
+ IO_CACHE *cache;
+ int replay_round= 0;
+
+ if (thd->get_stmt_da()->is_error()) {
+ WSREP_DEBUG("commit issue, error: %d %s",
+ thd->get_stmt_da()->sql_errno(), thd->get_stmt_da()->message());
+ }
+
+ DBUG_ENTER("wsrep_run_wsrep_commit");
+
+ DEBUG_SYNC(thd, "wsrep_before_replication");
+
+ if (thd->slave_thread && !opt_log_slave_updates) DBUG_RETURN(WSREP_TRX_OK);
+
+ if (thd->wsrep_exec_mode == REPL_RECV) {
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ if (wsrep_debug)
+ WSREP_INFO("WSREP: must abort for BF");
+ DBUG_PRINT("wsrep", ("BF apply commit fail"));
+ thd->wsrep_conflict_state = NO_CONFLICT;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ //
+ // TODO: test all calls of the rollback.
+ // rollback must happen automagically innobase_rollback(hton, thd, 1);
+ //
+ DBUG_RETURN(WSREP_TRX_ERROR);
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ }
+
+ if (thd->wsrep_exec_mode != LOCAL_STATE) DBUG_RETURN(WSREP_TRX_OK);
+
+ if (thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING) {
+ WSREP_DEBUG("commit for consistency check: %s", thd->query());
+ DBUG_RETURN(WSREP_TRX_OK);
+ }
+
+ DBUG_PRINT("wsrep", ("replicating commit"));
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd); // from here LOCK_wsrep_thd is held, except inside the wait loop body
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ DBUG_PRINT("wsrep", ("replicate commit fail"));
+ thd->wsrep_conflict_state = ABORTED;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ if (wsrep_debug) {
+ WSREP_INFO("innobase_commit, abort %s",
+ (thd->query()) ? thd->query() : "void");
+ }
+ DBUG_RETURN(WSREP_TRX_CERT_FAIL);
+ }
+
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+
+ while (wsrep_replaying > 0 && // loop condition is evaluated with both mutexes held
+ thd->wsrep_conflict_state == NO_CONFLICT &&
+ thd->killed == NOT_KILLED &&
+ !shutdown_in_progress)
+ {
+
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ mysql_mutex_lock(&thd->mysys_var->mutex);
+ thd_proc_info(thd, "wsrep waiting on replaying");
+ thd->mysys_var->current_mutex= &LOCK_wsrep_replaying;
+ thd->mysys_var->current_cond= &COND_wsrep_replaying;
+ mysql_mutex_unlock(&thd->mysys_var->mutex);
+
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ // Using timedwait is a hack to avoid deadlock in case if BF victim
+ // misses the signal.
+ struct timespec wtime = {0, 1000000}; // NOTE(review): if mysql_cond_timedwait() takes an absolute deadline like pthread_cond_timedwait(), this one is already in the past and the wait returns at once - confirm
+ mysql_cond_timedwait(&COND_wsrep_replaying, &LOCK_wsrep_replaying,
+ &wtime);
+
+ if (replay_round++ % 100000 == 0)
+ WSREP_DEBUG("commit waiting for replaying: replayers %d, thd: (%lu) "
+ "conflict: %d (round: %d)",
+ wsrep_replaying, thd->thread_id,
+ thd->wsrep_conflict_state, replay_round);
+
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+ mysql_mutex_lock(&thd->mysys_var->mutex);
+ thd->mysys_var->current_mutex= 0;
+ thd->mysys_var->current_cond= 0;
+ mysql_mutex_unlock(&thd->mysys_var->mutex);
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd); // re-acquired in the order LOCK_wsrep_thd, then LOCK_wsrep_replaying
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ }
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ DBUG_PRINT("wsrep", ("replicate commit fail"));
+ thd->wsrep_conflict_state = ABORTED;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ WSREP_DEBUG("innobase_commit abort after replaying wait %s",
+ (thd->query()) ? thd->query() : "void");
+ DBUG_RETURN(WSREP_TRX_CERT_FAIL);
+ }
+
+ thd->wsrep_query_state = QUERY_COMMITTING;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ cache = get_trans_log(thd);
+ rcode = 0;
+ if (cache) {
+ thd->binlog_flush_pending_rows_event(true);
+ rcode = wsrep_write_cache(wsrep, thd, cache, &data_len);
+ if (WSREP_OK != rcode) {
+ WSREP_ERROR("rbr write fail, data_len: %zu, %d", data_len, rcode);
+ DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); // any wsrep_write_cache() failure is reported as size exceeded
+ }
+ }
+
+ if (data_len == 0)
+ {
+ if (thd->get_stmt_da()->is_ok() &&
+ thd->get_stmt_da()->affected_rows() > 0 &&
+ !binlog_filter->is_on())
+ {
+ WSREP_DEBUG("empty rbr buffer, query: %s, "
+ "affected rows: %llu, "
+ "changed tables: %d, "
+ "sql_log_bin: %d, "
+ "wsrep status (%d %d %d)",
+ thd->query(), thd->get_stmt_da()->affected_rows(),
+ stmt_has_updated_trans_table(thd), thd->variables.sql_log_bin,
+ thd->wsrep_exec_mode, thd->wsrep_query_state,
+ thd->wsrep_conflict_state);
+ }
+ else
+ {
+ WSREP_DEBUG("empty rbr buffer, query: %s", thd->query());
+ }
+ thd->wsrep_query_state= QUERY_EXEC;
+ DBUG_RETURN(WSREP_TRX_OK);
+ }
+
+ if (WSREP_UNDEFINED_TRX_ID == thd->wsrep_ws_handle.trx_id)
+ {
+ WSREP_WARN("SQL statement was ineffective, THD: %lu, buf: %zu\n"
+ "schema: %s \n"
+ "QUERY: %s\n"
+ " => Skipping replication",
+ thd->thread_id, data_len,
+ (thd->db ? thd->db : "(null)"), thd->query());
+ rcode = WSREP_TRX_FAIL;
+ }
+ else if (!rcode) // NOTE(review): if WSREP_OK is 0, rcode is always 0 here (see the early return after wsrep_write_cache()), making the final else branch unreachable - confirm
+ {
+ if (WSREP_OK == rcode)
+ rcode = wsrep->pre_commit(wsrep,
+ (wsrep_conn_id_t)thd->thread_id,
+ &thd->wsrep_ws_handle,
+ WSREP_FLAG_COMMIT |
+ ((thd->wsrep_PA_safe) ?
+ 0ULL : WSREP_FLAG_PA_UNSAFE),
+ &thd->wsrep_trx_meta);
+
+ if (rcode == WSREP_TRX_MISSING) {
+ WSREP_WARN("Transaction missing in provider, thd: %ld, schema: %s, SQL: %s",
+ thd->thread_id, (thd->db ? thd->db : "(null)"), thd->query());
+ rcode = WSREP_TRX_FAIL;
+ } else if (rcode == WSREP_BF_ABORT) {
+ WSREP_DEBUG("thd %lu seqno %lld BF aborted by provider, will replay",
+ thd->thread_id, (long long)thd->wsrep_trx_meta.gtid.seqno);
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_conflict_state = MUST_REPLAY;
+ DBUG_ASSERT(wsrep_thd_trx_seqno(thd) > 0);
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ wsrep_replaying++; // counter the wait loop above polls
+ WSREP_DEBUG("replaying increased: %d, thd: %lu",
+ wsrep_replaying, thd->thread_id);
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+ }
+ } else {
+ WSREP_ERROR("I/O error reading from thd's binlog iocache: "
+ "errno=%d, io cache code=%d", my_errno, cache->error);
+ DBUG_ASSERT(0); // failure like this can not normally happen
+ DBUG_RETURN(WSREP_TRX_ERROR);
+ }
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd); // every switch branch below unlocks before returning
+ switch(rcode) {
+ case 0:
+ /*
+ About MUST_ABORT: We assume that even if thd conflict state was set
+ to MUST_ABORT, underlying transaction was not rolled back or marked
+ as deadlock victim in QUERY_COMMITTING state. Conflict state is
+ set to NO_CONFLICT and commit proceeds as usual.
+ */
+ if (thd->wsrep_conflict_state == MUST_ABORT)
+ thd->wsrep_conflict_state= NO_CONFLICT;
+
+ if (thd->wsrep_conflict_state != NO_CONFLICT)
+ {
+ WSREP_WARN("thd %lu seqno %lld: conflict state %d after post commit",
+ thd->thread_id,
+ (long long)thd->wsrep_trx_meta.gtid.seqno,
+ thd->wsrep_conflict_state);
+ }
+ thd->wsrep_exec_mode= LOCAL_COMMIT;
+ DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED);
+ /* Override XID iff it was generated by mysql */
+ if (thd->transaction.xid_state.xid.get_my_xid())
+ {
+ wsrep_xid_init(&thd->transaction.xid_state.xid,
+ thd->wsrep_trx_meta.gtid.uuid,
+ thd->wsrep_trx_meta.gtid.seqno);
+ }
+ DBUG_PRINT("wsrep", ("replicating commit success"));
+ break;
+ case WSREP_BF_ABORT:
+ DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); // no break: continues into the WSREP_TRX_FAIL handling
+ case WSREP_TRX_FAIL:
+ WSREP_DEBUG("commit failed for reason: %d %lu %s", rcode, thd->thread_id,
+ thd->query());
+ DBUG_PRINT("wsrep", ("replicating commit fail"));
+
+ thd->wsrep_query_state= QUERY_EXEC;
+
+ if (thd->wsrep_conflict_state == MUST_ABORT) {
+ thd->wsrep_conflict_state= ABORTED;
+ }
+ else
+ {
+ WSREP_DEBUG("conflict state: %d", thd->wsrep_conflict_state);
+ if (thd->wsrep_conflict_state == NO_CONFLICT)
+ {
+ thd->wsrep_conflict_state = CERT_FAILURE;
+ WSREP_LOG_CONFLICT(NULL, thd, FALSE);
+ }
+ }
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ DBUG_RETURN(WSREP_TRX_CERT_FAIL);
+
+ case WSREP_SIZE_EXCEEDED:
+ WSREP_ERROR("transaction size exceeded");
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED);
+ case WSREP_CONN_FAIL:
+ WSREP_ERROR("connection failure");
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ DBUG_RETURN(WSREP_TRX_ERROR);
+ default:
+ WSREP_ERROR("unknown connection failure");
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ DBUG_RETURN(WSREP_TRX_ERROR);
+ }
+
+ thd->wsrep_query_state= QUERY_EXEC; // reached only from the 'case 0' break above
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ DBUG_RETURN(WSREP_TRX_OK);
+}
+
+
+/*
+  Plugin init callback: stores the handlerton passed as p in wsrep_hton and
+  fills in its type, flags and the wsrep callbacks. Always returns 0.
+*/
+static int wsrep_hton_init(void *p)
+{
+  wsrep_hton= (handlerton *)p;
+
+  handlerton *const hton= wsrep_hton;
+
+  /* identity; state is unconditionally YES (not derived from opt_bin_log) */
+  hton->state= SHOW_OPTION_YES;
+  hton->db_type=DB_TYPE_WSREP;
+  hton->savepoint_offset= sizeof(my_off_t);
+
+  /* connection, savepoint and transaction callbacks */
+  hton->close_connection= wsrep_close_connection;
+  hton->savepoint_set= wsrep_savepoint_set;
+  hton->savepoint_rollback= wsrep_savepoint_rollback;
+  hton->commit= wsrep_commit;
+  hton->rollback= wsrep_rollback;
+  hton->prepare= wsrep_prepare;
+
+  hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN; // todo: fix flags
+  hton->slot= 0;
+
+  return 0;
+}
+
+
+struct st_mysql_storage_engine wsrep_storage_engine= // descriptor referenced by the plugin declaration below
+{ MYSQL_HANDLERTON_INTERFACE_VERSION };
+
+
+mysql_declare_plugin(wsrep) // declares the "wsrep" storage engine plugin; wsrep_hton_init is its init callback
+{
+ MYSQL_STORAGE_ENGINE_PLUGIN,
+ &wsrep_storage_engine,
+ "wsrep",
+ "Codership Oy",
+ "A pseudo storage engine to represent transactions in multi-master "
+ "synchornous replication", // NOTE(review): "synchornous" is a typo for "synchronous"; the description string is left unchanged here
+ PLUGIN_LICENSE_GPL,
+ wsrep_hton_init, /* Plugin Init */
+ NULL, /* Plugin Deinit */
+ 0x0100 /* 1.0 */,
+ NULL, /* status variables */
+ NULL, /* system variables */
+ NULL, /* config options */
+ 0, /* flags */
+}
+mysql_declare_plugin_end;
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
new file mode 100644
index 00000000000..353911dcfde
--- /dev/null
+++ b/sql/wsrep_mysqld.cc
@@ -0,0 +1,1601 @@
+/* Copyright 2008-2015 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <mysqld.h>
+#include <sql_class.h>
+#include <sql_parse.h>
+#include "wsrep_priv.h"
+#include "wsrep_thd.h"
+#include "wsrep_sst.h"
+#include "wsrep_utils.h"
+#include "wsrep_var.h"
+#include "wsrep_binlog.h"
+#include "wsrep_applier.h"
+#include "wsrep_xid.h"
+#include <cstdio>
+#include <cstdlib>
+#include "log_event.h"
+#include <slave.h>
+
+wsrep_t *wsrep = NULL; // provider handle; filled in by wsrep_load() in wsrep_init()
+my_bool wsrep_emulate_bin_log = FALSE; // activating parts of binlog interface
+#ifdef GTID_SUPPORT
+/* Sidno in global_sid_map corresponding to group uuid; assigned in wsrep_init_sidno() */
+rpl_sidno wsrep_sidno= -1;
+#endif /* GTID_SUPPORT */
+my_bool wsrep_preordered_opt= FALSE;
+
+/*
+ * Begin configuration options and their default values
+ */
+
+const char* wsrep_data_home_dir = NULL; // passed to the provider as data_dir in wsrep_init()
+const char* wsrep_dbug_option = "";
+
+long wsrep_slave_threads = 1; // # of slave action appliers wanted
+int wsrep_slave_count_change = 0; // # of appliers to stop or start
+my_bool wsrep_debug = 0; // enable debug level logging
+my_bool wsrep_convert_LOCK_to_trx = 1; // convert locking sessions to trx
+ulong wsrep_retry_autocommit = 5; // retry aborted autocommit trx
+my_bool wsrep_auto_increment_control = 1; // control auto increment variables (offset/increment are set from the view)
+my_bool wsrep_drupal_282555_workaround = 1; // retry autoinc insert after dupkey
+my_bool wsrep_incremental_data_collection = 0; // incremental data collection; overwritten from provider capabilities on a primary view
+ulong wsrep_max_ws_size = 1073741824UL;// max ws (RBR buffer) size, 1 GiB
+ulong wsrep_max_ws_rows = 65536; // max number of rows in ws
+int wsrep_to_isolation = 0; // # of active TO isolation threads
+my_bool wsrep_certify_nonPK = 1; // certify, even when no primary key
+long wsrep_max_protocol_version = 3; // maximum protocol version to use
+ulong wsrep_forced_binlog_format = BINLOG_FORMAT_UNSPEC;
+my_bool wsrep_recovery = 0; // recovery
+my_bool wsrep_replicate_myisam = 0; // enable myisam replication
+my_bool wsrep_log_conflicts = 0;
+ulong wsrep_mysql_replication_bundle = 0;
+my_bool wsrep_desync = 0; // desynchronize the node from the
+ // cluster
+my_bool wsrep_load_data_splitting = 1; // commit load data every 10K intervals
+my_bool wsrep_restart_slave = 0; // should mysql slave thread be
+ // restarted, if node joins back
+my_bool wsrep_restart_slave_activated = 0; // node has dropped, and slave
+ // restart will be needed
+my_bool wsrep_slave_UK_checks = 0; // slave thread does UK checks
+my_bool wsrep_slave_FK_checks = 0; // slave thread does FK checks
+// Allow reads even if the node is not in the primary component.
+bool wsrep_dirty_reads = false;
+
+/*
+ Set during the creation of the first wsrep applier and rollback threads
+ (see wsrep_init_startup()). Since these threads are critical, abort if
+ the thread creation fails.
+*/
+my_bool wsrep_creating_startup_threads = 0;
+
+/*
+ * End configuration options
+ */
+
+/*
+ * Other wsrep global variables.
+ */
+my_bool wsrep_inited = 0; // initialized ? (set in wsrep_init(), cleared in wsrep_deinit())
+
+static wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED; // UUID of the last view seen by wsrep_view_handler_cb()
+static char cluster_uuid_str[40]= { 0, }; // printable form of cluster_uuid
+static const char* cluster_status_str[WSREP_VIEW_MAX] = // indexed by view->status
+{
+ "Primary",
+ "non-Primary",
+ "Disconnected"
+};
+
+static char provider_name[256]= { 0, }; // copied from the provider in wsrep_init()
+static char provider_version[256]= { 0, }; // copied from the provider in wsrep_init()
+static char provider_vendor[256]= { 0, }; // copied from the provider in wsrep_init()
+
+/*
+ * wsrep status variables
+ */
+my_bool wsrep_connected = FALSE; // TRUE after a successful provider connect(), FALSE after disconnect()
+my_bool wsrep_ready = FALSE; // node can accept queries; guarded by LOCK_wsrep_ready in wsrep_ready_set()
+const char* wsrep_cluster_state_uuid = cluster_uuid_str;
+long long wsrep_cluster_conf_id = WSREP_SEQNO_UNDEFINED; // view->view of the latest view
+const char* wsrep_cluster_status = cluster_status_str[WSREP_VIEW_DISCONNECTED];
+long wsrep_cluster_size = 0; // view->memb_num of the latest view
+long wsrep_local_index = -1; // view->my_idx of the latest view
+long long wsrep_local_bf_aborts = 0;
+const char* wsrep_provider_name = provider_name;
+const char* wsrep_provider_version = provider_version;
+const char* wsrep_provider_vendor = provider_vendor;
+/* End wsrep status variables */
+
+wsrep_uuid_t local_uuid = WSREP_UUID_UNDEFINED; // local state position: uuid part
+wsrep_seqno_t local_seqno = WSREP_SEQNO_UNDEFINED; // local state position: seqno part
+wsp::node_status local_status;
+long wsrep_protocol_version = 3; // updated from view->proto_ver in wsrep_view_handler_cb()
+
+// Boolean denoting if server is in initial startup phase. This is needed
+// to make sure that main thread waiting in wsrep_sst_wait() is signaled
+// if there was no state gap on receiving first view event.
+// Cleared once the first PRIMARY view has been handled.
+static my_bool wsrep_startup = TRUE;
+
+
+/*
+  Logger callback handed to the wsrep provider.
+
+  Forwards msg to the server error log at the severity matching level,
+  always prefixed with "WSREP: ". ERROR and FATAL both map to the error
+  log. DEBUG messages are written (as information) only while wsrep_debug
+  is set; any other level is silently dropped.
+*/
+static void wsrep_log_cb(wsrep_log_level_t level, const char *msg)
+{
+  if (level == WSREP_LOG_INFO)
+  {
+    sql_print_information("WSREP: %s", msg);
+  }
+  else if (level == WSREP_LOG_WARN)
+  {
+    sql_print_warning("WSREP: %s", msg);
+  }
+  else if (level == WSREP_LOG_ERROR || level == WSREP_LOG_FATAL)
+  {
+    sql_print_error("WSREP: %s", msg);
+  }
+  else if (level == WSREP_LOG_DEBUG && wsrep_debug)
+  {
+    sql_print_information ("[Debug] WSREP: %s", msg);
+  }
+}
+
+/*
+  Log the group state and the local node state ("<uuid>:<seqno>") at the
+  given level, one message each, through wsrep_log_cb().
+
+  wsrep_log_cb() already prefixes every message with "WSREP: ", so the
+  text built here must not carry that prefix itself (it used to, which
+  produced "WSREP: WSREP: ..." in the log).
+
+  @param level        log level forwarded to wsrep_log_cb()
+  @param group_uuid   group state UUID
+  @param group_seqno  group state seqno
+  @param node_uuid    local state UUID
+  @param node_seqno   local state seqno
+*/
+static void wsrep_log_states (wsrep_log_level_t const level,
+                              const wsrep_uuid_t* const group_uuid,
+                              wsrep_seqno_t const group_seqno,
+                              const wsrep_uuid_t* const node_uuid,
+                              wsrep_seqno_t const node_seqno)
+{
+  char uuid_str[37];
+  char msg[256];
+
+  wsrep_uuid_print (group_uuid, uuid_str, sizeof(uuid_str));
+  /* snprintf() always NUL-terminates within the given size */
+  snprintf (msg, sizeof(msg), "Group state: %s:%lld",
+            uuid_str, (long long)group_seqno);
+  wsrep_log_cb (level, msg);
+
+  wsrep_uuid_print (node_uuid, uuid_str, sizeof(uuid_str));
+  snprintf (msg, sizeof(msg), "Local state: %s:%lld",
+            uuid_str, (long long)node_seqno);
+  wsrep_log_cb (level, msg);
+}
+
+#ifdef GTID_SUPPORT
+/*
+  Register a SID derived from the given UUID in global_sid_map and store
+  the resulting sidno in wsrep_sidno. The SID is the byte-wise bitwise
+  complement of wsrep_uuid. The map update and the log line both happen
+  under the global_sid_lock write lock.
+*/
+void wsrep_init_sidno(const wsrep_uuid_t& wsrep_uuid)
+{
+  wsrep_uuid_t inverted_uuid;
+  const size_t uuid_bytes= sizeof(inverted_uuid.data);
+
+  size_t pos= 0;
+  while (pos < uuid_bytes)
+  {
+    inverted_uuid.data[pos]= ~wsrep_uuid.data[pos];
+    ++pos;
+  }
+
+  rpl_sid new_sid;
+  new_sid.copy_from(inverted_uuid.data);
+
+  global_sid_lock->wrlock();
+  wsrep_sidno= global_sid_map->add_sid(new_sid);
+  WSREP_INFO("Initialized wsrep sidno %d", wsrep_sidno);
+  global_sid_lock->unlock();
+}
+#endif /* GTID_SUPPORT */
+/*
+ * View-change callback (installed as view_handler_cb in wsrep_init()).
+ *
+ * Summary of what the code below does, in order:
+ *  - clears the SST request out-parameters;
+ *  - records the view's UUID, view id, status, size and own index in the
+ *    cluster_* / wsrep_cluster_* / wsrep_local_index globals and logs them;
+ *  - non-PRIMARY view: flushes the query cache (if compiled in), clears
+ *    wsrep_ready, sets status UNDEFINED and skips everything else;
+ *  - PRIMARY view: on a protocol version change (0..3) closes client
+ *    connections and adopts the new version; any other version aborts
+ *    the server;
+ *  - state gap: clears wsrep_ready, closes client connections unless
+ *    wsrep_before_SE(), prepares an SST request and becomes JOINER
+ *    (UNDEFINED if preparation failed);
+ *  - no state gap: during startup either completes the "SST" wait for
+ *    the init thread or adopts the view position locally, stores the
+ *    checkpoint and becomes JOINED; then verifies local_uuid matches the
+ *    cluster UUID and aborts the server if not;
+ *  - updates auto-increment settings (if enabled) and the incremental
+ *    data collection flag from the provider capabilities;
+ *  - finally clears wsrep_startup on a PRIMARY view and stores the new
+ *    member status.
+ *
+ * app_ctx, recv_ctx, state and state_len are not used here.
+ * Always returns WSREP_CB_SUCCESS.
+ */
+static wsrep_cb_status_t
+wsrep_view_handler_cb (void* app_ctx,
+ void* recv_ctx,
+ const wsrep_view_info_t* view,
+ const char* state,
+ size_t state_len,
+ void** sst_req,
+ size_t* sst_req_len)
+{
+ *sst_req = NULL;
+ *sst_req_len = 0;
+
+ wsrep_member_status_t new_status= local_status.get();
+
+ if (memcmp(&cluster_uuid, &view->state_id.uuid, sizeof(wsrep_uuid_t)))
+ {
+ memcpy((wsrep_uuid_t*)&cluster_uuid, &view->state_id.uuid,
+ sizeof(cluster_uuid));
+
+ wsrep_uuid_print (&cluster_uuid, cluster_uuid_str,
+ sizeof(cluster_uuid_str));
+ }
+
+ wsrep_cluster_conf_id= view->view;
+ wsrep_cluster_status= cluster_status_str[view->status]; // NOTE(review): view->status indexes the table without a range check
+ wsrep_cluster_size= view->memb_num;
+ wsrep_local_index= view->my_idx;
+
+ WSREP_INFO("New cluster view: global state: %s:%lld, view# %lld: %s, "
+ "number of nodes: %ld, my index: %ld, protocol version %d",
+ wsrep_cluster_state_uuid, (long long)view->state_id.seqno,
+ (long long)wsrep_cluster_conf_id, wsrep_cluster_status,
+ wsrep_cluster_size, wsrep_local_index, view->proto_ver);
+
+ /* Proceed further only if view is PRIMARY */
+ if (WSREP_VIEW_PRIMARY != view->status)
+ {
+#ifdef HAVE_QUERY_CACHE
+ // query cache must be initialised by now
+ query_cache.flush();
+#endif /* HAVE_QUERY_CACHE */
+
+ wsrep_ready_set(FALSE);
+ new_status= WSREP_MEMBER_UNDEFINED;
+ /* Always record local_uuid and local_seqno in non-prim since this
+ * may lead to re-initializing provider and start position is
+ * determined according to these variables */
+ // WRONG! local_uuid should be the last primary configuration uuid we were
+ // a member of. local_seqno should be updated in commit calls.
+ // local_uuid= cluster_uuid;
+ // local_seqno= view->first - 1;
+ goto out;
+ }
+
+ switch (view->proto_ver)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ // version change
+ if (view->proto_ver != wsrep_protocol_version)
+ {
+ my_bool wsrep_ready_saved= wsrep_ready; // restored after connections are closed
+ wsrep_ready_set(FALSE);
+ WSREP_INFO("closing client connections for "
+ "protocol change %ld -> %d",
+ wsrep_protocol_version, view->proto_ver);
+ wsrep_close_client_connections(TRUE);
+ wsrep_protocol_version= view->proto_ver;
+ wsrep_ready_set(wsrep_ready_saved);
+ }
+ break;
+ default:
+ WSREP_ERROR("Unsupported application protocol version: %d",
+ view->proto_ver);
+ unireg_abort(1);
+ }
+
+ if (view->state_gap)
+ {
+ WSREP_WARN("Gap in state sequence. Need state transfer.");
+
+ /* After that wsrep will call wsrep_sst_prepare. */
+ /* keep ready flag 0 until we receive the snapshot */
+ wsrep_ready_set(FALSE);
+
+ /* Close client connections to ensure that they don't interfere
+ * with SST. Necessary only if storage engines are initialized
+ * before SST.
+ * TODO: Just killing all ongoing transactions should be enough
+ * since wsrep_ready is OFF and no new transactions can start.
+ */
+ if (!wsrep_before_SE())
+ {
+ WSREP_DEBUG("[debug]: closing client connections for PRIM");
+ wsrep_close_client_connections(TRUE);
+ }
+
+ ssize_t const req_len= wsrep_sst_prepare (sst_req);
+
+ if (req_len < 0)
+ {
+ WSREP_ERROR("SST preparation failed: %zd (%s)", -req_len,
+ strerror(-req_len));
+ new_status= WSREP_MEMBER_UNDEFINED;
+ }
+ else
+ {
+ assert(sst_req != NULL); // NOTE(review): tests the out-pointer itself, not *sst_req - confirm intent
+ *sst_req_len= req_len;
+ new_status= WSREP_MEMBER_JOINER;
+ }
+ }
+ else
+ {
+ /*
+ * NOTE: Initialize wsrep_group_uuid here only if it wasn't initialized
+ * before - OR - it was reinitialized on startup (lp:992840)
+ */
+ if (wsrep_startup)
+ {
+ if (wsrep_before_SE())
+ {
+ wsrep_SE_init_grab();
+ // Signal mysqld init thread to continue
+ wsrep_sst_complete (&cluster_uuid, view->state_id.seqno, false);
+ // and wait for SE initialization
+ wsrep_SE_init_wait();
+ }
+ else
+ {
+ local_uuid= cluster_uuid;
+ local_seqno= view->state_id.seqno;
+ }
+ /* Init storage engine XIDs from first view */
+ wsrep_set_SE_checkpoint(local_uuid, local_seqno);
+#ifdef GTID_SUPPORT
+ wsrep_init_sidno(local_uuid);
+#endif /* GTID_SUPPORT */
+ new_status= WSREP_MEMBER_JOINED;
+ }
+
+ // just some sanity check
+ if (memcmp (&local_uuid, &cluster_uuid, sizeof (wsrep_uuid_t)))
+ {
+ WSREP_ERROR("Undetected state gap. Can't continue.");
+ wsrep_log_states(WSREP_LOG_FATAL, &cluster_uuid, view->state_id.seqno,
+ &local_uuid, -1); // the local seqno is reported as -1 here, not local_seqno
+ unireg_abort(1);
+ }
+ }
+
+ if (wsrep_auto_increment_control)
+ {
+ global_system_variables.auto_increment_offset= view->my_idx + 1;
+ global_system_variables.auto_increment_increment= view->memb_num;
+ }
+
+ { /* capabilities may be updated on new configuration */
+ uint64_t const caps(wsrep->capabilities (wsrep));
+
+ my_bool const idc((caps & WSREP_CAP_INCREMENTAL_WRITESET) != 0);
+ if (TRUE == wsrep_incremental_data_collection && FALSE == idc)
+ {
+ WSREP_WARN("Unsupported protocol downgrade: "
+ "incremental data collection disabled. Expect abort.");
+ }
+ wsrep_incremental_data_collection = idc;
+ }
+
+out:
+ if (view->status == WSREP_VIEW_PRIMARY) wsrep_startup= FALSE;
+ local_status.set(new_status, view);
+
+ return WSREP_CB_SUCCESS;
+}
+
+/*
+  Set the wsrep_ready flag to x under LOCK_wsrep_ready. COND_wsrep_ready
+  is signalled only when the value actually changes. Aborts the process
+  if the mutex cannot be locked.
+*/
+void wsrep_ready_set (my_bool x)
+{
+  WSREP_DEBUG("Setting wsrep_ready to %d", x);
+
+  if (mysql_mutex_lock (&LOCK_wsrep_ready))
+    abort();
+
+  const bool value_changes= (wsrep_ready != x);
+  if (value_changes)
+  {
+    wsrep_ready= x;
+    mysql_cond_signal (&COND_wsrep_ready);
+  }
+
+  mysql_mutex_unlock (&LOCK_wsrep_ready);
+}
+
+/*
+  Block the caller until wsrep_ready becomes true, waiting on
+  COND_wsrep_ready with LOCK_wsrep_ready held. An info line is logged
+  before every wait and once more when the ready state is observed.
+  Aborts the process if the mutex cannot be locked.
+*/
+void wsrep_ready_wait ()
+{
+  if (mysql_mutex_lock (&LOCK_wsrep_ready))
+    abort();
+
+  for (;;)
+  {
+    if (wsrep_ready)
+      break;
+    WSREP_INFO("Waiting to reach ready state");
+    mysql_cond_wait (&COND_wsrep_ready, &LOCK_wsrep_ready);
+  }
+
+  WSREP_INFO("ready state reached");
+  mysql_mutex_unlock (&LOCK_wsrep_ready);
+}
+
+/*
+  "Synced" callback (installed as synced_cb in wsrep_init()).
+
+  Under LOCK_wsrep_ready: raises wsrep_ready (signalling waiters) if it
+  was not set yet, and marks the local member status SYNCED. If the ready
+  flag was raised by this call, the mysqld init thread is released through
+  wsrep_sst_complete() and storage engine initialization is awaited.
+  Finally, when a slave restart was flagged, the slave SQL thread is
+  started again; a failure there is only logged.
+
+  app_ctx is not used.
+*/
+static void wsrep_synced_cb(void* app_ctx)
+{
+  WSREP_INFO("Synchronized with group, ready for connections");
+
+  if (mysql_mutex_lock (&LOCK_wsrep_ready))
+    abort();
+
+  const bool became_ready= !wsrep_ready;
+  if (became_ready)
+  {
+    wsrep_ready= TRUE;
+    mysql_cond_signal (&COND_wsrep_ready);
+  }
+  local_status.set(WSREP_MEMBER_SYNCED);
+
+  mysql_mutex_unlock (&LOCK_wsrep_ready);
+
+  if (became_ready)
+  {
+    wsrep_SE_init_grab();
+    // let the mysqld init thread continue ...
+    wsrep_sst_complete (&local_uuid, local_seqno, false);
+    // ... and wait until it has initialized the storage engines
+    wsrep_SE_init_wait();
+  }
+
+  if (!wsrep_restart_slave_activated)
+    return;
+
+  WSREP_INFO("MySQL slave restart");
+  wsrep_restart_slave_activated= FALSE;
+
+  mysql_mutex_lock(&LOCK_active_mi);
+  const int start_err= start_slave_threads(1 /* need mutex */,
+                                           0 /* no wait for start*/,
+                                           active_mi,
+                                           master_info_file,
+                                           relay_log_info_file,
+                                           SLAVE_SQL);
+  if (start_err)
+  {
+    WSREP_WARN("Failed to create slave threads: %d", start_err);
+  }
+  mysql_mutex_unlock(&LOCK_active_mi);
+}
+
+/*
+  Seed local_uuid/local_seqno from the checkpoint stored in the storage
+  engines.
+
+  Nothing happens when the stored UUID is undefined. Otherwise the stored
+  position is logged and adopted only if the local position is still
+  completely undefined; if a different local position is already set, a
+  warning is logged and the local position is kept.
+*/
+static void wsrep_init_position()
+{
+  /* read XIDs from storage engines */
+  wsrep_uuid_t se_uuid;
+  wsrep_seqno_t se_seqno;
+  wsrep_get_SE_checkpoint(se_uuid, se_seqno);
+
+  if (memcmp(&se_uuid, &WSREP_UUID_UNDEFINED, sizeof(wsrep_uuid_t)) == 0)
+  {
+    WSREP_INFO("Read nil XID from storage engines, skipping position init");
+    return;
+  }
+
+  char uuid_str[40] = {0, };
+  wsrep_uuid_print(&se_uuid, uuid_str, sizeof(uuid_str));
+  WSREP_INFO("Initial position: %s:%lld", uuid_str, (long long)se_seqno);
+
+  const bool local_position_unset=
+    memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(local_uuid)) == 0 &&
+    local_seqno == WSREP_SEQNO_UNDEFINED;
+
+  if (local_position_unset)
+  {
+    // Initial state
+    local_uuid= se_uuid;
+    local_seqno= se_seqno;
+    return;
+  }
+
+  if (memcmp(&local_uuid, &se_uuid, sizeof(local_uuid)) != 0 ||
+      local_seqno != se_seqno)
+  {
+    WSREP_WARN("Initial position was provided by configuration or SST, "
+               "avoiding override");
+  }
+}
+
+extern char* my_bind_addr_str;
+/*
+ * Load and initialize the wsrep provider.
+ *
+ * Steps visible below:
+ *  - clears wsrep_ready and seeds the local position from the storage
+ *    engines (wsrep_init_position());
+ *  - loads wsrep_provider; if that fails for a real provider, rewrites
+ *    the provider name to WSREP_NONE and retries once recursively; if
+ *    even that fails the server is aborted;
+ *  - without a real provider: marks the node ready, turns wsrep_on off
+ *    and initializes the provider with logger and options only;
+ *  - with a real provider: turns wsrep_on on, copies the provider
+ *    name/version/vendor, works out the node address and the address for
+ *    incoming client connections, and calls the provider's init() with
+ *    all callbacks and the local state id.
+ *
+ * Returns the provider init() result: 0 on success, non-zero on failure
+ * (in which case the provider handle is released and wsrep set to NULL).
+ */
+int wsrep_init()
+{
+ int rcode= -1;
+ DBUG_ASSERT(wsrep_inited == 0);
+
+ wsrep_ready_set(FALSE);
+ assert(wsrep_provider);
+
+ wsrep_init_position();
+
+ if ((rcode= wsrep_load(wsrep_provider, &wsrep, wsrep_log_cb)) != WSREP_OK)
+ {
+ if (strcasecmp(wsrep_provider, WSREP_NONE))
+ {
+ WSREP_ERROR("wsrep_load(%s) failed: %s (%d). Reverting to no provider.",
+ wsrep_provider, strerror(rcode), rcode);
+ strcpy((char*)wsrep_provider, WSREP_NONE); // damn it's a dirty hack: overwrites the provider string in place
+ return wsrep_init();
+ }
+ else /* this is for recursive call above */
+ {
+ WSREP_ERROR("Could not revert to no provider: %s (%d). Need to abort.",
+ strerror(rcode), rcode);
+ unireg_abort(1);
+ }
+ }
+
+ if (!WSREP_PROVIDER_EXISTS)
+ {
+ // enable normal operation in case no provider is specified
+ wsrep_ready_set(TRUE);
+ wsrep_inited= 1; // NOTE(review): set before wsrep->init(); stays 1 even if init fails below
+ global_system_variables.wsrep_on = 0;
+ wsrep_init_args args;
+ args.logger_cb = wsrep_log_cb;
+ args.options = (wsrep_provider_options) ?
+ wsrep_provider_options : "";
+ rcode = wsrep->init(wsrep, &args);
+ if (rcode)
+ {
+ DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode));
+ WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode);
+ wsrep->free(wsrep);
+ free(wsrep); // NOTE(review): handle came from wsrep_load(); confirm free() is the matching release
+ wsrep = NULL;
+ }
+ return rcode;
+ }
+ else
+ {
+ global_system_variables.wsrep_on = 1;
+ strncpy(provider_name,
+ wsrep->provider_name, sizeof(provider_name) - 1);
+ strncpy(provider_version,
+ wsrep->provider_version, sizeof(provider_version) - 1);
+ strncpy(provider_vendor,
+ wsrep->provider_vendor, sizeof(provider_vendor) - 1);
+ }
+
+ char node_addr[512]= { 0, }; // zero-filled, so the strncpy() below always leaves a terminator
+ size_t const node_addr_max= sizeof(node_addr) - 1;
+ if (!wsrep_node_address || !strcmp(wsrep_node_address, ""))
+ {
+ size_t const ret= wsrep_guess_ip(node_addr, node_addr_max);
+ if (!(ret > 0 && ret < node_addr_max))
+ {
+ WSREP_WARN("Failed to guess base node address. Set it explicitly via "
+ "wsrep_node_address.");
+ node_addr[0]= '\0';
+ }
+ }
+ else
+ {
+ strncpy(node_addr, wsrep_node_address, node_addr_max);
+ }
+
+ char inc_addr[512]= { 0, }; // address advertised for incoming client connections; empty means unknown
+ size_t const inc_addr_max= sizeof (inc_addr);
+ if ((!wsrep_node_incoming_address ||
+ !strcmp (wsrep_node_incoming_address, WSREP_NODE_INCOMING_AUTO)))
+ {
+ unsigned int my_bind_ip= INADDR_ANY; // default if not set
+ if (my_bind_addr_str && strlen(my_bind_addr_str))
+ {
+ my_bind_ip= wsrep_check_ip(my_bind_addr_str);
+ }
+
+ if (INADDR_ANY != my_bind_ip)
+ {
+ if (INADDR_NONE != my_bind_ip && INADDR_LOOPBACK != my_bind_ip)
+ {
+ snprintf(inc_addr, inc_addr_max, "%s:%u",
+ my_bind_addr_str, (int)mysqld_port); // NOTE(review): %u is paired with an int cast
+ } // else leave inc_addr an empty string - mysqld is not listening for
+ // client connections on network interfaces.
+ }
+ else // mysqld binds to 0.0.0.0, take IP from wsrep_node_address if possible
+ {
+ size_t const node_addr_len= strlen(node_addr);
+ if (node_addr_len > 0)
+ {
+ const char* const colon= strrchr(node_addr, ':');
+ if (strchr(node_addr, ':') == colon) // 1 or 0 ':'
+ {
+ size_t const ip_len= colon ? colon - node_addr : node_addr_len;
+ if (ip_len + 7 /* :55555\0 */ < inc_addr_max)
+ {
+ memcpy (inc_addr, node_addr, ip_len);
+ snprintf(inc_addr + ip_len, inc_addr_max - ip_len, ":%u",
+ (int)mysqld_port);
+ }
+ else
+ {
+ WSREP_WARN("Guessing address for incoming client connections: "
+ "address too long.");
+ inc_addr[0]= '\0';
+ }
+ }
+ else
+ {
+ WSREP_WARN("Guessing address for incoming client connections: "
+ "too many colons :) .");
+ inc_addr[0]= '\0';
+ }
+ }
+
+ if (!strlen(inc_addr))
+ {
+ WSREP_WARN("Guessing address for incoming client connections failed. "
+ "Try setting wsrep_node_incoming_address explicitly.");
+ }
+ }
+ }
+ else if (!strchr(wsrep_node_incoming_address, ':')) // no port included
+ {
+ if ((int)inc_addr_max <=
+ snprintf(inc_addr, inc_addr_max, "%s:%u",
+ wsrep_node_incoming_address,(int)mysqld_port))
+ {
+ WSREP_WARN("Guessing address for incoming client connections: "
+ "address too long.");
+ inc_addr[0]= '\0';
+ }
+ }
+ else
+ {
+ size_t const need = strlen (wsrep_node_incoming_address);
+ if (need >= inc_addr_max) {
+ WSREP_WARN("wsrep_node_incoming_address too long: %zu", need);
+ inc_addr[0]= '\0';
+ }
+ else {
+ memcpy (inc_addr, wsrep_node_incoming_address, need); // no terminator copied; relies on inc_addr being zero-initialized
+ }
+ }
+
+ struct wsrep_init_args wsrep_args;
+
+ struct wsrep_gtid const state_id = { local_uuid, local_seqno };
+
+ wsrep_args.data_dir = wsrep_data_home_dir;
+ wsrep_args.node_name = (wsrep_node_name) ? wsrep_node_name : "";
+ wsrep_args.node_address = node_addr;
+ wsrep_args.node_incoming = inc_addr;
+ wsrep_args.options = (wsrep_provider_options) ?
+ wsrep_provider_options : "";
+ wsrep_args.proto_ver = wsrep_max_protocol_version;
+
+ wsrep_args.state_id = &state_id;
+
+ wsrep_args.logger_cb = wsrep_log_cb;
+ wsrep_args.view_handler_cb = wsrep_view_handler_cb;
+ wsrep_args.apply_cb = wsrep_apply_cb;
+ wsrep_args.commit_cb = wsrep_commit_cb;
+ wsrep_args.unordered_cb = wsrep_unordered_cb;
+ wsrep_args.sst_donate_cb = wsrep_sst_donate_cb;
+ wsrep_args.synced_cb = wsrep_synced_cb;
+
+ rcode = wsrep->init(wsrep, &wsrep_args);
+
+ if (rcode)
+ {
+ DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode));
+ WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode);
+ wsrep->free(wsrep);
+ free(wsrep);
+ wsrep = NULL;
+ } else {
+ wsrep_inited= 1;
+ }
+
+ return rcode;
+}
+
+extern int wsrep_on(void *);
+
+/*
+  Server-startup entry point for wsrep.
+
+  Initializes the provider (aborting the server on failure) and the
+  thr_lock hooks. Unless the dummy provider is loaded or no cluster
+  address is configured, it then starts replication and creates the
+  rollbacker thread and one applier thread.
+
+  @param first  when true, the SST lock is grabbed before replication
+                starts and the call blocks until SST has completed;
+                the server is aborted if that wait fails.
+*/
+void wsrep_init_startup (bool first)
+{
+  if (wsrep_init() != 0)
+    unireg_abort(1);
+
+  wsrep_thr_lock_init(wsrep_thd_is_BF, wsrep_abort_thd,
+                      wsrep_debug, wsrep_convert_LOCK_to_trx, wsrep_on);
+
+  /* Skip replication start if dummy wsrep provider is loaded */
+  const bool dummy_provider= (strcmp(wsrep_provider, WSREP_NONE) == 0);
+  if (dummy_provider)
+    return;
+
+  /* Skip replication start if no cluster address */
+  const bool have_cluster_address=
+    wsrep_cluster_address && wsrep_cluster_address[0] != '\0';
+  if (!have_cluster_address)
+    return;
+
+  if (first)
+    wsrep_sst_grab(); // do it so we can wait for SST below
+
+  if (!wsrep_start_replication())
+    unireg_abort(1);
+
+  wsrep_creating_startup_threads= 1;
+  wsrep_create_rollbacker();
+  wsrep_create_appliers(1);
+
+  if (first)
+  {
+    // wait until SST is completed
+    if (!wsrep_sst_wait())
+      unireg_abort(1);
+  }
+}
+
+
+/*
+  Unload the provider and reset the module to its uninitialized state:
+  the provider handle is cleared, the cached provider name/version/vendor
+  strings are emptied and wsrep_inited is reset.
+
+  @param free_options  when true, SST authentication data is freed too
+*/
+void wsrep_deinit(bool free_options)
+{
+  DBUG_ASSERT(wsrep_inited == 1);
+
+  wsrep_unload(wsrep);
+  wsrep= NULL;
+  wsrep_inited= 0;
+
+  *provider_name=    '\0';
+  *provider_version= '\0';
+  *provider_vendor=  '\0';
+
+  if (!free_options)
+    return;
+
+  wsrep_sst_auth_free();
+}
+
+/*
+  Log the position to recover from.
+
+  If the local position is the special pair (undefined UUID, seqno -2),
+  it is reported as given at startup and nothing is read. Otherwise the
+  checkpoint is read from the storage engines and logged as the recovered
+  position.
+*/
+void wsrep_recover()
+{
+  char uuid_str[40];
+
+  const bool position_given=
+    memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(wsrep_uuid_t)) == 0 &&
+    local_seqno == -2;
+
+  if (position_given)
+  {
+    wsrep_uuid_print(&local_uuid, uuid_str, sizeof(uuid_str));
+    WSREP_INFO("Position %s:%lld given at startup, skipping position recovery",
+               uuid_str, (long long)local_seqno);
+    return;
+  }
+
+  wsrep_uuid_t se_uuid;
+  wsrep_seqno_t se_seqno;
+  wsrep_get_SE_checkpoint(se_uuid, se_seqno);
+
+  wsrep_uuid_print(&se_uuid, uuid_str, sizeof(uuid_str));
+  WSREP_INFO("Recovered position: %s:%lld", uuid_str, (long long)se_seqno);
+}
+
+
+/*
+  Stop replication: disconnect the provider from the group, close client
+  connections and wait for the applier threads to finish. Only logs and
+  returns when no provider is loaded.
+
+  @param thd  passed on to wsrep_wait_appliers_close()
+*/
+void wsrep_stop_replication(THD *thd)
+{
+  WSREP_INFO("Stop replication");
+
+  if (wsrep == NULL)
+  {
+    WSREP_INFO("Provider was not loaded, in stop replication");
+    return;
+  }
+
+  /* disconnect from group first to get wsrep_ready == FALSE */
+  WSREP_DEBUG("Provider disconnect");
+  wsrep->disconnect(wsrep);
+  wsrep_connected= FALSE;
+
+  wsrep_close_client_connections(TRUE);
+
+  /* wait until appliers have stopped */
+  wsrep_wait_appliers_close(thd);
+}
+
+
+/*
+  Connect the provider to the cluster.
+
+  - No real provider: the node is simply marked ready.
+  - Real provider but no cluster address: the node is marked not ready
+    and the call succeeds (waiting for an address).
+  - Otherwise the provider connect() is called; the wsrep_new_cluster
+    flag is consumed as the bootstrap argument and reset. After a
+    successful connect the provider options are re-read and stored.
+
+  @return false only if the provider connect() failed, true otherwise
+*/
+bool wsrep_start_replication()
+{
+  /*
+    if provider is trivial, don't even try to connect,
+    but resume local node operation
+  */
+  if (!WSREP_PROVIDER_EXISTS)
+  {
+    // enable normal operation in case no provider is specified
+    wsrep_ready_set(TRUE);
+    return true;
+  }
+
+  if (!wsrep_cluster_address || wsrep_cluster_address[0] == '\0')
+  {
+    // if provider is non-trivial, but no address is specified, wait for address
+    wsrep_ready_set(FALSE);
+    return true;
+  }
+
+  bool const bootstrap= wsrep_new_cluster;
+
+  WSREP_INFO("Start replication");
+
+  if (bootstrap)
+  {
+    WSREP_INFO("'wsrep-new-cluster' option used, bootstrapping the cluster");
+    wsrep_new_cluster= false;
+  }
+
+  wsrep_status_t const rcode= wsrep->connect(wsrep,
+                                             wsrep_cluster_name,
+                                             wsrep_cluster_address,
+                                             wsrep_sst_donor,
+                                             bootstrap);
+  if (rcode)
+  {
+    DBUG_PRINT("wsrep",("wsrep->connect(%s) failed: %d",
+                        wsrep_cluster_address, rcode));
+    WSREP_ERROR("wsrep::connect(%s) failed: %d",
+                wsrep_cluster_address, rcode);
+    return false;
+  }
+
+  wsrep_connected= TRUE;
+
+  char* const provider_opts= wsrep->options_get(wsrep);
+  if (provider_opts == NULL)
+  {
+    WSREP_WARN("Failed to get wsrep options");
+  }
+  else
+  {
+    wsrep_provider_options_init(provider_opts);
+    free(provider_opts);
+  }
+
+  return true;
+}
+
+/*
+  Decide whether the statement in thd has to perform a sync wait.
+
+  True only when all of these hold: a bit of mask is enabled in the
+  session's wsrep_sync_wait, wsrep is on for the session, the statement
+  is not a non-updating one running with wsrep_dirty_reads, no
+  multi-statement transaction is active, the thd is not replaying, and
+  no sync-wait GTID has been recorded for the thd yet.
+*/
+bool wsrep_must_sync_wait (THD* thd, uint mask)
+{
+  if (!(thd->variables.wsrep_sync_wait & mask))
+    return false;
+
+  if (!thd->variables.wsrep_on)
+    return false;
+
+  if (thd->variables.wsrep_dirty_reads &&
+      !is_update_query(thd->lex->sql_command))
+    return false;
+
+  if (thd->in_active_multi_stmt_transaction())
+    return false;
+
+  if (thd->wsrep_conflict_state == REPLAYING)
+    return false;
+
+  return thd->wsrep_sync_wait_gtid.seqno == WSREP_SEQNO_UNDEFINED;
+}
+
+/*
+  Perform a causal-read wait through the provider if
+  wsrep_must_sync_wait() requires one for this thd and mask.
+
+  @return false if no wait was needed or the wait succeeded;
+          true if the provider call failed, in which case an error has
+          been raised with my_error(): ER_NOT_SUPPORTED_YET when the
+          provider does not implement it, ER_LOCK_WAIT_TIMEOUT otherwise.
+*/
+bool wsrep_sync_wait (THD* thd, uint mask)
+{
+  if (!wsrep_must_sync_wait(thd, mask))
+    return false;
+
+  WSREP_DEBUG("wsrep_sync_wait: thd->variables.wsrep_sync_wait = %u, mask = %u",
+              thd->variables.wsrep_sync_wait, mask);
+
+  // This allows autocommit SELECTs and a first SELECT after SET AUTOCOMMIT=0
+  // TODO: modify to check if thd has locked any rows.
+  wsrep_status_t const ret= wsrep->causal_read (wsrep,
+                                                &thd->wsrep_sync_wait_gtid);
+
+  if (unlikely(WSREP_OK != ret))
+  {
+    // Possibly relevant error codes:
+    // ER_CHECKREAD, ER_ERROR_ON_READ, ER_INVALID_DEFAULT, ER_EMPTY_QUERY,
+    // ER_FUNCTION_NOT_DEFINED, ER_NOT_ALLOWED_COMMAND, ER_NOT_SUPPORTED_YET,
+    // ER_FEATURE_DISABLED, ER_QUERY_INTERRUPTED
+    const char* msg;
+    int err;
+
+    if (ret == WSREP_NOT_IMPLEMENTED)
+    {
+      msg= "synchronous reads by wsrep backend. "
+           "Please unset wsrep_causal_reads variable.";
+      err= ER_NOT_SUPPORTED_YET;
+    }
+    else
+    {
+      // NOTE: this msg won't be displayed with ER_LOCK_WAIT_TIMEOUT
+      msg= "Synchronous wait failed.";
+      err= ER_LOCK_WAIT_TIMEOUT;
+    }
+
+    my_error(err, MYF(0), msg);
+    return true;
+  }
+
+  return false;
+}
+
+/*
+ * Helpers to deal with TOI key arrays.
+ *
+ * wsrep_key_arr_t is a heap-allocated array of keys plus its length;
+ * release it with wsrep_keys_free().
+ */
+typedef struct wsrep_key_arr
+{
+ wsrep_key_t* keys; // array of keys_len entries, or 0 when empty
+ size_t keys_len; // number of valid entries in keys
+} wsrep_key_arr_t;
+
+
+/*
+  Release a key array: frees the key_parts buffer of every key, then the
+  key array itself, and leaves key_arr empty (keys == 0, keys_len == 0).
+*/
+static void wsrep_keys_free(wsrep_key_arr_t* key_arr)
+{
+  wsrep_key_t* const keys= key_arr->keys;
+  const size_t n_keys= key_arr->keys_len;
+
+  for (size_t idx= 0; idx != n_keys; ++idx)
+  {
+    my_free((void*)keys[idx].key_parts);
+  }
+  my_free(keys);
+
+  key_arr->keys= 0;
+  key_arr->keys_len= 0;
+}
+
+
+/*!
+ * Fill a key-part array for TOI from a database and table name.
+ *
+ * For protocol version 0 no parts are produced. For versions 1..3 the
+ * parts are { db } or { db, table }; nothing is produced when db is NULL
+ * (table is then ignored). The parts point at the caller's strings, no
+ * copies are made.
+ *
+ * @param db      Database string, may be NULL
+ * @param table   Table string, may be NULL
+ * @param key     Array of wsrep_buf_t key parts to fill
+ * @param key_len In: number of elements in key array (at least 2),
+ *                Out: number of elements populated
+ *
+ * @return true if preparation was successful, otherwise false (array
+ *         too small or unknown protocol version; *key_len is then left
+ *         untouched).
+ */
+
+static bool wsrep_prepare_key_for_isolation(const char* db,
+                                            const char* table,
+                                            wsrep_buf_t* key,
+                                            size_t* key_len)
+{
+  if (*key_len < 2)
+    return false;
+
+  if (wsrep_protocol_version < 0 || wsrep_protocol_version > 3)
+    return false;
+
+  size_t used= 0;
+
+  if (wsrep_protocol_version != 0 && db)
+  {
+    key[used].ptr= db;
+    key[used].len= strlen(db);
+    ++used;
+
+    if (table)
+    {
+      key[used].ptr= table;
+      key[used].len= strlen(table);
+      ++used;
+    }
+  }
+
+  *key_len= used;
+  return true;
+}
+
+/* Prepare key list from db/table and table_list.
+ *
+ * Builds ka as an array with one two-part key per non-temporary table:
+ * first an optional key for the explicit db/table pair, then one key for
+ * every entry of table_list (walked through next_global). Tables found
+ * by find_temporary_table() are left out.
+ *
+ * Returns true on success (ka may still be empty). On allocation or
+ * key-preparation failure everything allocated so far is released via
+ * wsrep_keys_free() and false is returned.
+ */
+static bool wsrep_prepare_keys_for_isolation(THD* thd,
+ const char* db,
+ const char* table,
+ const TABLE_LIST* table_list,
+ wsrep_key_arr_t* ka)
+{
+ ka->keys= 0;
+ ka->keys_len= 0;
+
+ extern TABLE* find_temporary_table(THD*, const TABLE_LIST*);
+
+ if (db || table)
+ {
+ TABLE_LIST tmp_table; // scratch entry used only for the temporary-table lookup
+ MDL_request mdl_request; // not referenced anywhere in this function
+
+ memset(&tmp_table, 0, sizeof(tmp_table));
+ tmp_table.table_name= (char*)table;
+ tmp_table.db= (char*)db;
+ tmp_table.mdl_request.init(MDL_key::GLOBAL, (db) ? db : "",
+ (table) ? table : "",
+ MDL_INTENTION_EXCLUSIVE, MDL_STATEMENT);
+
+ if (!table || !find_temporary_table(thd, &tmp_table))
+ {
+ if (!(ka->keys= (wsrep_key_t*)my_malloc(sizeof(wsrep_key_t), MYF(0))))
+ {
+ WSREP_ERROR("Can't allocate memory for key_array");
+ goto err;
+ }
+ ka->keys_len= 1;
+ if (!(ka->keys[0].key_parts= (wsrep_buf_t*)
+ my_malloc(sizeof(wsrep_buf_t)*2, MYF(0))))
+ {
+ WSREP_ERROR("Can't allocate memory for key_parts");
+ goto err;
+ }
+ ka->keys[0].key_parts_num= 2; // capacity in; wsrep_prepare_key_for_isolation() overwrites it with the count used
+ if (!wsrep_prepare_key_for_isolation(
+ db, table,
+ (wsrep_buf_t*)ka->keys[0].key_parts,
+ &ka->keys[0].key_parts_num))
+ {
+ WSREP_ERROR("Preparing keys for isolation failed");
+ goto err;
+ }
+ }
+ }
+
+ for (const TABLE_LIST* table= table_list; table; table= table->next_global)
+ {
+ if (!find_temporary_table(thd, table))
+ {
+ wsrep_key_t* tmp;
+ tmp= (wsrep_key_t*)my_realloc(
+ ka->keys, (ka->keys_len + 1) * sizeof(wsrep_key_t),
+ MYF(MY_ALLOW_ZERO_PTR));
+
+ if (!tmp)
+ {
+ WSREP_ERROR("Can't allocate memory for key_array");
+ goto err;
+ }
+ ka->keys= tmp;
+ if (!(ka->keys[ka->keys_len].key_parts= (wsrep_buf_t*)
+ my_malloc(sizeof(wsrep_buf_t)*2, MYF(0))))
+ {
+ WSREP_ERROR("Can't allocate memory for key_parts");
+ goto err; // keys_len not bumped yet, so the failed slot is not visited by wsrep_keys_free()
+ }
+ ka->keys[ka->keys_len].key_parts_num= 2;
+ ++ka->keys_len;
+ if (!wsrep_prepare_key_for_isolation(
+ table->db, table->table_name,
+ (wsrep_buf_t*)ka->keys[ka->keys_len - 1].key_parts,
+ &ka->keys[ka->keys_len - 1].key_parts_num))
+ {
+ WSREP_ERROR("Preparing keys for isolation failed");
+ goto err;
+ }
+ }
+ }
+ return true;
+err:
+ wsrep_keys_free(ka);
+ return false;
+}
+
+
+/*
+  Fill a key-part array for a row key.
+
+  Protocol version 0: parts are { cache_key (cache_key_len bytes), row_id }.
+  Protocol versions 1..3: cache_key is read as two consecutive
+  NUL-terminated strings, giving { first string, second string, row_id }.
+  The parts point into the caller's buffers, nothing is copied.
+
+  @param key      array of at least 3 wsrep_buf_t elements
+  @param key_len  in: capacity of key; out: number of parts written
+                  (set to 0 when the protocol version is unknown)
+
+  @return true on success; false if the array is too small (key_len is
+          left untouched) or the protocol version is unknown
+*/
+bool wsrep_prepare_key_for_innodb(const uchar* cache_key,
+                                  size_t cache_key_len,
+                                  const uchar* row_id,
+                                  size_t row_id_len,
+                                  wsrep_buf_t* key,
+                                  size_t* key_len)
+{
+  if (*key_len < 3)
+    return false;
+
+  *key_len= 0;
+
+  size_t parts;
+  if (wsrep_protocol_version == 0)
+  {
+    key[0].ptr = cache_key;
+    key[0].len = cache_key_len;
+    parts= 1;
+  }
+  else if (wsrep_protocol_version >= 1 && wsrep_protocol_version <= 3)
+  {
+    const size_t first_len= strlen( (char*)cache_key );
+    const uchar* const second= cache_key + first_len + 1;
+
+    key[0].ptr = cache_key;
+    key[0].len = first_len;
+
+    key[1].ptr = second;
+    key[1].len = strlen( (char*)second );
+    parts= 2;
+  }
+  else
+  {
+    return false;
+  }
+
+  /* the row id is always the last part */
+  key[parts].ptr = row_id;
+  key[parts].len = row_id_len;
+  *key_len= parts + 1;
+
+  return true;
+}
+
+
+/*
+ * Construct Query_log_Event from thd query and serialize it
+ * into buffer.
+ *
+ * The events are first written to a temporary IO_CACHE, in this order:
+ * a format description event, (with GTID_SUPPORT) a GTID event, the
+ * optional TOI pre-query and finally the query itself. The cache is then
+ * copied into *buf / *buf_len by wsrep_write_cache_buf().
+ *
+ * Any failed step marks the call as failed and suppresses the remaining
+ * writes; the temporary cache is always closed.
+ *
+ * Return 0 in case of success, 1 in case of error.
+ */
+int wsrep_to_buf_helper(
+    THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len)
+{
+  IO_CACHE tmp_io_cache;
+  if (open_cached_file(&tmp_io_cache, mysql_tmpdir, TEMP_PREFIX,
+                       65536, MYF(MY_WME)))
+    return 1;
+  int ret(0);
+
+  Format_description_log_event *tmp_fd= new Format_description_log_event(4);
+  tmp_fd->checksum_alg= binlog_checksum_options;
+  /* a failed header write must fail the call instead of being ignored */
+  if (tmp_fd->write(&tmp_io_cache)) ret= 1;
+  delete tmp_fd;
+
+#ifdef GTID_SUPPORT
+  if (!ret && thd->variables.gtid_next.type == GTID_GROUP)
+  {
+    Gtid_log_event gtid_ev(thd, FALSE, &thd->variables.gtid_next);
+    /* an invalid GTID event is an error (this used to assign 0, a no-op) */
+    if (!gtid_ev.is_valid()) ret= 1;
+    if (!ret && gtid_ev.write(&tmp_io_cache)) ret= 1;
+  }
+#endif /* GTID_SUPPORT */
+
+  /* if there is prepare query, add event for it */
+  if (!ret && thd->wsrep_TOI_pre_query)
+  {
+    Query_log_event ev(thd, thd->wsrep_TOI_pre_query,
+                       thd->wsrep_TOI_pre_query_len,
+                       FALSE, FALSE, FALSE, 0);
+    if (ev.write(&tmp_io_cache)) ret= 1;
+  }
+
+  /* continue to append the actual query */
+  Query_log_event ev(thd, query, query_len, FALSE, FALSE, FALSE, 0);
+  if (!ret && ev.write(&tmp_io_cache)) ret= 1;
+  if (!ret && wsrep_write_cache_buf(&tmp_io_cache, buf, buf_len)) ret= 1;
+  close_cached_file(&tmp_io_cache);
+  return ret;
+}
+
+#include "sql_show.h"
+/*
+  Rebuild the text of the current CREATE/ALTER VIEW statement, with an
+  explicit definer and view options, and serialize it into *buf through
+  wsrep_to_buf_helper().
+
+  The definer is taken from the statement when present, otherwise a
+  default definer is created. The first table of the select's table list
+  is the view being defined; its definer, algorithm, suid and check
+  option fields are updated from the LEX as a side effect.
+
+  @return 1 if no definer could be obtained, otherwise the result of
+          wsrep_to_buf_helper() (0 on success, 1 on error)
+*/
+static int
+create_view_query(THD *thd, uchar** buf, size_t* buf_len)
+{
+  LEX *lex= thd->lex;
+  TABLE_LIST *view= lex->select_lex.table_list.first;
+
+  /* statement prefix, indexed by lex->create_view_mode */
+  const LEX_STRING command[3]=
+    {{ C_STRING_WITH_LEN("CREATE ") },
+     { C_STRING_WITH_LEN("ALTER ") },
+     { C_STRING_WITH_LEN("CREATE OR REPLACE ") }};
+
+  String sql;
+  sql.append(command[lex->create_view_mode].str,
+             command[lex->create_view_mode].length);
+
+  /*
+    When the DEFINER clause is missing, a default definer is created in
+    the persistent arena to be PS/SP friendly. If this is an ALTER VIEW
+    then the current user should be set as the definer.
+  */
+  LEX_USER *definer= lex->definer ? get_current_user(thd, lex->definer)
+                                  : create_default_definer(thd, false);
+  if (!definer)
+  {
+    WSREP_ERROR("Failed to get DEFINER for VIEW.");
+    return 1;
+  }
+  view->definer.user = definer->user;
+  view->definer.host = definer->host;
+
+  view->algorithm = lex->create_view_algorithm;
+  view->view_suid = lex->create_view_suid;
+  view->with_check = lex->create_view_check;
+
+  view_store_options4(thd, view, &sql, true);
+  sql.append(STRING_WITH_LEN("VIEW "));
+
+  /* Qualify with the db only if the user supplied one other than thd->db */
+  const bool explicit_db=
+    view->db && view->db[0] &&
+    (thd->db == NULL || strcmp(view->db, thd->db));
+  if (explicit_db)
+  {
+    append_identifier(thd, &sql, view->db, view->db_length);
+    sql.append('.');
+  }
+  append_identifier(thd, &sql, view->table_name, view->table_name_length);
+
+  /* optional explicit column list: "(a, b, ...)" */
+  if (lex->view_list.elements)
+  {
+    List_iterator_fast<LEX_STRING> names(lex->view_list);
+    LEX_STRING *name;
+    bool first_column= true;
+
+    while ((name= names++))
+    {
+      sql.append(first_column ? "(" : ", ");
+      first_column= false;
+      append_identifier(thd, &sql, name->str, name->length);
+    }
+    sql.append(')');
+  }
+
+  sql.append(STRING_WITH_LEN(" AS "));
+  sql.append(lex->create_view_select.str,
+             lex->create_view_select.length);
+
+  return wsrep_to_buf_helper(thd, sql.ptr(), sql.length(), buf, buf_len);
+}
+
+/*
+ Starts total order isolation (TOI) for the statement held by thd.
+
+ The statement text is first packed into a buffer by a builder chosen from
+ thd->lex->sql_command; statements without a dedicated builder pass
+ thd->query() to wsrep_to_buf_helper(). Keys are then prepared with
+ wsrep_prepare_keys_for_isolation() and, when at least one key exists, the
+ buffer is handed to the provider through wsrep->to_execute_start().
+
+ NOTE(review): when building the buffer fails (buf_err != 0) the keys are
+ never prepared, key_arr.keys_len stays 0 and the "skipped" branch below is
+ taken, so the function returns 1 - confirm that this is intended.
+
+ returns:
+ 0: statement was replicated as TOI
+ 1: TOI replication was skipped
+ -1: TOI replication failed
+ */
+static int wsrep_TOI_begin(THD *thd, char *db_, char *table_,
+ const TABLE_LIST* table_list)
+{
+ wsrep_status_t ret(WSREP_WARNING); // stays WSREP_WARNING unless to_execute_start() runs
+ uchar* buf(0); // filled in by the builders below, released at the end of this function
+ size_t buf_len(0);
+ int buf_err;
+ int rc= 0;
+
+ WSREP_DEBUG("TO BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd),
+ thd->wsrep_exec_mode, thd->query() );
+ switch (thd->lex->sql_command) // pick the builder for the text to replicate
+ {
+ case SQLCOM_CREATE_VIEW:
+ buf_err= create_view_query(thd, &buf, &buf_len);
+ break;
+ case SQLCOM_CREATE_PROCEDURE:
+ case SQLCOM_CREATE_SPFUNCTION:
+ buf_err= wsrep_create_sp(thd, &buf, &buf_len);
+ break;
+ case SQLCOM_CREATE_TRIGGER:
+ buf_err= wsrep_create_trigger_query(thd, &buf, &buf_len);
+ break;
+ case SQLCOM_CREATE_EVENT:
+ buf_err= wsrep_create_event_query(thd, &buf, &buf_len);
+ break;
+ case SQLCOM_ALTER_EVENT:
+ buf_err= wsrep_alter_event_query(thd, &buf, &buf_len);
+ break;
+ case SQLCOM_CREATE_ROLE:
+ if (sp_process_definer(thd)) // a failure here is only logged; the statement text is still replicated
+ {
+ WSREP_WARN("Failed to set CREATE ROLE definer for TOI.");
+ }
+ /* fallthrough */
+ default:
+ buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), &buf,
+ &buf_len);
+ break;
+ }
+
+ wsrep_key_arr_t key_arr= {0, 0}; // starts empty; only filled when buf_err is 0
+ struct wsrep_buf buff = { buf, buf_len };
+ if (!buf_err &&
+ wsrep_prepare_keys_for_isolation(thd, db_, table_, table_list, &key_arr)&&
+ key_arr.keys_len > 0 &&
+ WSREP_OK == (ret = wsrep->to_execute_start(wsrep, thd->thread_id,
+ key_arr.keys, key_arr.keys_len,
+ &buff, 1,
+ &thd->wsrep_trx_meta)))
+ {
+ thd->wsrep_exec_mode= TOTAL_ORDER;
+ wsrep_to_isolation++; // decremented again by wsrep_TOI_end()
+ wsrep_keys_free(&key_arr);
+ WSREP_DEBUG("TO BEGIN: %lld, %d",(long long)wsrep_thd_trx_seqno(thd),
+ thd->wsrep_exec_mode);
+ }
+ else if (key_arr.keys_len > 0) { // keys exist, so the provider call itself did not return WSREP_OK
+ /* jump to error handler in mysql_execute_command() */
+ WSREP_WARN("TO isolation failed for: %d, schema: %s, sql: %s. Check wsrep "
+ "connection state and retry the query.",
+ ret,
+ (thd->db ? thd->db : "(null)"),
+ (thd->query()) ? thd->query() : "void");
+ my_error(ER_LOCK_DEADLOCK, MYF(0), "WSREP replication failed. Check "
+ "your wsrep connection state and retry the query.");
+ wsrep_keys_free(&key_arr);
+ rc= -1;
+ }
+ else {
+ /* non replicated DDL, affecting temporary tables only */
+ WSREP_DEBUG("TO isolation skipped for: %d, sql: %s."
+ "Only temporary tables affected.",
+ ret, (thd->query()) ? thd->query() : "void");
+ rc= 1;
+ }
+ if (buf) my_free(buf); // reached on every path above
+ return rc;
+}
+
+/*
+  Ends total order isolation for thd: decrements wsrep_to_isolation, passes
+  the GTID stored in thd->wsrep_trx_meta (uuid, seqno) to
+  wsrep_set_SE_checkpoint() and tells the provider through
+  wsrep->to_execute_end() that the isolated statement is done.
+  A provider failure is only logged as a warning.
+*/
+static void wsrep_TOI_end(THD *thd) {
+  wsrep_to_isolation--;
+
+  WSREP_DEBUG("TO END: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd),
+              thd->wsrep_exec_mode, (thd->query()) ? thd->query() : "void");
+
+  wsrep_set_SE_checkpoint(thd->wsrep_trx_meta.gtid.uuid,
+                          thd->wsrep_trx_meta.gtid.seqno);
+  WSREP_DEBUG("TO END: %lld, update seqno",
+              (long long)wsrep_thd_trx_seqno(thd));
+
+  const wsrep_status_t end_status= wsrep->to_execute_end(wsrep,
+                                                         thd->thread_id);
+  if (end_status != WSREP_OK)
+  {
+    WSREP_WARN("TO isolation end failed for: %d, schema: %s, sql: %s",
+               end_status,
+               (thd->db ? thd->db : "(null)"),
+               (thd->query()) ? thd->query() : "void");
+    return;
+  }
+
+  WSREP_DEBUG("TO END: %lld", (long long)wsrep_thd_trx_seqno(thd));
+}
+
+/*
+  Undoes the preparation done by wsrep_RSU_begin() after a later step failed:
+  gives back this thread's wsrep_replaying reference and asks the provider to
+  resync the node. A failing resync can only be logged.
+*/
+static void wsrep_RSU_begin_rollback(THD *thd)
+{
+  mysql_mutex_lock(&LOCK_wsrep_replaying);
+  wsrep_replaying--;
+  mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+  const wsrep_status_t ret = wsrep->resync(wsrep);
+  if (ret != WSREP_OK)
+  {
+    WSREP_WARN("resync failed %d for schema: %s, query: %s",
+               ret, (thd->db ? thd->db : "(null)"), thd->query());
+  }
+}
+
+/*
+  Prepares the node for a rolling schema upgrade (RSU): desyncs it from the
+  cluster, waits up to 5000 (unit defined by
+  wsrep_wait_committing_connections_close()) for committing connections to
+  finish, pauses the provider and finally switches wsrep_on off for thd.
+
+  db_ and table_ are accepted for symmetry with wsrep_TOI_begin() and are
+  not used here.
+
+  returns:
+   0: node is desynced and paused, wsrep_on is cleared for thd
+  -1: a step failed; everything done so far has been undone and
+      ER_LOCK_DEADLOCK has been raised for the client
+
+  Failures deliberately return -1 and never 1 or a raw provider status:
+  wsrep_to_isolation_begin() maps a return value of 1 to success
+  ("replication skipped"), which would let the DDL run on a node that was
+  never desynced and paused.
+*/
+static int wsrep_RSU_begin(THD *thd, char *db_, char *table_)
+{
+  wsrep_status_t ret(WSREP_WARNING);
+  WSREP_DEBUG("RSU BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd),
+              thd->wsrep_exec_mode, thd->query() );
+
+  ret = wsrep->desync(wsrep);
+  if (ret != WSREP_OK)
+  {
+    WSREP_WARN("RSU desync failed %d for schema: %s, query: %s",
+               ret, (thd->db ? thd->db : "(null)"), thd->query());
+    my_error(ER_LOCK_DEADLOCK, MYF(0));
+    return -1;
+  }
+
+  mysql_mutex_lock(&LOCK_wsrep_replaying);
+  wsrep_replaying++;
+  mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+  if (wsrep_wait_committing_connections_close(5000))
+  {
+    /* no can do, bail out from DDL */
+    WSREP_WARN("RSU failed due to pending transactions, schema: %s, query %s",
+               (thd->db ? thd->db : "(null)"),
+               thd->query());
+    wsrep_RSU_begin_rollback(thd);
+    my_error(ER_LOCK_DEADLOCK, MYF(0));
+    return -1;
+  }
+
+  wsrep_seqno_t seqno = wsrep->pause(wsrep);
+  if (seqno == WSREP_SEQNO_UNDEFINED)
+  {
+    WSREP_WARN("pause failed %lld for schema: %s, query: %s", (long long)seqno,
+               (thd->db ? thd->db : "(null)"),
+               thd->query());
+    /*
+      wsrep_RSU_end() is never called for a failed begin, so the desync and
+      the wsrep_replaying reference must be released here.
+    */
+    wsrep_RSU_begin_rollback(thd);
+    my_error(ER_LOCK_DEADLOCK, MYF(0));
+    return -1;
+  }
+  WSREP_DEBUG("paused at %lld", (long long)seqno);
+  thd->variables.wsrep_on = 0;
+  return 0;
+}
+
+/*
+  Finishes a rolling schema upgrade: gives back the wsrep_replaying
+  reference, resumes the provider and resyncs the node. wsrep_on is switched
+  back on for thd only when the resync succeeded; provider failures are
+  logged as warnings.
+*/
+static void wsrep_RSU_end(THD *thd)
+{
+  WSREP_DEBUG("RSU END: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd),
+              thd->wsrep_exec_mode, thd->query() );
+
+  mysql_mutex_lock(&LOCK_wsrep_replaying);
+  wsrep_replaying--;
+  mysql_mutex_unlock(&LOCK_wsrep_replaying);
+
+  const wsrep_status_t resume_rc = wsrep->resume(wsrep);
+  if (resume_rc != WSREP_OK)
+  {
+    WSREP_WARN("resume failed %d for schema: %s, query: %s", resume_rc,
+               (thd->db ? thd->db : "(null)"),
+               thd->query());
+  }
+
+  /* The resync is attempted even when the resume above failed. */
+  const wsrep_status_t resync_rc = wsrep->resync(wsrep);
+  if (resync_rc == WSREP_OK)
+  {
+    thd->variables.wsrep_on = 1;
+  }
+  else
+  {
+    WSREP_WARN("resync failed %d for schema: %s, query: %s", resync_rc,
+               (thd->db ? thd->db : "(null)"), thd->query());
+  }
+}
+
+/*
+  Starts isolation (TOI or RSU, selected by thd->variables.wsrep_OSU_method)
+  for the statement thd is about to execute.
+
+  returns:
+    0               isolation started, skipped, or not applicable to thd
+    WSREP_TRX_FAIL  thd is already marked MUST_ABORT
+    other non-zero  failure (global read lock held, unsupported OSU method,
+                    or the value reported by the TOI/RSU begin function)
+*/
+int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_,
+                             const TABLE_LIST* table_list)
+{
+  /* No isolation for applier or replaying threads. */
+  if (thd->wsrep_exec_mode == REPL_RECV)
+    return 0;
+
+  /* The conflict state is inspected under the THD's wsrep mutex. */
+  mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+  const bool marked_for_abort= (thd->wsrep_conflict_state == MUST_ABORT);
+  if (marked_for_abort)
+  {
+    WSREP_INFO("thread: %lu, schema: %s, query: %s has been aborted due to multi-master conflict",
+               thd->thread_id,
+               (thd->db ? thd->db : "(null)"),
+               thd->query());
+  }
+  mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+  if (marked_for_abort)
+    return WSREP_TRX_FAIL;
+
+  DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE);
+  DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED);
+
+  if (thd->global_read_lock.can_acquire_protection())
+  {
+    WSREP_DEBUG("Aborting TOI: Global Read-Lock (FTWRL) in place: %s %lu",
+                thd->query(), thd->thread_id);
+    return -1;
+  }
+
+  if (wsrep_debug && thd->mdl_context.has_locks())
+  {
+    WSREP_DEBUG("thread holds MDL locks at TI begin: %s %lu",
+                thd->query(), thd->thread_id);
+  }
+
+  /*
+    auto_increment_* are forced to their defaults for the isolated statement.
+    This has to happen before wsrep_TOI_begin(), because the Query_log_event
+    that carries the statement and the auto-increment variables is built
+    there. THD::reset_for_next_command() restores the variables before the
+    next command.
+  */
+  if (wsrep_auto_increment_control)
+  {
+    thd->variables.auto_increment_offset = 1;
+    thd->variables.auto_increment_increment = 1;
+  }
+
+  /* Nothing more to do unless wsrep is on for a thread in local state. */
+  if (!thd->variables.wsrep_on || thd->wsrep_exec_mode != LOCAL_STATE)
+    return 0;
+
+  int begin_rc;
+  if (thd->variables.wsrep_OSU_method == WSREP_OSU_TOI)
+  {
+    begin_rc= wsrep_TOI_begin(thd, db_, table_, table_list);
+  }
+  else if (thd->variables.wsrep_OSU_method == WSREP_OSU_RSU)
+  {
+    begin_rc= wsrep_RSU_begin(thd, db_, table_);
+  }
+  else
+  {
+    WSREP_ERROR("Unsupported OSU method: %lu",
+                thd->variables.wsrep_OSU_method);
+    begin_rc= -1;
+  }
+
+  if (begin_rc == 0)
+  {
+    thd->wsrep_exec_mode= TOTAL_ORDER;
+  }
+  else if (begin_rc == 1)
+  {
+    /* TOI replication skipped, treat as success */
+    begin_rc= 0;
+  }
+  /* any other value (e.g. -1) is a failure and is returned unchanged */
+  return begin_rc;
+}
+
+/*
+  Ends the isolation started by wsrep_to_isolation_begin(). Does nothing
+  unless thd is in TOTAL_ORDER mode; otherwise runs the end handler matching
+  thd->variables.wsrep_OSU_method and then wsrep_cleanup_transaction().
+*/
+void wsrep_to_isolation_end(THD *thd)
+{
+  if (thd->wsrep_exec_mode != TOTAL_ORDER)
+    return;
+
+  if (thd->variables.wsrep_OSU_method == WSREP_OSU_TOI)
+  {
+    wsrep_TOI_end(thd);
+  }
+  else if (thd->variables.wsrep_OSU_method == WSREP_OSU_RSU)
+  {
+    wsrep_RSU_end(thd);
+  }
+  else
+  {
+    WSREP_WARN("Unsupported wsrep OSU method at isolation end: %lu",
+               thd->variables.wsrep_OSU_method);
+  }
+
+  /* The cleanup runs for every OSU method, including unsupported ones. */
+  wsrep_cleanup_transaction(thd);
+}
+/*
+ Logs an MDL conflict through WSREP_<severity> (e.g. DEBUG or INFO).
+ The message prints msg, the schema name (schema_len bytes of schema) and,
+ for both the requesting thread (req) and the granted thread (gra): thread
+ id, transaction seqno, wsrep exec mode, query state, conflict state,
+ get_command(), lex->sql_command and the query text.
+ req and gra are dereferenced several times, so pass plain THD pointers.
+ The expansion already ends with ';'.
+ */
+#define WSREP_MDL_LOG(severity, msg, schema, schema_len, req, gra) \
+ WSREP_##severity( \
+ "%s\n" \
+ "schema: %.*s\n" \
+ "request: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)\n" \
+ "granted: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)", \
+ msg, schema_len, schema, \
+ req->thread_id, (long long)wsrep_thd_trx_seqno(req), \
+ req->wsrep_exec_mode, req->wsrep_query_state, req->wsrep_conflict_state, \
+ req->get_command(), req->lex->sql_command, req->query(), \
+ gra->thread_id, (long long)wsrep_thd_trx_seqno(gra), \
+ gra->wsrep_exec_mode, gra->wsrep_query_state, gra->wsrep_conflict_state, \
+ gra->get_command(), gra->lex->sql_command, gra->query());
+
+/**
+ Check if request for the metadata lock should be granted to the requester.
+
+ FALSE is returned right away when WSREP_ON is false, and also when the
+ requesting thread is neither in TOTAL_ORDER nor in REPL_RECV mode.
+ Otherwise the outcome depends on the thread that owns the ticket:
+ - owner is in TOTAL_ORDER or REPL_RECV mode as well: the conflict is
+ logged as "MDL BF-BF conflict" and TRUE is returned;
+ - owner executes SQLCOM_FLUSH, or its mdl_context reports
+ wsrep_has_explicit_locks(): FALSE is returned and the owner is left alone;
+ - any other owner: wsrep_abort_thd() is called for it and FALSE is returned.
+
+ @param requestor_ctx The MDL context of the requestor
+ @param ticket MDL ticket whose owning context supplies the thread that is
+ treated as the current holder of the lock
+ @param key MDL key of the lock; only its database name is read here,
+ for logging
+
+ @retval TRUE Lock request can be granted
+ @retval FALSE Lock request cannot be granted
+*/
+
+bool
+wsrep_grant_mdl_exception(MDL_context *requestor_ctx,
+ MDL_ticket *ticket,
+ const MDL_key *key
+) {
+ /* Fallback to the non-wsrep behaviour */
+ if (!WSREP_ON) return FALSE;
+
+ THD *request_thd = requestor_ctx->get_thd();
+ THD *granted_thd = ticket->get_ctx()->get_thd();
+ bool ret = FALSE;
+
+ const char* schema= key->db_name();
+ int schema_len= key->db_name_length();
+
+ mysql_mutex_lock(&request_thd->LOCK_wsrep_thd);
+ if (request_thd->wsrep_exec_mode == TOTAL_ORDER ||
+ request_thd->wsrep_exec_mode == REPL_RECV)
+ {
+ mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd); // released before the granted thread's mutex is taken
+ WSREP_MDL_LOG(DEBUG, "MDL conflict ", schema, schema_len,
+ request_thd, granted_thd);
+ ticket->wsrep_report(wsrep_debug);
+
+ mysql_mutex_lock(&granted_thd->LOCK_wsrep_thd); // every branch below unlocks it again
+ if (granted_thd->wsrep_exec_mode == TOTAL_ORDER ||
+ granted_thd->wsrep_exec_mode == REPL_RECV)
+ {
+ WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len,
+ request_thd, granted_thd);
+ ticket->wsrep_report(true);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd);
+ ret = TRUE; // the only path that grants the lock
+ }
+ else if (granted_thd->lex->sql_command == SQLCOM_FLUSH ||
+ granted_thd->mdl_context.wsrep_has_explicit_locks())
+ {
+ WSREP_DEBUG("BF thread waiting for FLUSH");
+ ticket->wsrep_report(wsrep_debug);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd);
+ ret = FALSE; // no abort is issued in this branch
+ }
+ else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE)
+ {
+ WSREP_DEBUG("DROP caused BF abort");
+ ticket->wsrep_report(wsrep_debug);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); // unlocked before wsrep_abort_thd() is called
+ wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1);
+ ret = FALSE;
+ }
+ else if (granted_thd->wsrep_query_state == QUERY_COMMITTING)
+ {
+ WSREP_DEBUG("mdl granted, but commiting thd abort scheduled");
+ ticket->wsrep_report(wsrep_debug);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd);
+ wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1);
+ ret = FALSE;
+ }
+ else
+ {
+ WSREP_MDL_LOG(DEBUG, "MDL conflict-> BF abort", schema, schema_len,
+ request_thd, granted_thd);
+ ticket->wsrep_report(wsrep_debug);
+ mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd);
+ wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1);
+ ret = FALSE;
+ }
+ }
+ else
+ {
+ mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd); // ret keeps its initial FALSE
+ }
+ return ret;
+}
+
+/*
+  Tells whether this node is currently a donor: true only when WSREP_ON
+  holds and local_status.get() equals 2.
+  NOTE(review): 2 presumably matches WSREP_MEMBER_DONOR - confirm against
+  wsrep_api.h.
+*/
+bool wsrep_node_is_donor()
+{
+  if (!(WSREP_ON))
+    return false;
+  return local_status.get() == 2;
+}
+/*
+  Tells whether this node is currently synced: true only when WSREP_ON
+  holds and local_status.get() equals 4.
+  NOTE(review): 4 presumably matches WSREP_MEMBER_SYNCED - confirm against
+  wsrep_api.h.
+*/
+bool wsrep_node_is_synced()
+{
+  if (!(WSREP_ON))
+    return false;
+  return local_status.get() == 4;
+}
diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h
new file mode 100644
index 00000000000..5ec183f7186
--- /dev/null
+++ b/sql/wsrep_mysqld.h
@@ -0,0 +1,338 @@
+/* Copyright 2008-2015 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef WSREP_MYSQLD_H
+#define WSREP_MYSQLD_H
+
+#include "mysqld.h"
+typedef struct st_mysql_show_var SHOW_VAR;
+#include <sql_priv.h>
+//#include "rpl_gtid.h"
+#include "../wsrep/wsrep_api.h"
+
+#define WSREP_UNDEFINED_TRX_ID ULONGLONG_MAX
+
+class set_var;
+class THD;
+
+enum wsrep_exec_mode {
+ /* Transaction processing before replication. */
+ LOCAL_STATE,
+ /* Slave thread applying write sets from other nodes or replaying thread. */
+ REPL_RECV,
+ /* Total-order-isolation mode */
+ TOTAL_ORDER,
+ /*
+ Transaction processing after it has been replicated in prepare stage and
+ has passed certification
+ */
+ LOCAL_COMMIT
+};
+/*
+ Query state of a THD (thd->wsrep_query_state). The accessors
+ wsrep_thd_query_state() and wsrep_thd_set_query_state() are declared
+ further down in this header.
+ */
+enum wsrep_query_state {
+ QUERY_IDLE,
+ QUERY_EXEC,
+ QUERY_COMMITTING,
+ QUERY_EXITING,
+ QUERY_ROLLINGBACK,
+};
+/*
+ Conflict state of a THD (thd->wsrep_conflict_state). The accessors
+ wsrep_thd_conflict_state() and wsrep_thd_set_conflict_state() are
+ declared further down in this header. wsrep_to_isolation_begin() refuses
+ to start isolation for a thread that is in MUST_ABORT.
+ */
+enum wsrep_conflict_state {
+ NO_CONFLICT,
+ MUST_ABORT,
+ ABORTING,
+ ABORTED,
+ MUST_REPLAY,
+ REPLAYING,
+ RETRY_AUTOCOMMIT,
+ CERT_FAILURE,
+};
+/*
+ NOTE(review): no user of this enum is visible in this header; presumably
+ it tracks the consistency-check state that wsrep_consistency_check()
+ reports - confirm against the definition of that function.
+ */
+enum wsrep_consistency_check_mode {
+ NO_CONSISTENCY_CHECK,
+ CONSISTENCY_CHECK_DECLARED,
+ CONSISTENCY_CHECK_RUNNING,
+};
+
+
+// Global wsrep parameters
+extern wsrep_t* wsrep;
+
+// MySQL wsrep options
+extern const char* wsrep_provider;
+extern const char* wsrep_provider_options;
+extern const char* wsrep_cluster_name;
+extern const char* wsrep_cluster_address;
+extern const char* wsrep_node_name;
+extern const char* wsrep_node_address;
+extern const char* wsrep_node_incoming_address;
+extern const char* wsrep_data_home_dir;
+extern const char* wsrep_dbug_option;
+extern long wsrep_slave_threads;
+extern int wsrep_slave_count_change;
+extern MYSQL_PLUGIN_IMPORT my_bool wsrep_debug;
+extern my_bool wsrep_convert_LOCK_to_trx;
+extern ulong wsrep_retry_autocommit;
+extern my_bool wsrep_auto_increment_control;
+extern my_bool wsrep_drupal_282555_workaround;
+extern my_bool wsrep_incremental_data_collection;
+extern bool wsrep_dirty_reads;
+extern const char* wsrep_start_position;
+extern ulong wsrep_max_ws_size;
+extern ulong wsrep_max_ws_rows;
+extern const char* wsrep_notify_cmd;
+extern my_bool wsrep_certify_nonPK;
+extern long wsrep_max_protocol_version;
+extern long wsrep_protocol_version;
+extern ulong wsrep_forced_binlog_format;
+extern my_bool wsrep_desync;
+extern my_bool wsrep_recovery;
+extern my_bool wsrep_replicate_myisam;
+extern my_bool wsrep_log_conflicts;
+extern ulong wsrep_mysql_replication_bundle;
+extern my_bool wsrep_load_data_splitting;
+extern my_bool wsrep_restart_slave;
+extern my_bool wsrep_restart_slave_activated;
+extern my_bool wsrep_slave_FK_checks;
+extern my_bool wsrep_slave_UK_checks;
+extern bool wsrep_new_cluster; // bootstrap the cluster ?
+extern my_bool wsrep_creating_startup_threads;
+/*
+ Values compared against thd->variables.wsrep_OSU_method.
+ wsrep_to_isolation_begin() starts TOI for WSREP_OSU_TOI and RSU for
+ WSREP_OSU_RSU; any other value, WSREP_OSU_NONE included, is rejected
+ there as unsupported.
+ */
+enum enum_wsrep_OSU_method {
+ WSREP_OSU_TOI,
+ WSREP_OSU_RSU,
+ WSREP_OSU_NONE,
+};
+/*
+ Bit flags used as the mask argument of wsrep_must_sync_wait() and
+ wsrep_sync_wait(); both default the mask to WSREP_SYNC_WAIT_BEFORE_READ.
+ WSREP_SYNC_WAIT_MAX (0x7) is the OR of the three BEFORE_* bits.
+ */
+enum enum_wsrep_sync_wait {
+ WSREP_SYNC_WAIT_NONE = 0x0,
+ // show, select, begin
+ WSREP_SYNC_WAIT_BEFORE_READ = 0x1,
+ WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE = 0x2,
+ WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE = 0x4,
+ WSREP_SYNC_WAIT_MAX = 0x7
+};
+
+// MySQL status variables
+extern my_bool wsrep_connected;
+extern my_bool wsrep_ready;
+extern const char* wsrep_cluster_state_uuid;
+extern long long wsrep_cluster_conf_id;
+extern const char* wsrep_cluster_status;
+extern long wsrep_cluster_size;
+extern long wsrep_local_index;
+extern long long wsrep_local_bf_aborts;
+extern const char* wsrep_provider_name;
+extern const char* wsrep_provider_version;
+extern const char* wsrep_provider_vendor;
+
+int wsrep_show_status(THD *thd, SHOW_VAR *var, char *buff);
+void wsrep_free_status(THD *thd);
+
+int wsrep_init();
+void wsrep_deinit(bool free_options);
+void wsrep_recover();
+bool wsrep_before_SE(); // initialize wsrep before storage
+ // engines (true) or after (false)
+/* wsrep initialization sequence at startup
+ * @param before wsrep_before_SE() value */
+void wsrep_init_startup(bool before);
+
+// Other wsrep global variables
+extern my_bool wsrep_inited; // whether wsrep is initialized ?
+
+extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd);
+extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd);
+extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd);
+extern "C" const char * wsrep_thd_exec_mode_str(THD *thd);
+extern "C" const char * wsrep_thd_conflict_state_str(THD *thd);
+extern "C" const char * wsrep_thd_query_state_str(THD *thd);
+extern "C" wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd);
+
+extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode);
+extern "C" void wsrep_thd_set_query_state(
+ THD *thd, enum wsrep_query_state state);
+extern "C" void wsrep_thd_set_conflict_state(
+ THD *thd, enum wsrep_conflict_state state);
+
+extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id);
+
+extern "C" void wsrep_thd_LOCK(THD *thd);
+extern "C" void wsrep_thd_UNLOCK(THD *thd);
+extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd);
+extern "C" time_t wsrep_thd_query_start(THD *thd);
+extern "C" my_thread_id wsrep_thd_thread_id(THD *thd);
+extern "C" int64_t wsrep_thd_trx_seqno(THD *thd);
+extern "C" query_id_t wsrep_thd_query_id(THD *thd);
+extern "C" char * wsrep_thd_query(THD *thd);
+extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd);
+extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id);
+extern "C" void wsrep_thd_awake(THD *thd, my_bool signal);
+extern "C" int wsrep_thd_retry_counter(THD *thd);
+extern "C" bool wsrep_thd_skip_append_keys(THD *thd);
+
+extern void wsrep_close_client_connections(my_bool wait_to_end);
+extern int wsrep_wait_committing_connections_close(int wait_time);
+extern void wsrep_close_applier(THD *thd);
+extern void wsrep_wait_appliers_close(THD *thd);
+extern void wsrep_close_applier_threads(int count);
+extern void wsrep_kill_mysql(THD *thd);
+
+/* new defines */
+extern void wsrep_stop_replication(THD *thd);
+extern bool wsrep_start_replication();
+extern bool wsrep_must_sync_wait (THD* thd, uint mask = WSREP_SYNC_WAIT_BEFORE_READ);
+extern bool wsrep_sync_wait (THD* thd, uint mask = WSREP_SYNC_WAIT_BEFORE_READ);
+extern int wsrep_check_opts (int argc, char* const* argv);
+extern void wsrep_prepend_PATH (const char* path);
+
+/* Other global variables */
+extern wsrep_seqno_t wsrep_locked_seqno;
+/*
+ WSREP_ON: global wsrep_on is set and wsrep_provider is non-NULL and
+ differs from WSREP_NONE (case-sensitive strcmp()).
+ WSREP(thd): WSREP_ON, a non-NULL wsrep handle, and wsrep_on set for the
+ given non-NULL thd.
+ WSREP_CLIENT(thd): WSREP(thd) and thd->wsrep_client_thread is set.
+ WSREP_EMULATE_BINLOG(thd): WSREP(thd) and wsrep_emulate_bin_log is set.
+ The thd argument is expanded more than once and without parentheses, so
+ pass a plain variable.
+ */
+#define WSREP_ON \
+ ((global_system_variables.wsrep_on) && \
+ wsrep_provider && \
+ strcmp(wsrep_provider, WSREP_NONE))
+
+#define WSREP(thd) \
+ (WSREP_ON && wsrep && (thd && thd->variables.wsrep_on))
+
+#define WSREP_CLIENT(thd) \
+ (WSREP(thd) && thd->wsrep_client_thread)
+
+#define WSREP_EMULATE_BINLOG(thd) \
+ (WSREP(thd) && wsrep_emulate_bin_log)
+/*
+ WSREP_LOG formats the message into a 1024 byte stack buffer (longer
+ messages are truncated) and passes the result to the logging function
+ given as first argument. WSREP_DEBUG only logs when wsrep_debug is set.
+
+ WSREP_LOG expands to a bare { } block, not to do { } while (0), and
+ WSREP_DEBUG puts an unbraced "if (wsrep_debug)" in front of it. Do not
+ use these macros as the unbraced body of an if that has an else branch.
+ The buffer inside the block is named msg, so an argument that refers to a
+ caller variable called msg would pick up the macro's buffer instead.
+ */
+// MySQL logging functions don't seem to understand long long length modifier.
+// This is a workaround. It also prefixes all messages with "WSREP"
+#define WSREP_LOG(fun, ...) \
+ { \
+ char msg[1024] = {'\0'}; \
+ snprintf(msg, sizeof(msg) - 1, ## __VA_ARGS__); \
+ fun("WSREP: %s", msg); \
+ }
+
+#define WSREP_DEBUG(...) \
+ if (wsrep_debug) WSREP_LOG(sql_print_information, ##__VA_ARGS__)
+#define WSREP_INFO(...) WSREP_LOG(sql_print_information, ##__VA_ARGS__)
+#define WSREP_WARN(...) WSREP_LOG(sql_print_warning, ##__VA_ARGS__)
+#define WSREP_ERROR(...) WSREP_LOG(sql_print_error, ##__VA_ARGS__)
+/*
+ WSREP_LOG_CONFLICT_THD logs one thread of a conflict at information
+ level: the given role label, thread id, exec mode, query state, conflict
+ state, transaction seqno and query text, all read through the
+ wsrep_thd_*() accessors. The expansion already ends with ';'.
+
+ WSREP_LOG_CONFLICT logs a whole conflict, but only when wsrep_debug or
+ wsrep_log_conflicts is set: first the cause ("high priority abort" when
+ bf_abort is true, "certification failure" otherwise), then every
+ non-NULL thread of the pair. It expands to an if statement followed by a
+ block, so it has the same unbraced if/else hazard as WSREP_DEBUG.
+ */
+#define WSREP_LOG_CONFLICT_THD(thd, role) \
+ WSREP_LOG(sql_print_information, \
+ "%s: \n " \
+ " THD: %lu, mode: %s, state: %s, conflict: %s, seqno: %lld\n " \
+ " SQL: %s", \
+ role, wsrep_thd_thread_id(thd), wsrep_thd_exec_mode_str(thd), \
+ wsrep_thd_query_state_str(thd), \
+ wsrep_thd_conflict_state_str(thd), (long long)wsrep_thd_trx_seqno(thd), \
+ wsrep_thd_query(thd) \
+ );
+
+#define WSREP_LOG_CONFLICT(bf_thd, victim_thd, bf_abort) \
+ if (wsrep_debug || wsrep_log_conflicts) \
+ { \
+ WSREP_LOG(sql_print_information, "cluster conflict due to %s for threads:",\
+ (bf_abort) ? "high priority abort" : "certification failure" \
+ ); \
+ if (bf_thd != NULL) WSREP_LOG_CONFLICT_THD(bf_thd, "Winning thread"); \
+ if (victim_thd) WSREP_LOG_CONFLICT_THD(victim_thd, "Victim thread"); \
+ }
+/*
+ True when wsrep_provider is non-NULL and differs from WSREP_NONE. Unlike
+ WSREP_ON the comparison is case-insensitive (strncasecmp() over at most
+ FN_REFLEN characters) and the wsrep_on setting is not consulted.
+ */
+#define WSREP_PROVIDER_EXISTS \
+ (wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN))
+
+extern void wsrep_ready_wait();
+/* Result codes returned by wsrep_run_wsrep_commit(), declared below. */
+enum wsrep_trx_status {
+ WSREP_TRX_OK,
+ WSREP_TRX_CERT_FAIL, /* certification failure, must abort */
+ WSREP_TRX_SIZE_EXCEEDED, /* trx size exceeded */
+ WSREP_TRX_ERROR, /* native mysql error */
+};
+
+extern enum wsrep_trx_status
+wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all);
+class Ha_trx_info;
+struct THD_TRANS;
+void wsrep_register_hton(THD* thd, bool all);
+void wsrep_post_commit(THD* thd, bool all);
+void wsrep_brute_force_killer(THD *thd);
+int wsrep_hire_brute_force_killer(THD *thd, uint64_t trx_id);
+
+extern "C" bool wsrep_consistency_check(void *thd_ptr);
+
+/* this is visible for client build so that innodb plugin gets this */
+typedef struct wsrep_aborting_thd {
+ struct wsrep_aborting_thd *next; /* link to the next node of the list */
+ THD *aborting_thd; /* thread referenced by this node */
+} *wsrep_aborting_thd_t; /* note: the typedef names a POINTER to a node */
+
+extern mysql_mutex_t LOCK_wsrep_ready;
+extern mysql_cond_t COND_wsrep_ready;
+extern mysql_mutex_t LOCK_wsrep_sst;
+extern mysql_cond_t COND_wsrep_sst;
+extern mysql_mutex_t LOCK_wsrep_sst_init;
+extern mysql_cond_t COND_wsrep_sst_init;
+extern mysql_mutex_t LOCK_wsrep_rollback;
+extern mysql_cond_t COND_wsrep_rollback;
+extern int wsrep_replaying;
+extern mysql_mutex_t LOCK_wsrep_replaying;
+extern mysql_cond_t COND_wsrep_replaying;
+extern mysql_mutex_t LOCK_wsrep_slave_threads;
+extern mysql_mutex_t LOCK_wsrep_desync;
+extern wsrep_aborting_thd_t wsrep_aborting_thd;
+extern my_bool wsrep_emulate_bin_log;
+extern int wsrep_to_isolation;
+#ifdef GTID_SUPPORT
+extern rpl_sidno wsrep_sidno;
+#endif /* GTID_SUPPORT */
+extern my_bool wsrep_preordered_opt;
+
+#ifdef HAVE_PSI_INTERFACE
+extern PSI_mutex_key key_LOCK_wsrep_ready;
+extern PSI_mutex_key key_COND_wsrep_ready;
+extern PSI_mutex_key key_LOCK_wsrep_sst;
+extern PSI_cond_key key_COND_wsrep_sst;
+extern PSI_mutex_key key_LOCK_wsrep_sst_init;
+extern PSI_cond_key key_COND_wsrep_sst_init;
+extern PSI_mutex_key key_LOCK_wsrep_sst_thread;
+extern PSI_cond_key key_COND_wsrep_sst_thread;
+extern PSI_mutex_key key_LOCK_wsrep_rollback;
+extern PSI_cond_key key_COND_wsrep_rollback;
+extern PSI_mutex_key key_LOCK_wsrep_replaying;
+extern PSI_cond_key key_COND_wsrep_replaying;
+extern PSI_mutex_key key_LOCK_wsrep_slave_threads;
+extern PSI_mutex_key key_LOCK_wsrep_desync;
+#endif /* HAVE_PSI_INTERFACE */
+struct TABLE_LIST;
+int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_,
+ const TABLE_LIST* table_list);
+void wsrep_to_isolation_end(THD *thd);
+void wsrep_cleanup_transaction(THD *thd);
+int wsrep_to_buf_helper(
+ THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len);
+int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len);
+int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len);
+int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len);
+int wsrep_alter_event_query(THD *thd, uchar** buf, size_t* buf_len);
+
+#ifdef GTID_SUPPORT
+void wsrep_init_sidno(const wsrep_uuid_t&);
+#endif /* GTID_SUPPORT */
+
+bool wsrep_node_is_donor();
+bool wsrep_node_is_synced();
+#endif /* WSREP_MYSQLD_H */
diff --git a/sql/wsrep_notify.cc b/sql/wsrep_notify.cc
new file mode 100644
index 00000000000..e7d30d5a9c1
--- /dev/null
+++ b/sql/wsrep_notify.cc
@@ -0,0 +1,111 @@
+/* Copyright 2010 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <mysqld.h>
+#include "wsrep_priv.h"
+#include "wsrep_utils.h"
+#include <cstdarg>
+
+const char* wsrep_notify_cmd=""; // command prefix run by wsrep_notify_status(); NULL or "" skips the notification
+
+/*
+  Maps a member status to the word passed to the notification command
+  after --status. Values without a dedicated name yield "Error(?)".
+*/
+static const char* _status_str(wsrep_member_status_t status)
+{
+  if (status == WSREP_MEMBER_UNDEFINED) return "Undefined";
+  if (status == WSREP_MEMBER_JOINER)    return "Joiner";
+  if (status == WSREP_MEMBER_DONOR)     return "Donor";
+  if (status == WSREP_MEMBER_JOINED)    return "Joined";
+  if (status == WSREP_MEMBER_SYNCED)    return "Synced";
+  return "Error(?)";
+}
+
+/*
+  Appends printf-style formatted text to the notification command buffer.
+
+  @param buf  start of the buffer
+  @param cap  usable capacity of buf in bytes, terminating NUL included
+  @param off  in/out: current length of the text in buf
+  @param fmt  printf-style format, followed by its arguments
+
+  @return true when the text fit. On truncation or a formatting error *off
+          is pinned at cap and false is returned; every later call is then
+          a no-op, so the offset can never run past the buffer.
+*/
+static bool notify_cmd_append(char* buf, long cap, long* off,
+                              const char* fmt, ...)
+{
+  if (*off >= cap)
+    return false;
+
+  const long room = cap - *off;
+  va_list args;
+  va_start(args, fmt);
+  const int written = vsnprintf(buf + *off, room, fmt, args);
+  va_end(args);
+
+  /* vsnprintf() reports the length the full text WOULD have needed. */
+  if (written < 0 || written >= room)
+  {
+    *off = cap;
+    return false;
+  }
+
+  *off += written;
+  return true;
+}
+
+/*
+  Runs the command configured in wsrep_notify_cmd to report a change of the
+  node status and, when view is given, of the cluster view.
+
+  The command line is wsrep_notify_cmd followed by "--status <name>" and,
+  for a non-NULL view, "--uuid", "--primary", "--index" and "--members"
+  (a comma separated list of uuid/name/incoming-address triples).
+
+  Nothing is run when wsrep_notify_cmd is NULL or empty, or when the
+  command line does not fit the buffer; a failing command is logged.
+
+  NOTE(review): member names and incoming addresses are copied into the
+  command line unquoted. If wsp::process runs the string through a shell,
+  these values need escaping - confirm how wsp::process executes it.
+
+  @param status  new member status; values outside the named range are
+                 passed on as 'Error(<code>)'
+  @param view    cluster view to report, may be NULL
+*/
+void wsrep_notify_status (wsrep_member_status_t status,
+                          const wsrep_view_info_t* view)
+{
+  if (!wsrep_notify_cmd || 0 == strlen(wsrep_notify_cmd))
+  {
+    WSREP_INFO("wsrep_notify_cmd is not defined, skipping notification.");
+    return;
+  }
+
+  char cmd_buf[1 << 16]; // this can be long
+  const long cmd_len = sizeof(cmd_buf) - 1;
+  long cmd_off = 0;
+
+  notify_cmd_append(cmd_buf, cmd_len, &cmd_off, "%s", wsrep_notify_cmd);
+
+  if (status >= WSREP_MEMBER_UNDEFINED && status < WSREP_MEMBER_ERROR)
+  {
+    notify_cmd_append(cmd_buf, cmd_len, &cmd_off, " --status %s",
+                      _status_str(status));
+  }
+  else
+  {
+    /* here we preserve provider error codes */
+    notify_cmd_append(cmd_buf, cmd_len, &cmd_off,
+                      " --status 'Error(%d)'", status);
+  }
+
+  if (0 != view)
+  {
+    char uuid_str[40];
+
+    wsrep_uuid_print (&view->state_id.uuid, uuid_str, sizeof(uuid_str));
+    notify_cmd_append(cmd_buf, cmd_len, &cmd_off, " --uuid %s", uuid_str);
+
+    notify_cmd_append(cmd_buf, cmd_len, &cmd_off,
+                      " --primary %s", view->view >= 0 ? "yes" : "no");
+
+    notify_cmd_append(cmd_buf, cmd_len, &cmd_off,
+                      " --index %d", view->my_idx);
+
+    if (view->memb_num)
+    {
+      notify_cmd_append(cmd_buf, cmd_len, &cmd_off, " --members");
+
+      /* Stop early once the buffer is exhausted. */
+      for (int i = 0; i < view->memb_num && cmd_off < cmd_len; i++)
+      {
+        wsrep_uuid_print (&view->members[i].id, uuid_str, sizeof(uuid_str));
+        notify_cmd_append(cmd_buf, cmd_len, &cmd_off,
+                          "%c%s/%s/%s", i > 0 ? ',' : ' ',
+                          uuid_str, view->members[i].name,
+                          view->members[i].incoming);
+      }
+    }
+  }
+
+  /* The helper pins the offset at cmd_len as soon as anything is cut off. */
+  if (cmd_off >= cmd_len)
+  {
+    WSREP_ERROR("Notification buffer too short (%ld). Aborting notification.",
+                cmd_len);
+    return;
+  }
+
+  wsp::process p(cmd_buf, "r", NULL);
+
+  p.wait();
+  int err = p.error();
+
+  if (err)
+  {
+    WSREP_ERROR("Notification command failed: %d (%s): \"%s\"",
+                err, strerror(err), cmd_buf);
+  }
+}
+
diff --git a/sql/wsrep_priv.h b/sql/wsrep_priv.h
new file mode 100644
index 00000000000..30dce78c1a4
--- /dev/null
+++ b/sql/wsrep_priv.h
@@ -0,0 +1,51 @@
+/* Copyright 2010 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//! @file declares symbols private to wsrep integration layer
+
+#ifndef WSREP_PRIV_H
+#define WSREP_PRIV_H
+
+#include "wsrep_mysqld.h"
+#include "../wsrep/wsrep_api.h"
+
+#include <log.h>
+#include <pthread.h>
+#include <cstdio>
+
+void wsrep_ready_set (my_bool x);
+
+ssize_t wsrep_sst_prepare (void** msg);
+wsrep_cb_status wsrep_sst_donate_cb (void* app_ctx,
+ void* recv_ctx,
+ const void* msg, size_t msg_len,
+ const wsrep_gtid_t* state_id,
+ const char* state, size_t state_len,
+ bool bypass);
+
+extern wsrep_uuid_t local_uuid;
+extern wsrep_seqno_t local_seqno;
+
+// a helper function
+void wsrep_sst_received(wsrep_t*, const wsrep_uuid_t&, wsrep_seqno_t,
+ const void*, size_t);
+/*! SST thread signals init thread about sst completion */
+void wsrep_sst_complete(const wsrep_uuid_t*, wsrep_seqno_t, bool);
+
+void wsrep_notify_status (wsrep_member_status_t new_status,
+ const wsrep_view_info_t* view = 0);
+
+#endif /* WSREP_PRIV_H */
diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc
new file mode 100644
index 00000000000..11698089582
--- /dev/null
+++ b/sql/wsrep_sst.cc
@@ -0,0 +1,1224 @@
+/* Copyright 2008-2015 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "wsrep_sst.h"
+
+#include <mysqld.h>
+#include <m_ctype.h>
+#include <my_sys.h>
+#include <strfunc.h>
+#include <sql_class.h>
+#include <set_var.h>
+#include <sql_acl.h>
+#include <sql_reload.h>
+#include <sql_parse.h>
+#include "wsrep_priv.h"
+#include "wsrep_utils.h"
+#include "wsrep_xid.h"
+#include <cstdio>
+#include <cstdlib>
+
+extern const char wsrep_defaults_file[]; // passed to SST scripts as --defaults-file
+extern const char wsrep_defaults_group_suffix[]; // passed to SST scripts as --defaults-group-suffix
+
+const char* wsrep_sst_method = WSREP_SST_DEFAULT;
+const char* wsrep_sst_receive_address = WSREP_SST_ADDRESS_AUTO;
+const char* wsrep_sst_donor = "";
+ char* wsrep_sst_auth = NULL; // holds WSREP_SST_AUTH_MASK once a non-empty auth string was set
+
+// container for real auth string (my_strdup'ed copy owned by this file)
+static const char* sst_auth_real = NULL;
+my_bool wsrep_sst_donor_rejects_queries = FALSE; // checked by the donor paths before sst_reject_queries()
+
+// Validator for wsrep_sst_method: the candidate value must be a non-empty
+// string. Raises ER_WRONG_VALUE_FOR_VAR and returns 1 when it is rejected,
+// returns 0 when it is acceptable.
+bool wsrep_sst_method_check (sys_var *self, THD* thd, set_var* var)
+{
+  const char* const candidate= var->save_result.string_value.str;
+  const bool usable= candidate && var->save_result.string_value.length != 0;
+
+  if (usable) return 0;
+
+  my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+           candidate ? candidate : "NULL");
+  return 1;
+}
+
+// Update hook for wsrep_sst_method: nothing to do, always reports success.
+bool wsrep_sst_method_update (sys_var *self, THD* thd, enum_var_type type)
+{
+  return false;
+}
+
+// TODO: Improve address verification.
+// Returns true when str begins (case-insensitively) with one of the loopback
+// spellings below, i.e. when it must be refused as an SST receive address.
+static bool sst_receive_address_check (const char* str)
+{
+  static const char* const loopback[]= { "127.0.0.1", "localhost" };
+
+  for (size_t i= 0; i < sizeof(loopback) / sizeof(loopback[0]); i++)
+  {
+    if (strncasecmp(str, loopback[i], strlen(loopback[i])) == 0) return true;
+  }
+
+  return false;
+}
+
+// Validator for wsrep_sst_receive_address. The value is rejected when it is
+// NULL, does not fit into a FN_REFLEN buffer, or is refused by
+// sst_receive_address_check(). Rejection raises ER_WRONG_VALUE_FOR_VAR and
+// returns 1; otherwise 0 is returned.
+bool wsrep_sst_receive_address_check (sys_var *self, THD* thd, set_var* var)
+{
+  bool rejected= true;
+
+  if (var->save_result.string_value.str &&
+      !(var->save_result.string_value.length > (FN_REFLEN - 1))) // safety
+  {
+    // work on a NUL-terminated copy of the candidate value
+    char addr_buf[FN_REFLEN];
+    memcpy(addr_buf, var->save_result.string_value.str,
+           var->save_result.string_value.length);
+    addr_buf[var->save_result.string_value.length]= 0;
+
+    rejected= sst_receive_address_check(addr_buf);
+  }
+
+  if (!rejected) return 0;
+
+  my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+           var->save_result.string_value.str ?
+           var->save_result.string_value.str : "NULL");
+  return 1;
+}
+
+// Update hook for wsrep_sst_receive_address: no action, always succeeds.
+bool wsrep_sst_receive_address_update (sys_var *self, THD* thd,
+                                       enum_var_type type)
+{
+  return false;
+}
+
+// Validator for wsrep_sst_auth: accepts every value.
+bool wsrep_sst_auth_check (sys_var *self, THD* thd, set_var* var)
+{
+  return false;
+}
+
+// Remember the real auth string and mask the user-visible one.
+// A NULL value releases both strings. Otherwise a private copy of value
+// replaces sst_auth_real and, if that copy is non-empty, wsrep_sst_auth is
+// replaced by WSREP_SST_AUTH_MASK.
+// Returns 0 on success, 1 when the copy could not be allocated.
+static bool sst_auth_real_set (const char* value)
+{
+  if (!value) // its NULL
+  {
+    wsrep_sst_auth_free();
+    return 0;
+  }
+
+  // copy first: value may be the very string that gets freed below
+  const char* const copy= my_strdup(value, MYF(0));
+  if (!copy) return 1;
+
+  // set sst_auth_real
+  if (sst_auth_real) { my_free((void *) sst_auth_real); }
+  sst_auth_real = copy;
+
+  // mask wsrep_sst_auth
+  if (strlen(sst_auth_real))
+  {
+    if (wsrep_sst_auth) { my_free((void*) wsrep_sst_auth); }
+    wsrep_sst_auth= my_strdup(WSREP_SST_AUTH_MASK, MYF(0));
+  }
+
+  return 0;
+}
+
+// Release both the masked and the real auth strings and reset the pointers.
+void wsrep_sst_auth_free()
+{
+  if (wsrep_sst_auth)
+  {
+    my_free((void *) wsrep_sst_auth);
+  }
+  if (sst_auth_real)
+  {
+    my_free((void *) sst_auth_real);
+  }
+
+  sst_auth_real= NULL;
+  wsrep_sst_auth= NULL;
+}
+
+// Update hook for wsrep_sst_auth: captures the freshly assigned value as the
+// real auth string. Returns whatever sst_auth_real_set() reports.
+bool wsrep_sst_auth_update (sys_var *self, THD* thd, enum_var_type type)
+{
+  const bool failed= sst_auth_real_set (wsrep_sst_auth);
+  return failed;
+}
+
+// Install the initial auth string.
+void wsrep_sst_auth_init (const char* value)
+{
+  // When value is the string wsrep_sst_auth already points to, forget the
+  // alias first so that sst_auth_real_set() does not my_free() it.
+  if (value == wsrep_sst_auth) wsrep_sst_auth = NULL;
+
+  if (!value) return;
+
+  sst_auth_real_set (value);
+}
+
+// Validator for wsrep_sst_donor: accepts every value.
+bool wsrep_sst_donor_check (sys_var *self, THD* thd, set_var* var)
+{
+  return false;
+}
+
+// Update hook for wsrep_sst_donor: no action, always succeeds.
+bool wsrep_sst_donor_update (sys_var *self, THD* thd, enum_var_type type)
+{
+  return false;
+}
+
+static wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED; // NOTE(review): not referenced in the visible part of this file - confirm it is still needed
+
+// True when a provider other than WSREP_NONE is configured and the SST
+// method is neither WSREP_SST_SKIP nor WSREP_SST_MYSQLDUMP.
+bool wsrep_before_SE()
+{
+  if (wsrep_provider == NULL) return false;
+  if (strcmp (wsrep_provider, WSREP_NONE) == 0) return false;
+  if (strcmp (wsrep_sst_method, WSREP_SST_SKIP) == 0) return false;
+
+  return strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP) != 0;
+}
+
+static bool sst_complete = false; // set by wsrep_sst_complete(), cleared by wsrep_sst_grab(); guarded by LOCK_wsrep_sst there
+static bool sst_needed = false; // "needed" argument of the last accepted wsrep_sst_complete(); read by wsrep_sst_continue()
+
+// Mark SST as not complete so that wsrep_sst_wait() will block until the
+// next wsrep_sst_complete(). Aborts the process if the mutex cannot be taken.
+void wsrep_sst_grab ()
+{
+  WSREP_INFO("wsrep_sst_grab()");
+
+  const int lock_failed= mysql_mutex_lock (&LOCK_wsrep_sst);
+  if (lock_failed) abort();
+
+  sst_complete = false;
+
+  mysql_mutex_unlock (&LOCK_wsrep_sst);
+}
+
+// Wait for end of SST.
+// Blocks on COND_wsrep_sst until sst_complete is set, logs the outcome and
+// returns true when local_seqno is non-negative (a negative local_seqno
+// carries the negated error code of a failed SST).
+bool wsrep_sst_wait ()
+{
+  if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
+
+  for (;;)
+  {
+    if (sst_complete) break;
+    WSREP_INFO("Waiting for SST to complete.");
+    mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst);
+  }
+
+  if (local_seqno < 0)
+  {
+    WSREP_ERROR("SST failed: %d (%s)",
+                int(-local_seqno), strerror(-local_seqno));
+  }
+  else
+  {
+    WSREP_INFO("SST complete, seqno: %lld", (long long) local_seqno);
+  }
+
+  mysql_mutex_unlock (&LOCK_wsrep_sst);
+
+  return (local_seqno >= 0);
+}
+
+// Signal end of SST.
+// The first call after wsrep_sst_grab() records the result (uuid, seqno,
+// needed) and wakes the waiter; later calls only log a debug message.
+void wsrep_sst_complete (const wsrep_uuid_t* sst_uuid,
+                         wsrep_seqno_t sst_seqno,
+                         bool needed)
+{
+  if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
+
+  if (sst_complete)
+  {
+    /* This can happen when called from wsrep_synced_cb().
+       At the moment there is no way to check there
+       if main thread is still waiting for signal,
+       so wsrep_sst_complete() is called from there
+       each time wsrep_ready changes from FALSE -> TRUE.
+    */
+    WSREP_DEBUG("Nobody is waiting for SST.");
+  }
+  else
+  {
+    sst_complete = true;
+    sst_needed = needed;
+    local_uuid = *sst_uuid;
+    local_seqno = sst_seqno;
+    mysql_cond_signal (&COND_wsrep_sst);
+  }
+
+  mysql_mutex_unlock (&LOCK_wsrep_sst);
+}
+
+// Reconcile the storage engine checkpoint with the position delivered by SST
+// and, when a provider handle is given, report the result to it.
+// A negative seqno is passed to the provider as the error code, together
+// with WSREP_SEQNO_UNDEFINED as position.
+void wsrep_sst_received (wsrep_t* const wsrep,
+                         const wsrep_uuid_t& uuid,
+                         wsrep_seqno_t const seqno,
+                         const void* const state,
+                         size_t const state_len)
+{
+  wsrep_get_SE_checkpoint(local_uuid, local_seqno);
+
+  const bool other_history= memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) != 0;
+  const bool take_sst_position= other_history || local_seqno < seqno ||
+                                seqno < 0;
+
+  if (take_sst_position)
+  {
+    wsrep_set_SE_checkpoint(uuid, seqno);
+    local_uuid = uuid;
+    local_seqno = seqno;
+  }
+  else if (local_seqno > seqno)
+  {
+    // same history, but the engine is already ahead of what SST delivered
+    WSREP_WARN("SST postion is in the past: %lld, current: %lld. "
+               "Can't continue.",
+               (long long)seqno, (long long)local_seqno);
+    unireg_abort(1);
+  }
+
+#ifdef GTID_SUPPORT
+  wsrep_init_sidno(uuid);
+#endif /* GTID_SUPPORT */
+
+  if (!wsrep) return;
+
+  int const rcode(seqno < 0 ? seqno : 0);
+  wsrep_gtid_t const state_id = {
+    uuid, (rcode ? WSREP_SEQNO_UNDEFINED : seqno)
+  };
+
+  wsrep->sst_received(wsrep, &state_id, state, state_len, rcode);
+}
+
+// Let applier threads continue: if the last completed SST was flagged as
+// needed, hand the recorded local position to the provider.
+void wsrep_sst_continue ()
+{
+  if (!sst_needed) return;
+
+  WSREP_INFO("Signalling provider to continue.");
+  wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0);
+}
+
+// Hand-off record between a thread that starts an SST script thread and the
+// started thread itself: the starter fills cmd/env, the script thread reports
+// back through ret_str/err under lock and signals cond.
+struct sst_thread_arg
+{
+  const char*   cmd;      // command line to run
+  char**        env;      // environment for the command
+  char*         ret_str;  // string reported back by the script thread
+  int           err;      // result code reported back; -1 until set
+  mysql_mutex_t lock;
+  mysql_cond_t  cond;
+
+  sst_thread_arg (const char* command, char** environment)
+    : cmd(command), env(environment), ret_str(0), err(-1)
+  {
+    mysql_mutex_init(key_LOCK_wsrep_sst_thread, &lock, MY_MUTEX_INIT_FAST);
+    mysql_cond_init(key_COND_wsrep_sst_thread, &cond, NULL);
+  }
+
+  // The users of this struct in this file lock `lock` right after
+  // construction and never unlock it themselves; the destructor does it.
+  ~sst_thread_arg()
+  {
+    mysql_cond_destroy (&cond);
+    mysql_mutex_unlock (&lock);
+    mysql_mutex_destroy (&lock);
+  }
+};
+
+// Parse a "<uuid>:<seqno>" pair.
+//
+// @param str    NUL-terminated input
+// @param uuid   receives the parsed UUID
+// @param seqno  receives the parsed sequence number
+// @return 0 on success, EINVAL (after logging an error) when the UUID cannot
+//         be scanned, the ':' separator is missing, or the number is out of
+//         the long long range.
+static int sst_scan_uuid_seqno (const char* str,
+                                wsrep_uuid_t* uuid, wsrep_seqno_t* seqno)
+{
+  int offt = wsrep_uuid_scan (str, strlen(str), uuid);
+  if (offt > 0 && strlen(str) > (unsigned int)offt && ':' == str[offt])
+  {
+    // strtoll() reports overflow only through errno, and never clears it:
+    // reset it so that a stale ERANGE from earlier code is not mistaken for
+    // a conversion failure.
+    errno = 0;
+    *seqno = strtoll (str + offt + 1, NULL, 10);
+    if (errno != ERANGE) // covers both LLONG_MAX and LLONG_MIN saturation
+    {
+      return 0;
+    }
+  }
+
+  WSREP_ERROR("Failed to parse uuid:seqno pair: '%s'", str);
+  return EINVAL;
+}
+
+// fgets() wrapper that gets rid of the trailing \n, if there is one.
+// Returns what fgets() returned (NULL on EOF/error).
+static char* my_fgets (char* buf, size_t buf_len, FILE* stream)
+{
+  char* const line= fgets (buf, buf_len, stream);
+  if (!line) return line;
+
+  const size_t used= strlen(line);
+  if (used != 0 && line[used - 1] == '\n')
+  {
+    line[used - 1] = '\0';
+  }
+
+  return line;
+}
+
+/*
+  Generate opt_binlog_opt_val for sst_donate_other(), sst_prepare_other().
+
+  On success *ret holds a my_strdup()'ed string that the caller must release
+  with my_free(): a copy of opt_bin_logname when binary logging is enabled
+  and the name is not "0", an empty string otherwise.
+
+  Returns zero on success, -ENOMEM when the copy cannot be allocated.
+ */
+static int generate_binlog_opt_val(char** ret)
+{
+  DBUG_ASSERT(ret);
+  *ret= NULL;
+
+  const char* chosen= "";
+  if (opt_bin_log)
+  {
+    assert(opt_bin_logname);
+    if (strcmp(opt_bin_logname, "0")) chosen= opt_bin_logname;
+  }
+
+  *ret= my_strdup(chosen, MYF(0));
+  return *ret ? 0 : -ENOMEM;
+}
+
+/*
+  Thread body that runs the joiner-side SST script.
+
+  a is a sst_thread_arg*. The script (arg->cmd, run with arg->env) is expected
+  to print "ready <addr>" as its first line. The address (strdup'ed into
+  arg->ret_str) and the negated error code (arg->err) are handed back under
+  arg->lock and arg->cond is signalled.
+
+  On success the thread then reads a second line holding "uuid:seqno", waits
+  for the script to exit and reports the outcome via wsrep_sst_complete();
+  a failure is reported as an undefined uuid and seqno == -err.
+
+  Always returns NULL.
+*/ static void* sst_joiner_thread (void* a)
+{
+ sst_thread_arg* arg= (sst_thread_arg*) a;
+ int err= 1;
+
+ {
+ const char magic[] = "ready";
+ const size_t magic_len = sizeof(magic) - 1;
+ const size_t out_len = 512;
+ char out[out_len];
+
+ WSREP_INFO("Running: '%s'", arg->cmd);
+
+ wsp::process proc (arg->cmd, "r", arg->env);
+
+ if (proc.pipe() && !proc.error())
+ {
+ const char* tmp= my_fgets (out, out_len, proc.pipe());
+
+ if (!tmp || strlen(tmp) < (magic_len + 2) || // need "ready", a separator and at least one address char
+ strncasecmp (tmp, magic, magic_len))
+ {
+ WSREP_ERROR("Failed to read '%s <addr>' from: %s\n\tRead: '%s'",
+ magic, arg->cmd, tmp); // NOTE(review): tmp may be NULL here and is passed to %s
+ proc.wait();
+ if (proc.error()) err = proc.error(); // otherwise err keeps its initial value 1
+ }
+ else
+ {
+ err = 0;
+ }
+ }
+ else
+ {
+ err = proc.error();
+ WSREP_ERROR("Failed to execute: %s : %d (%s)",
+ arg->cmd, err, strerror(err));
+ }
+
+ // signal sst_prepare thread with ret code,
+ // it will go on sending SST request
+ mysql_mutex_lock (&arg->lock);
+ if (!err)
+ {
+ arg->ret_str = strdup (out + magic_len + 1); // skip "ready" and the separator character
+ if (!arg->ret_str) err = ENOMEM;
+ }
+ arg->err = -err;
+ mysql_cond_signal (&arg->cond);
+ mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that.
+
+ if (err) return NULL; /* lp:808417 - return immediately, don't signal
+ * initializer thread to ensure single thread of
+ * shutdown. */
+
+ wsrep_uuid_t ret_uuid = WSREP_UUID_UNDEFINED;
+ wsrep_seqno_t ret_seqno = WSREP_SEQNO_UNDEFINED;
+
+ // in case of successful receiver start, wait for SST completion/end
+ char* tmp = my_fgets (out, out_len, proc.pipe());
+
+ proc.wait();
+ err= EINVAL; // default when nothing could be read and the process reports no error
+
+ if (!tmp)
+ {
+ WSREP_ERROR("Failed to read uuid:seqno from joiner script.");
+ if (proc.error()) err = proc.error();
+ }
+ else
+ {
+ err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno);
+ }
+
+ if (err)
+ {
+ ret_uuid= WSREP_UUID_UNDEFINED;
+ ret_seqno= -err; // wsrep_sst_wait() treats a negative seqno as -errno
+ }
+
+ // Tell initializer thread that SST is complete
+ wsrep_sst_complete (&ret_uuid, ret_seqno, true);
+ }
+
+ return NULL;
+}
+
+#define WSREP_SST_AUTH_ENV "WSREP_SST_OPT_AUTH"
+
+// Append "WSREP_SST_OPT_AUTH=<sst_auth>" to env (an empty value is used when
+// sst_auth is NULL).
+// Returns 0 on success or a negative error code: -ENOMEM, -EMSGSIZE, the
+// negative snprintf() result, or the negated env.error().
+static int sst_append_auth_env(wsp::env& env, const char* sst_auth)
+{
+  const char* const auth_value= sst_auth ? sst_auth : "";
+
+  int const buf_size= strlen(WSREP_SST_AUTH_ENV) + 1 /* = */
+                      + strlen(auth_value) + 1 /* \0 */;
+
+  wsp::string assignment(buf_size); // released automatically on return
+  if (!assignment()) return -ENOMEM;
+
+  int const written= snprintf(assignment(), buf_size, "%s=%s",
+                              WSREP_SST_AUTH_ENV, auth_value);
+
+  if (written < 0 || written >= buf_size)
+  {
+    WSREP_ERROR("sst_append_auth_env(): snprintf() failed: %d", written);
+    return (written < 0 ? written : -EMSGSIZE);
+  }
+
+  env.append(assignment());
+  return -env.error();
+}
+
+/*
+  Start the joiner-side SST script "wsrep_sst_<method>" in its own thread and
+  wait until that thread reports the address the script listens on.
+
+  method    SST method name, becomes part of the script name
+  sst_auth  auth string exported to the script via the environment
+  addr_in   address passed to the script with WSREP_SST_OPT_ADDR
+  addr_out  receives the string reported back by sst_joiner_thread()
+            (allocated there with strdup())
+
+  Returns strlen(*addr_out) on success, a negative error code otherwise.
+*/ static ssize_t sst_prepare_other (const char* method,
+ const char* sst_auth,
+ const char* addr_in,
+ const char** addr_out)
+{
+ int const cmd_len= 4096;
+ wsp::string cmd_str(cmd_len);
+
+ if (!cmd_str())
+ {
+ WSREP_ERROR("sst_prepare_other(): could not allocate cmd buffer of %d bytes",
+ cmd_len);
+ return -ENOMEM;
+ }
+
+ const char* binlog_opt= "";
+ char* binlog_opt_val= NULL;
+
+ int ret;
+ if ((ret= generate_binlog_opt_val(&binlog_opt_val)))
+ {
+ WSREP_ERROR("sst_prepare_other(): generate_binlog_opt_val() failed: %d",
+ ret);
+ return ret;
+ }
+ if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG; // option name is emitted only together with a value
+
+ ret= snprintf (cmd_str(), cmd_len,
+ "wsrep_sst_%s "
+ WSREP_SST_OPT_ROLE" 'joiner' "
+ WSREP_SST_OPT_ADDR" '%s' "
+ WSREP_SST_OPT_DATA" '%s' "
+ WSREP_SST_OPT_CONF" '%s' "
+ WSREP_SST_OPT_CONF_SUFFIX" '%s' "
+ WSREP_SST_OPT_PARENT" '%d'"
+ " %s '%s' ",
+ method, addr_in, mysql_real_data_home,
+ wsrep_defaults_file, wsrep_defaults_group_suffix,
+ (int)getpid(), binlog_opt, binlog_opt_val);
+ my_free(binlog_opt_val);
+
+ if (ret < 0 || ret >= cmd_len) // error or truncated command line
+ {
+ WSREP_ERROR("sst_prepare_other(): snprintf() failed: %d", ret);
+ return (ret < 0 ? ret : -EMSGSIZE);
+ }
+
+ wsp::env env(NULL);
+ if (env.error())
+ {
+ WSREP_ERROR("sst_prepare_other(): env. var ctor failed: %d", -env.error());
+ return -env.error();
+ }
+
+ if ((ret= sst_append_auth_env(env, sst_auth)))
+ {
+ WSREP_ERROR("sst_prepare_other(): appending auth failed: %d", ret);
+ return ret;
+ }
+
+ pthread_t tmp;
+ sst_thread_arg arg(cmd_str(), env());
+ mysql_mutex_lock (&arg.lock); // released by ~sst_thread_arg() on every return path below
+ ret = pthread_create (&tmp, NULL, sst_joiner_thread, &arg);
+ if (ret)
+ {
+ WSREP_ERROR("sst_prepare_other(): pthread_create() failed: %d (%s)",
+ ret, strerror(ret));
+ return -ret;
+ }
+ mysql_cond_wait (&arg.cond, &arg.lock); // NOTE(review): single wait without a predicate loop - a spurious wakeup would read arg before the thread filled it
+
+ *addr_out= arg.ret_str;
+
+ if (!arg.err)
+ ret = strlen(*addr_out);
+ else
+ {
+ assert (arg.err < 0);
+ ret = arg.err;
+ }
+
+ pthread_detach (tmp); // the joiner thread keeps running and is never joined here
+
+ return ret;
+}
+
+extern uint mysqld_port;
+
+/*! Just tells donor where to send mysqldump.
+ *
+ *  When addr_in already contains a ':' it is returned as is through
+ *  addr_out. Otherwise a malloc()'ed "addr_in:mysqld_port" string is
+ *  returned (the caller releases it with free()).
+ *
+ *  Returns the length of *addr_out, or a negative error code after logging.
+ */
+static ssize_t sst_prepare_mysqldump (const char* addr_in,
+                                      const char** addr_out)
+{
+  ssize_t ret = strlen (addr_in);
+
+  if (strrchr(addr_in, ':'))
+  {
+    *addr_out= addr_in;
+    return ret;
+  }
+
+  ssize_t const buf_len = ret + 7;
+  char* const with_port = (char*) malloc (buf_len);
+
+  if (!with_port)
+  {
+    ret= -ENOMEM;
+  }
+  else
+  {
+    ret= snprintf (with_port, buf_len, "%s:%u", addr_in, mysqld_port);
+
+    if (ret > 0 && ret < buf_len)
+    {
+      *addr_out= with_port;
+      return ret;
+    }
+    if (ret > 0) /* buffer too short */ ret = -EMSGSIZE;
+    free (with_port);
+  }
+
+  WSREP_ERROR ("Could not prepare state transfer request: "
+               "adding default port failed: %zd.", ret);
+  return ret;
+}
+
+static bool SE_initialized = false; // set by wsrep_SE_initialized(); read by wsrep_SE_init_wait() and wsrep_sst_prepare()
+
+/*
+  Build the state transfer request.
+
+  On return *msg points to a malloc()/strdup()'ed buffer holding either
+  WSREP_STATE_TRANSFER_TRIVIAL (method "skip") or
+  "<method>\0<address>\0". *msg is set to 0 and 0 is returned when a
+  script-based method is configured but storage engines are already
+  initialized.
+
+  The receive address is taken from wsrep_sst_receive_address, else from the
+  host part of wsrep_node_address, else guessed with wsrep_guess_ip().
+
+  Returns the length of the request. Unrecoverable problems end in
+  unireg_abort(1).
+*/
+ssize_t wsrep_sst_prepare (void** msg)
+{
+  const ssize_t ip_max= 256;
+  char ip_buf[ip_max];
+  const char* addr_in= NULL;
+  const char* addr_out= NULL;
+
+  if (!strcmp(wsrep_sst_method, WSREP_SST_SKIP))
+  {
+    ssize_t ret = strlen(WSREP_STATE_TRANSFER_TRIVIAL) + 1;
+    *msg = strdup(WSREP_STATE_TRANSFER_TRIVIAL);
+    if (!*msg) // test the allocation result, not the out-parameter itself
+    {
+      WSREP_ERROR("Could not allocate %zd bytes for state request", ret);
+      unireg_abort(1);
+    }
+    return ret;
+  }
+
+  // Figure out SST address. Common for all SST methods
+  if (wsrep_sst_receive_address &&
+      strcmp (wsrep_sst_receive_address, WSREP_SST_ADDRESS_AUTO))
+  {
+    addr_in= wsrep_sst_receive_address;
+  }
+  else if (wsrep_node_address && strlen(wsrep_node_address))
+  {
+    const char* const colon= strchr (wsrep_node_address, ':');
+    if (colon)
+    {
+      ptrdiff_t const len= colon - wsrep_node_address;
+      if (len >= ip_max) // host part would overflow ip_buf
+      {
+        WSREP_ERROR("Could not prepare state transfer request: "
+                    "host part of wsrep_node_address is too long "
+                    "(%zd bytes, max %zd).", (ssize_t)len, ip_max - 1);
+        unireg_abort(1);
+      }
+      strncpy (ip_buf, wsrep_node_address, len);
+      ip_buf[len]= '\0';
+      addr_in= ip_buf;
+    }
+    else
+    {
+      addr_in= wsrep_node_address;
+    }
+  }
+  else
+  {
+    ssize_t ret= wsrep_guess_ip (ip_buf, ip_max);
+
+    if (ret && ret < ip_max)
+    {
+      addr_in= ip_buf;
+    }
+    else
+    {
+      WSREP_ERROR("Could not prepare state transfer request: "
+                  "failed to guess address to accept state transfer at. "
+                  "wsrep_sst_receive_address must be set manually.");
+      unireg_abort(1);
+    }
+  }
+
+  ssize_t addr_len= -ENOSYS;
+  if (!strcmp(wsrep_sst_method, WSREP_SST_MYSQLDUMP))
+  {
+    addr_len= sst_prepare_mysqldump (addr_in, &addr_out);
+    if (addr_len < 0) unireg_abort(1);
+  }
+  else
+  {
+    /*! A heuristic workaround until we learn how to stop and start engines */
+    if (SE_initialized)
+    {
+      // we already did SST at initialization, now engines are running
+      // sql_print_information() is here because the message is too long
+      // for WSREP_INFO.
+      sql_print_information ("WSREP: "
+                 "You have configured '%s' state snapshot transfer method "
+                 "which cannot be performed on a running server. "
+                 "Wsrep provider won't be able to fall back to it "
+                 "if other means of state transfer are unavailable. "
+                 "In that case you will need to restart the server.",
+                 wsrep_sst_method);
+      *msg = 0;
+      return 0;
+    }
+
+    addr_len = sst_prepare_other (wsrep_sst_method, sst_auth_real,
+                                  addr_in, &addr_out);
+    if (addr_len < 0)
+    {
+      WSREP_ERROR("Failed to prepare for '%s' SST. Unrecoverable.",
+                  wsrep_sst_method);
+      unireg_abort(1);
+    }
+  }
+
+  // request layout: method, NUL, address, NUL
+  size_t const method_len(strlen(wsrep_sst_method));
+  size_t const msg_len (method_len + addr_len + 2 /* + auth_len + 1*/);
+
+  *msg = malloc (msg_len);
+  if (NULL != *msg) {
+    char* const method_ptr(reinterpret_cast<char*>(*msg));
+    strcpy (method_ptr, wsrep_sst_method);
+    char* const addr_ptr(method_ptr + method_len + 1);
+    strcpy (addr_ptr, addr_out);
+
+    WSREP_INFO ("Prepared SST request: %s|%s", method_ptr, addr_ptr);
+  }
+  else {
+    WSREP_ERROR("Failed to allocate SST request of size %zu. Can't continue.",
+                msg_len);
+    unireg_abort(1);
+  }
+
+  if (addr_out != addr_in) /* malloc'ed */ free ((char*)addr_out);
+
+  return msg_len;
+}
+
+// helper method for donors: run cmd_str with env up to max_tries times,
+// sleeping one second after each failed attempt.
+// Returns 0 on success, otherwise the negated error of the last attempt.
+static int sst_run_shell (const char* cmd_str, char** env, int max_tries)
+{
+  int ret = 0;
+  int attempt = 1;
+
+  while (attempt <= max_tries)
+  {
+    wsp::process proc (cmd_str, "r", env);
+
+    if (proc.pipe() != NULL)
+    {
+      proc.wait();
+    }
+
+    ret = proc.error();
+    if (!ret)
+    {
+      WSREP_DEBUG("SST script successfully completed.");
+      break;
+    }
+
+    WSREP_ERROR("Try %d/%d: '%s' failed: %d (%s)",
+                attempt, max_tries, proc.cmd(), ret, strerror(ret));
+    sleep (1);
+    ++attempt;
+  }
+
+  return -ret;
+}
+
+// Stop serving client queries while this node donates state; optionally
+// close the existing client connections as well.
+static void sst_reject_queries(my_bool close_conn)
+{
+  wsrep_ready_set (FALSE); // this will be restored when donor becomes synced
+  WSREP_INFO("Rejecting client queries for the duration of SST.");
+
+  if (close_conn == TRUE)
+  {
+    wsrep_close_client_connections(FALSE);
+  }
+}
+
+// Donate state with the wsrep_sst_mysqldump script.
+// addr is "host[:port]"; the script is run synchronously (up to 3 tries) and
+// the provider is told the result through sst_sent().
+// Returns 0 on success or a negative error code.
+static int sst_donate_mysqldump (const char* addr,
+                                 const wsrep_uuid_t* uuid,
+                                 const char* uuid_str,
+                                 wsrep_seqno_t seqno,
+                                 bool bypass,
+                                 char** env) // carries auth info
+{
+  // split addr at the first ':'; host_len counts the terminating NUL
+  const char* const colon = strchr (addr, ':');
+  const char* const port = colon ? colon + 1 : "";
+  size_t const host_len = colon ? (size_t)(colon + 1 - addr)
+                                : strlen (addr) + 1;
+
+  char* const host= (char *) alloca(host_len);
+  strncpy (host, addr, host_len - 1);
+  host[host_len - 1] = '\0';
+
+  int const cmd_len= 4096;
+  wsp::string cmd_str(cmd_len);
+
+  if (!cmd_str())
+  {
+    WSREP_ERROR("sst_donate_mysqldump(): "
+                "could not allocate cmd buffer of %d bytes", cmd_len);
+    return -ENOMEM;
+  }
+
+  if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(TRUE);
+
+  int ret= snprintf (cmd_str(), cmd_len,
+                     "wsrep_sst_mysqldump "
+                     WSREP_SST_OPT_HOST" '%s' "
+                     WSREP_SST_OPT_PORT" '%s' "
+                     WSREP_SST_OPT_LPORT" '%u' "
+                     WSREP_SST_OPT_SOCKET" '%s' "
+                     WSREP_SST_OPT_CONF" '%s' "
+                     WSREP_SST_OPT_GTID" '%s:%lld'"
+                     "%s",
+                     host, port, mysqld_port, mysqld_unix_port,
+                     wsrep_defaults_file, uuid_str,
+                     (long long)seqno, bypass ? " " WSREP_SST_OPT_BYPASS : "");
+
+  if (ret < 0 || ret >= cmd_len)
+  {
+    WSREP_ERROR("sst_donate_mysqldump(): snprintf() failed: %d", ret);
+    return (ret < 0 ? ret : -EMSGSIZE);
+  }
+
+  WSREP_DEBUG("Running: '%s'", cmd_str());
+
+  ret= sst_run_shell (cmd_str(), env, 3);
+
+  // a failed transfer is reported with an undefined seqno
+  wsrep_gtid_t const state_id = { *uuid, (ret ? WSREP_SEQNO_UNDEFINED : seqno)};
+  wsrep->sst_sent (wsrep, &state_id, ret);
+
+  return ret;
+}
+
+wsrep_seqno_t wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; // written into the "tables_flushed" marker file by sst_flush_tables()
+
+// Parse and execute one SQL statement on behalf of thd.
+// Returns 0 on success, -1 when the parser state cannot be initialized or
+// the statement ends with an error (which is logged and then cleared).
+static int run_sql_command(THD *thd, const char *query)
+{
+  thd->set_query((char *)query, strlen(query));
+
+  Parser_state parser;
+  if (parser.init(thd, thd->query(), thd->query_length()))
+  {
+    WSREP_ERROR("SST query: %s failed", query);
+    return -1;
+  }
+
+  mysql_parse(thd, thd->query(), thd->query_length(), &parser);
+
+  if (!thd->is_error()) return 0;
+
+  int const err= thd->get_stmt_da()->sql_errno();
+  WSREP_WARN ("error executing '%s': %d (%s)%s",
+              query, err, thd->get_stmt_da()->message(),
+              err == ER_UNKNOWN_SYSTEM_VARIABLE ?
+              ". Was mysqld built with --with-innodb-disallow-writes ?" : "");
+  thd->clear_error();
+  return -1;
+}
+
+/*
+  Take the global read lock, flush the logs and leave a "tables_flushed"
+  marker file in the data directory.
+
+  The marker holds "<wsrep_cluster_state_uuid>:<wsrep_locked_seqno>" and is
+  written to "tables_flushed.tmp" first, then renamed into place.
+
+  If the client character set of thd is not usable by the parser, latin1 is
+  used for the duration of the FLUSH statement.
+
+  Returns 0 on success, non-zero otherwise (-1 when the FLUSH statement
+  failed, the reload_acl_and_cache() result, or an errno value from the
+  marker file handling).
+*/
+static int sst_flush_tables(THD* thd)
+{
+  WSREP_INFO("Flushing tables for SST...");
+
+  int err;
+  int not_used;
+  CHARSET_INFO *current_charset;
+
+  current_charset = thd->variables.character_set_client;
+
+  if (!is_supported_parser_charset(current_charset))
+  {
+    /* Do not use non-supported parser character sets */
+    WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname);
+    thd->variables.character_set_client = &my_charset_latin1;
+    WSREP_WARN("For SST temporally setting character set to : %s",
+               my_charset_latin1.csname);
+  }
+
+  if (run_sql_command(thd, "FLUSH TABLES WITH READ LOCK"))
+  {
+    WSREP_ERROR("Failed to flush and lock tables");
+    err = -1;
+  }
+  else
+  {
+    /* make sure logs are flushed after global read lock acquired */
+    err= reload_acl_and_cache(thd, REFRESH_ENGINE_LOG | REFRESH_BINARY_LOG,
+                              (TABLE_LIST*) 0, &not_used);
+  }
+
+  thd->variables.character_set_client = current_charset;
+
+
+  if (err)
+  {
+    WSREP_ERROR("Failed to flush tables: %d (%s)", err, strerror(err));
+  }
+  else
+  {
+    WSREP_INFO("Tables flushed.");
+    const char base_name[]= "tables_flushed";
+
+    ssize_t const full_len= strlen(mysql_real_data_home) + strlen(base_name)+2;
+    char *real_name= (char *) alloca(full_len);
+    snprintf(real_name, (size_t) full_len, "%s/%s", mysql_real_data_home,
+             base_name);
+    char *tmp_name= (char *) alloca(full_len + 4);
+    snprintf(tmp_name, (size_t) full_len + 4, "%s.tmp", real_name);
+
+    FILE* file= fopen(tmp_name, "w+");
+    if (0 == file)
+    {
+      err= errno;
+      WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err,strerror(err));
+    }
+    else
+    {
+      fprintf(file, "%s:%lld\n",
+              wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno);
+      /*
+        fsync() only persists what has already reached the file descriptor,
+        so the stdio buffer must be flushed first. A failed flush also
+        reveals a failed write.
+      */
+      if (fflush(file) != 0 || fsync(fileno(file)) != 0)
+      {
+        err= errno;
+        WSREP_ERROR("Failed to sync '%s': %d (%s)", tmp_name, err,
+                    strerror(err));
+      }
+      fclose(file);
+      // publish the marker only when its content is known to be on disk
+      if (!err && rename(tmp_name, real_name) == -1)
+      {
+        err= errno;
+        WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)",
+                    tmp_name, real_name, err,strerror(err));
+      }
+    }
+  }
+
+  return err;
+}
+
+// Switch innodb_disallow_writes on (yes == true) or off through a
+// "SET GLOBAL" statement executed on thd. Failures are only logged.
+// If the client character set of thd is not usable by the parser, latin1 is
+// used for the duration of the statement.
+static void sst_disallow_writes (THD* thd, bool yes)
+{
+  char statement[64] = { 0, };
+  ssize_t const statement_max = sizeof(statement) - 1;
+
+  snprintf (statement, statement_max, "SET GLOBAL innodb_disallow_writes=%d",
+            yes ? 1 : 0);
+
+  CHARSET_INFO* const saved_charset = thd->variables.character_set_client;
+
+  if (!is_supported_parser_charset(saved_charset))
+  {
+    /* Do not use non-supported parser character sets */
+    WSREP_WARN("Current client character set is non-supported parser character set: %s", saved_charset->csname);
+    thd->variables.character_set_client = &my_charset_latin1;
+    WSREP_WARN("For SST temporally setting character set to : %s",
+               my_charset_latin1.csname);
+  }
+
+  if (run_sql_command(thd, statement))
+  {
+    WSREP_ERROR("Failed to disallow InnoDB writes");
+  }
+
+  thd->variables.character_set_client = saved_charset;
+}
+
+/*
+  Thread body that runs the donor-side SST script and obeys the commands the
+  script prints on its stdout, one per line:
+
+    "flush tables"        flush and lock tables, disallow InnoDB writes and,
+                          if the binary log is open, take its log lock
+    "continue"            undo the above if it is in effect
+    "done <uuid>:<seqno>" transfer finished, position follows
+
+  a is a sst_thread_arg*. The script start result is handed back through
+  arg->err (negated) under arg->lock and arg->cond is signalled; arg is not
+  touched afterwards.
+
+  Before returning the server is unlocked if still locked and the provider is
+  notified through sst_sent(). Always returns NULL.
+*/ static void* sst_donor_thread (void* a)
+{
+ sst_thread_arg* arg= (sst_thread_arg*)a;
+
+ WSREP_INFO("Running: '%s'", arg->cmd);
+
+ int err= 1;
+ bool locked= false; // true while the "flush tables" locks are held
+
+ const char* out= NULL;
+ const size_t out_len= 128;
+ char out_buf[out_len];
+
+ wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED;
+ wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; // seqno of complete SST
+
+ wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can
+ // operate with wsrep_ready == OFF
+ wsp::process proc(arg->cmd, "r", arg->env);
+
+ err= proc.error();
+
+/* Inform server about SST script startup and release TO isolation */
+ mysql_mutex_lock (&arg->lock);
+ arg->err = -err;
+ mysql_cond_signal (&arg->cond);
+ mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that.
+
+ if (proc.pipe() && !err)
+ {
+wait_signal: // read the next command line from the script
+ out= my_fgets (out_buf, out_len, proc.pipe());
+
+ if (out)
+ {
+ const char magic_flush[]= "flush tables";
+ const char magic_cont[]= "continue";
+ const char magic_done[]= "done";
+
+ if (!strcasecmp (out, magic_flush))
+ {
+ err= sst_flush_tables (thd.ptr);
+ if (!err)
+ {
+ sst_disallow_writes (thd.ptr, true);
+ /*
+ Lets also keep statements that modify binary logs (like RESET LOGS,
+ RESET MASTER) from proceeding until the files have been transferred
+ to the joiner node.
+ */
+ if (mysql_bin_log.is_open())
+ {
+ mysql_mutex_lock(mysql_bin_log.get_log_lock());
+ }
+
+ locked= true;
+ goto wait_signal;
+ }
+ }
+ else if (!strcasecmp (out, magic_cont))
+ {
+ if (locked)
+ {
+ if (mysql_bin_log.is_open())
+ {
+ mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
+ mysql_mutex_unlock(mysql_bin_log.get_log_lock());
+ }
+ sst_disallow_writes (thd.ptr, false);
+ thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr);
+ locked= false;
+ }
+ err= 0;
+ goto wait_signal;
+ }
+ else if (!strncasecmp (out, magic_done, strlen(magic_done)))
+ {
+ err= sst_scan_uuid_seqno (out + strlen(magic_done) + 1, // skip "done" and the separator character
+ &ret_uuid, &ret_seqno);
+ }
+ else
+ {
+ WSREP_WARN("Received unknown signal: '%s'", out);
+ }
+ }
+ else
+ {
+ WSREP_ERROR("Failed to read from: %s", proc.cmd());
+ proc.wait();
+ }
+ if (!err && proc.error()) err= proc.error();
+ }
+ else
+ {
+ WSREP_ERROR("Failed to execute: %s : %d (%s)",
+ proc.cmd(), err, strerror(err));
+ }
+
+ if (locked) // don't forget to unlock server before return
+ {
+ if (mysql_bin_log.is_open())
+ {
+ mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
+ mysql_mutex_unlock(mysql_bin_log.get_log_lock());
+ }
+ sst_disallow_writes (thd.ptr, false);
+ thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr);
+ }
+
+ // signal to donor that SST is over
+ struct wsrep_gtid const state_id = {
+ ret_uuid, err ? WSREP_SEQNO_UNDEFINED : ret_seqno
+ };
+ wsrep->sst_sent (wsrep, &state_id, -err);
+ proc.wait();
+
+ return NULL;
+}
+
+
+
+/*
+  Donate state with a script-based method: build the "wsrep_sst_<method>"
+  donor command line, start sst_donor_thread() on it and wait until the
+  thread reports whether the script could be started.
+
+  method  SST method name, becomes part of the script name
+  addr    joiner address, passed with WSREP_SST_OPT_ADDR
+  uuid    printable state UUID
+  seqno   state sequence number
+  bypass  adds WSREP_SST_OPT_BYPASS to the command line
+  env     environment for the script (carries auth info)
+
+  Returns 0 when the script was started, a negative error code otherwise.
+*/
+static int sst_donate_other (const char* method,
+                             const char* addr,
+                             const char* uuid,
+                             wsrep_seqno_t seqno,
+                             bool bypass,
+                             char** env) // carries auth info
+{
+  int const cmd_len= 4096;
+  wsp::string cmd_str(cmd_len);
+
+  if (!cmd_str())
+  {
+    WSREP_ERROR("sst_donate_other(): "
+                "could not allocate cmd buffer of %d bytes", cmd_len);
+    return -ENOMEM;
+  }
+
+  const char* binlog_opt= "";
+  char* binlog_opt_val= NULL;
+
+  int ret;
+  if ((ret= generate_binlog_opt_val(&binlog_opt_val)))
+  {
+    WSREP_ERROR("sst_donate_other(): generate_binlog_opt_val() failed: %d",ret);
+    return ret;
+  }
+  // the option name is emitted only together with a value
+  if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG;
+
+  ret= snprintf (cmd_str(), cmd_len,
+                 "wsrep_sst_%s "
+                 WSREP_SST_OPT_ROLE" 'donor' "
+                 WSREP_SST_OPT_ADDR" '%s' "
+                 WSREP_SST_OPT_SOCKET" '%s' "
+                 WSREP_SST_OPT_DATA" '%s' "
+                 WSREP_SST_OPT_CONF" '%s' "
+                 WSREP_SST_OPT_CONF_SUFFIX" '%s' "
+                 " %s '%s' "
+                 WSREP_SST_OPT_GTID" '%s:%lld'"
+                 "%s",
+                 method, addr, mysqld_unix_port, mysql_real_data_home,
+                 wsrep_defaults_file, wsrep_defaults_group_suffix,
+                 binlog_opt, binlog_opt_val,
+                 uuid, (long long) seqno,
+                 bypass ? " " WSREP_SST_OPT_BYPASS : "");
+  my_free(binlog_opt_val);
+
+  if (ret < 0 || ret >= cmd_len)
+  {
+    WSREP_ERROR("sst_donate_other(): snprintf() failed: %d", ret);
+    return (ret < 0 ? ret : -EMSGSIZE);
+  }
+
+  if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(FALSE);
+
+  pthread_t tmp;
+  sst_thread_arg arg(cmd_str(), env);
+  mysql_mutex_lock (&arg.lock); // released by ~sst_thread_arg()
+  ret = pthread_create (&tmp, NULL, sst_donor_thread, &arg);
+  if (ret)
+  {
+    WSREP_ERROR("sst_donate_other(): pthread_create() failed: %d (%s)",
+                ret, strerror(ret));
+    // pthread_create() returns a positive errno, while the caller treats
+    // every non-negative result as success: negate it.
+    return -ret;
+  }
+  mysql_cond_wait (&arg.cond, &arg.lock);
+
+  // nobody joins the donor thread; detach it so its resources are released
+  // when it exits (same as sst_prepare_other() does for the joiner thread)
+  pthread_detach (tmp);
+
+  WSREP_INFO("sst_donor_thread signaled with %d", arg.err);
+  return arg.err;
+}
+
+// Donor callback: msg holds "<method>\0<data>", where data is handed to the
+// donate routine as the joiner address. Dispatches to the mysqldump or the
+// script-based donation and maps a negative result to WSREP_CB_FAILURE.
+wsrep_cb_status_t wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx,
+                                       const void* msg, size_t msg_len,
+                                       const wsrep_gtid_t* current_gtid,
+                                       const char* state, size_t state_len,
+                                       bool bypass)
+{
+  /* This will be reset when sync callback is called.
+   * Should we set wsrep_ready to FALSE here too? */
+//  wsrep_notify_status(WSREP_MEMBER_DONOR);
+  local_status.set(WSREP_MEMBER_DONOR);
+
+  const char* const method = (char*)msg;
+  const char* const data = method + strlen (method) + 1;
+
+  char uuid_str[37];
+  wsrep_uuid_print (&current_gtid->uuid, uuid_str, sizeof(uuid_str));
+
+  wsp::env env(NULL);
+  if (env.error())
+  {
+    WSREP_ERROR("wsrep_sst_donate_cb(): env var ctor failed: %d", -env.error());
+    return WSREP_CB_FAILURE;
+  }
+
+  int ret= sst_append_auth_env(env, sst_auth_real);
+  if (ret)
+  {
+    WSREP_ERROR("wsrep_sst_donate_cb(): appending auth env failed: %d", ret);
+    return WSREP_CB_FAILURE;
+  }
+
+  const bool use_mysqldump= (strcmp (WSREP_SST_MYSQLDUMP, method) == 0);
+  if (use_mysqldump)
+  {
+    ret = sst_donate_mysqldump(data, &current_gtid->uuid, uuid_str,
+                               current_gtid->seqno, bypass, env());
+  }
+  else
+  {
+    ret = sst_donate_other(method, data, uuid_str,
+                           current_gtid->seqno, bypass, env());
+  }
+
+  return (ret < 0 ? WSREP_CB_FAILURE : WSREP_CB_SUCCESS);
+}
+
+// Enter the SE init critical section: takes LOCK_wsrep_sst_init and leaves
+// it locked. Aborts the process if the mutex cannot be taken.
+void wsrep_SE_init_grab()
+{
+  const int lock_failed= mysql_mutex_lock (&LOCK_wsrep_sst_init);
+  if (lock_failed) abort();
+}
+
+// Wait on COND_wsrep_sst_init until SE_initialized is set, then release
+// LOCK_wsrep_sst_init. The mutex is unlocked here but not locked here.
+void wsrep_SE_init_wait()
+{
+  while (!SE_initialized)
+  {
+    mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init);
+  }
+
+  mysql_mutex_unlock (&LOCK_wsrep_sst_init);
+}
+
+// Signal COND_wsrep_sst_init and release LOCK_wsrep_sst_init.
+// The mutex is unlocked here but not locked here.
+void wsrep_SE_init_done()
+{
+  mysql_cond_signal (&COND_wsrep_sst_init);
+
+  mysql_mutex_unlock (&LOCK_wsrep_sst_init);
+}
+
+// Record that storage engine initialization has completed.
+void wsrep_SE_initialized()
+{
+  SE_initialized= true;
+}
diff --git a/sql/wsrep_sst.h b/sql/wsrep_sst.h
new file mode 100644
index 00000000000..42f1055bde2
--- /dev/null
+++ b/sql/wsrep_sst.h
@@ -0,0 +1,70 @@
+/* Copyright (C) 2013 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
+
+#ifndef WSREP_SST_H
+#define WSREP_SST_H
+
+#include <my_config.h>
+#include <mysql.h> // my_bool
+
+#define WSREP_SST_OPT_ROLE "--role"
+#define WSREP_SST_OPT_ADDR "--address"
+#define WSREP_SST_OPT_AUTH "--auth"
+#define WSREP_SST_OPT_DATA "--datadir"
+#define WSREP_SST_OPT_CONF "--defaults-file"
+#define WSREP_SST_OPT_CONF_SUFFIX "--defaults-group-suffix"
+#define WSREP_SST_OPT_PARENT "--parent"
+#define WSREP_SST_OPT_BINLOG "--binlog"
+
+// mysqldump-specific options
+#define WSREP_SST_OPT_USER "--user"
+#define WSREP_SST_OPT_PSWD "--password"
+#define WSREP_SST_OPT_HOST "--host"
+#define WSREP_SST_OPT_PORT "--port"
+#define WSREP_SST_OPT_LPORT "--local-port"
+
+// donor-specific
+#define WSREP_SST_OPT_SOCKET "--socket"
+#define WSREP_SST_OPT_GTID "--gtid"
+#define WSREP_SST_OPT_BYPASS "--bypass"
+
+#define WSREP_SST_MYSQLDUMP "mysqldump"
+#define WSREP_SST_RSYNC "rsync"
+#define WSREP_SST_SKIP "skip"
+#define WSREP_SST_DEFAULT WSREP_SST_RSYNC
+#define WSREP_SST_ADDRESS_AUTO "AUTO"
+#define WSREP_SST_AUTH_MASK "********"
+
+/* system variables */
+extern const char* wsrep_sst_method;
+extern const char* wsrep_sst_receive_address;
+extern const char* wsrep_sst_donor;
+extern char* wsrep_sst_auth;
+extern my_bool wsrep_sst_donor_rejects_queries;
+
+/*! Synchronizes applier thread start with init thread */
+extern void wsrep_sst_grab();
+/*! Init thread waits for SST completion */
+extern bool wsrep_sst_wait();
+/*! Signals wsrep that initialization is complete, writesets can be applied */
+extern void wsrep_sst_continue();
+extern void wsrep_sst_auth_free(); /*! releases the stored auth strings */
+
+extern void wsrep_SE_init_grab(); /*! grab init critical section */
+extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */
+extern void wsrep_SE_init_done(); /*! signal that SE init is complete */
+extern void wsrep_SE_initialized(); /*! mark SE initialization complete */
+
+#endif /* WSREP_SST_H */
diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc
new file mode 100644
index 00000000000..307745ff1b0
--- /dev/null
+++ b/sql/wsrep_thd.cc
@@ -0,0 +1,666 @@
+/* Copyright (C) 2013 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
+
+#include "wsrep_thd.h"
+
+#include "transaction.h"
+#include "rpl_rli.h"
+#include "log_event.h"
+#include "sql_parse.h"
+//#include "global_threads.h" // LOCK_thread_count, etc.
+#include "sql_base.h" // close_thread_tables()
+#include "mysqld.h" // start_wsrep_THD();
+
+#include "slave.h" // opt_log_slave_updates
+#include "rpl_filter.h"
+#include "rpl_rli.h"
+#include "rpl_mi.h"
+/*
+  Counter of brute-force (BF) aborts. It is incremented by
+  wsrep_client_rollback() and read by wsrep_show_bf_aborts().
+
+  The counter is 64 bits wide when __LP64__ is set and 32 bits wide
+  otherwise; the WSREP_ATOMIC_* macros pick the my_atomic primitive of the
+  matching width.
+ */
+#if (__LP64__)
+static volatile int64 wsrep_bf_aborts_counter(0);
+#define WSREP_ATOMIC_LOAD_LONG my_atomic_load64
+#define WSREP_ATOMIC_ADD_LONG my_atomic_add64
+#else
+static volatile int32 wsrep_bf_aborts_counter(0);
+#define WSREP_ATOMIC_LOAD_LONG my_atomic_load32
+#define WSREP_ATOMIC_ADD_LONG my_atomic_add32
+#endif
+
+/*
+  SHOW STATUS callback for the BF abort count.
+
+  Copies the atomic counter into wsrep_local_bf_aborts and points the
+  SHOW_VAR at that copy. thd and buff are not used. Always returns 0.
+ */
+int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff)
+{
+  var->type = SHOW_LONGLONG;
+
+  /* snapshot the counter into the variable the SHOW_VAR will point at */
+  wsrep_local_bf_aborts = WSREP_ATOMIC_LOAD_LONG(&wsrep_bf_aborts_counter);
+  var->value = (char*)&wsrep_local_bf_aborts;
+
+  return 0;
+}
+
+/* must have (&thd->LOCK_wsrep_thd)
+
+   Rolls back the transaction of a client THD that was brute-force (BF)
+   aborted and then releases what the client still holds: locked tables,
+   the global read lock, transactional and explicit MDL locks and the
+   binlog table maps. The BF abort counter is incremented on every call.
+
+   The caller enters with thd->LOCK_wsrep_thd held. The mutex is released
+   before trans_rollback() and taken again at the end, so it is held on
+   return as well. wsrep_conflict_state is ABORTING while the rollback
+   runs and ABORTED on return.
+ */
+void wsrep_client_rollback(THD *thd)
+{
+ WSREP_DEBUG("client rollback due to BF abort for (%ld), query: %s",
+ thd->thread_id, thd->query());
+
+ WSREP_ATOMIC_ADD_LONG(&wsrep_bf_aborts_counter, 1);
+
+ thd->wsrep_conflict_state= ABORTING;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+ trans_rollback(thd);
+
+ if (thd->locked_tables_mode && thd->lock)
+ {
+ WSREP_DEBUG("unlocking tables for BF abort (%ld)", thd->thread_id);
+ thd->locked_tables_list.unlock_locked_tables(thd);
+ thd->variables.option_bits&= ~(OPTION_TABLE_LOCK);
+ }
+
+ if (thd->global_read_lock.is_acquired())
+ {
+ WSREP_DEBUG("unlocking GRL for BF abort (%ld)", thd->thread_id);
+ thd->global_read_lock.unlock_global_read_lock(thd);
+ }
+
+ /* Release transactional metadata locks. */
+ thd->mdl_context.release_transactional_locks();
+
+ /* release explicit MDL locks */
+ thd->mdl_context.release_explicit_locks();
+
+ if (thd->get_binlog_table_maps())
+ {
+ WSREP_DEBUG("clearing binlog table map for BF abort (%ld)", thd->thread_id);
+ thd->clear_binlog_table_maps();
+ }
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+ thd->wsrep_conflict_state= ABORTED;
+}
+
+#define NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR 1
+#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2
+/*
+  Builds the replication context an applier/replayer THD needs: a
+  Relay_log_info with no_storage set, a Master_info named "wsrep" attached
+  to it, and an rpl_group_info wrapping both and bound to current_thd.
+
+  Everything is allocated with new; wsrep_return_from_bf_mode() in this
+  file deletes the Master_info, the Relay_log_info and the rpl_group_info.
+
+  log_fname is accepted but not used by this function.
+ */
+static rpl_group_info* wsrep_relay_group_init(const char* log_fname)
+{
+ Relay_log_info* rli= new Relay_log_info(false);
+
+ rli->no_storage= true;
+ if (!rli->relay_log.description_event_for_exec)
+ {
+ rli->relay_log.description_event_for_exec=
+ new Format_description_log_event(4);
+ }
+
+ static LEX_STRING connection_name= { C_STRING_WITH_LEN("wsrep") };
+
+ /*
+ Master_info's constructor initializes rpl_filter by either an already
+ constructed Rpl_filter object from global 'rpl_filters' list if the
+ specified connection name is same, or it constructs a new Rpl_filter
+ object and adds it to rpl_filters. This object is later destructed by
+ Master_info's destructor by looking it up based on connection name in
+ rpl_filters list.
+
+ However, since all Master_info objects created here would share same
+ connection name ("wsrep"), destruction of any of the existing Master_info
+ objects (in wsrep_return_from_bf_mode()) would free rpl_filter referenced
+ by any/all existing Master_info objects.
+
+ In order to avoid that, we have added a check in Master_info's destructor
+ to not free the "wsrep" rpl_filter. It will eventually be freed by
+ free_all_rpl_filters() when server terminates.
+ */
+ rli->mi = new Master_info(&connection_name, false);
+
+ struct rpl_group_info *rgi= new rpl_group_info(rli);
+ rgi->thd= rli->sql_driver_thd= current_thd;
+
+ if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on()))
+ {
+ rgi->deferred_events= new Deferred_log_events(rli);
+ }
+
+ return rgi;
+}
+/*
+  Saves the session state that write-set applying overrides into *shadow
+  (option bits, server status, wsrep exec mode, vio, tx isolation, current
+  database, user_time, row count) and switches thd into REPL_RECV mode:
+  general logging off, binlogging according to opt_log_slave_updates, no
+  client vio, READ COMMITTED isolation and no default database.
+
+  thd->wsrep_rgi is created when it is missing, and a new
+  rpl_sql_thread_info is always allocated. wsrep_return_from_bf_mode()
+  undoes these changes.
+ */
+static void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow* shadow)
+{
+ shadow->options = thd->variables.option_bits;
+ shadow->server_status = thd->server_status;
+ shadow->wsrep_exec_mode = thd->wsrep_exec_mode;
+ shadow->vio = thd->net.vio;
+
+ // Disable general logging on applier threads
+ thd->variables.option_bits |= OPTION_LOG_OFF;
+ // Enable binlogging if opt_log_slave_updates is set
+ if (opt_log_slave_updates)
+ thd->variables.option_bits|= OPTION_BIN_LOG;
+ else
+ thd->variables.option_bits&= ~(OPTION_BIN_LOG);
+
+ if (!thd->wsrep_rgi) thd->wsrep_rgi= wsrep_relay_group_init("wsrep_relay");
+
+ /* thd->system_thread_info.rpl_sql_info isn't initialized. */
+ thd->system_thread_info.rpl_sql_info=
+ new rpl_sql_thread_info(thd->wsrep_rgi->rli->mi->rpl_filter);
+
+ thd->wsrep_exec_mode= REPL_RECV;
+ thd->net.vio= 0;
+ thd->clear_error();
+
+ shadow->tx_isolation = thd->variables.tx_isolation;
+ thd->variables.tx_isolation = ISO_READ_COMMITTED;
+ thd->tx_isolation = ISO_READ_COMMITTED;
+
+ shadow->db = thd->db;
+ shadow->db_length = thd->db_length;
+ shadow->user_time = thd->user_time;
+ shadow->row_count_func= thd->get_row_count_func();
+ thd->reset_db(NULL, 0);
+}
+/*
+  Counterpart of wsrep_prepare_bf_thd(): restores the session state saved
+  in *shadow and frees the replication objects attached to thd
+  (rpl_sql_thread_info, Master_info, Relay_log_info and rpl_group_info).
+  thd->wsrep_rgi is NULL on return.
+
+  NOTE(review): thd->tx_isolation, which wsrep_prepare_bf_thd() also
+  overwrites, is not restored here (only thd->variables.tx_isolation is);
+  confirm that this is intended.
+ */
+static void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow* shadow)
+{
+ thd->variables.option_bits = shadow->options;
+ thd->server_status = shadow->server_status;
+ thd->wsrep_exec_mode = shadow->wsrep_exec_mode;
+ thd->net.vio = shadow->vio;
+ thd->variables.tx_isolation = shadow->tx_isolation;
+ thd->user_time = shadow->user_time;
+ thd->reset_db(shadow->db, shadow->db_length);
+
+ delete thd->system_thread_info.rpl_sql_info;
+ delete thd->wsrep_rgi->rli->mi;
+ delete thd->wsrep_rgi->rli;
+
+ thd->wsrep_rgi->cleanup_after_session();
+ delete thd->wsrep_rgi;
+ thd->wsrep_rgi = NULL;
+ thd->set_row_count_func(shadow->row_count_func);
+}
+/*
+  Replays the transaction of thd when its wsrep_conflict_state is
+  MUST_REPLAY and thd is not itself in REPL_RECV mode; otherwise does
+  nothing.
+
+  Locking: thd->LOCK_wsrep_thd is unlocked before the replay and locked
+  again before the result is evaluated, so the function expects the mutex
+  to be held on entry and leaves it held on return.
+
+  Steps:
+   - remember an OK diagnostics status (affected rows, last insert id,
+     message), then reset the diagnostics area;
+   - close tables, drop table locks and transactional MDL locks, end the
+     current performance schema statement;
+   - switch into applier mode (wsrep_prepare_bf_thd()), call
+     wsrep->replay_trx(), switch back (wsrep_return_from_bf_mode());
+   - WSREP_OK: state becomes NO_CONFLICT, post_commit() is called and, if
+     the diagnostics area is still unset, the remembered OK status (or a
+     plain OK) is reported;
+   - WSREP_TRX_FAIL: state becomes ABORTED and post_rollback() is called;
+   - any other code: the mutex is released and unireg_abort(1) is called.
+  Finally wsrep_cleanup_transaction() runs and wsrep_replaying is
+  decremented under LOCK_wsrep_replaying, with a broadcast on
+  COND_wsrep_replaying.
+ */
+void wsrep_replay_transaction(THD *thd)
+{
+ DBUG_ENTER("wsrep_replay_transaction");
+ /* checking if BF trx must be replayed */
+ if (thd->wsrep_conflict_state== MUST_REPLAY) {
+ DBUG_ASSERT(wsrep_thd_trx_seqno(thd));
+ if (thd->wsrep_exec_mode!= REPL_RECV) {
+ if (thd->get_stmt_da()->is_sent())
+ {
+ WSREP_ERROR("replay issue, thd has reported status already");
+ }
+
+
+ /*
+ PS reprepare observer should have been removed already.
+ open_table() will fail if we have dangling observer here.
+ */
+ DBUG_ASSERT(thd->m_reprepare_observer == NULL);
+
+ struct da_shadow
+ {
+ enum Diagnostics_area::enum_diagnostics_status status;
+ ulonglong affected_rows;
+ ulonglong last_insert_id;
+ char message[MYSQL_ERRMSG_SIZE];
+ };
+ struct da_shadow da_status;
+ da_status.status= thd->get_stmt_da()->status();
+ if (da_status.status == Diagnostics_area::DA_OK)
+ {
+ da_status.affected_rows= thd->get_stmt_da()->affected_rows();
+ da_status.last_insert_id= thd->get_stmt_da()->last_insert_id();
+ strmake(da_status.message,
+ thd->get_stmt_da()->message(),
+ sizeof(da_status.message)-1);
+ }
+
+ thd->get_stmt_da()->reset_diagnostics_area();
+
+ thd->wsrep_conflict_state= REPLAYING;
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ mysql_reset_thd_for_next_command(thd);
+ thd->killed= NOT_KILLED;
+ close_thread_tables(thd);
+ if (thd->locked_tables_mode && thd->lock)
+ {
+ WSREP_DEBUG("releasing table lock for replaying (%ld)",
+ thd->thread_id);
+ thd->locked_tables_list.unlock_locked_tables(thd);
+ thd->variables.option_bits&= ~(OPTION_TABLE_LOCK);
+ }
+ thd->mdl_context.release_transactional_locks();
+ /*
+ Replaying will call MYSQL_START_STATEMENT when handling
+ BEGIN Query_log_event so end statement must be called before
+ replaying.
+ */
+ MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da());
+ thd->m_statement_psi= NULL;
+ thd->m_digest= NULL;
+ thd_proc_info(thd, "wsrep replaying trx");
+ WSREP_DEBUG("replay trx: %s %lld",
+ thd->query() ? thd->query() : "void",
+ (long long)wsrep_thd_trx_seqno(thd));
+ struct wsrep_thd_shadow shadow;
+ wsrep_prepare_bf_thd(thd, &shadow);
+
+ /* From trans_begin() */
+ thd->variables.option_bits|= OPTION_BEGIN;
+ thd->server_status|= SERVER_STATUS_IN_TRANS;
+
+ int rcode = wsrep->replay_trx(wsrep,
+ &thd->wsrep_ws_handle,
+ (void *)thd);
+
+ wsrep_return_from_bf_mode(thd, &shadow);
+ if (thd->wsrep_conflict_state!= REPLAYING)
+ WSREP_WARN("lost replaying mode: %d", thd->wsrep_conflict_state );
+
+ mysql_mutex_lock(&thd->LOCK_wsrep_thd);
+
+ switch (rcode)
+ {
+ case WSREP_OK:
+ thd->wsrep_conflict_state= NO_CONFLICT;
+ wsrep->post_commit(wsrep, &thd->wsrep_ws_handle);
+ WSREP_DEBUG("trx_replay successful for: %ld %llu",
+ thd->thread_id, (long long)thd->real_id);
+ if (thd->get_stmt_da()->is_sent())
+ {
+ WSREP_WARN("replay ok, thd has reported status");
+ }
+ else if (thd->get_stmt_da()->is_set())
+ {
+ if (thd->get_stmt_da()->status() != Diagnostics_area::DA_OK)
+ {
+ WSREP_WARN("replay ok, thd has error status %d",
+ thd->get_stmt_da()->status());
+ }
+ }
+ else
+ {
+ if (da_status.status == Diagnostics_area::DA_OK)
+ {
+ my_ok(thd,
+ da_status.affected_rows,
+ da_status.last_insert_id,
+ da_status.message);
+ }
+ else
+ {
+ my_ok(thd);
+ }
+ }
+ break;
+ case WSREP_TRX_FAIL:
+ if (thd->get_stmt_da()->is_sent())
+ {
+ WSREP_ERROR("replay failed, thd has reported status");
+ }
+ else
+ {
+ WSREP_DEBUG("replay failed, rolling back");
+ //my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction");
+ }
+ thd->wsrep_conflict_state= ABORTED;
+ wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle);
+ break;
+ default:
+ WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s",
+ rcode,
+ (thd->db ? thd->db : "(null)"),
+ thd->query() ? thd->query() : "void");
+ /* we're now in inconsistent state, must abort */
+
+ /* http://bazaar.launchpad.net/~codership/codership-mysql/5.6/revision/3962#sql/wsrep_thd.cc */
+ mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
+
+ unireg_abort(1);
+ break;
+ }
+
+ wsrep_cleanup_transaction(thd);
+
+ mysql_mutex_lock(&LOCK_wsrep_replaying);
+ wsrep_replaying--;
+ WSREP_DEBUG("replaying decreased: %d, thd: %lu",
+ wsrep_replaying, thd->thread_id);
+ mysql_cond_broadcast(&COND_wsrep_replaying);
+ mysql_mutex_unlock(&LOCK_wsrep_replaying);
+ }
+ }
+ DBUG_VOID_RETURN;
+}
+
+/*
+  Body of a wsrep applier thread (started through start_wsrep_THD() by
+  wsrep_create_appliers()).
+
+  Switches thd into applier mode, blocks in wsrep->recv() until the
+  provider returns, reacts to the return code (possibly shutting the
+  server down through wsrep_kill_mysql()), unregisters the applier and
+  finally restores the thd state saved in the shadow.
+ */
+static void wsrep_replication_process(THD *thd)
+{
+  int rcode;
+  DBUG_ENTER("wsrep_replication_process");
+
+  struct wsrep_thd_shadow shadow;
+  wsrep_prepare_bf_thd(thd, &shadow);
+
+  /* From trans_begin() */
+  thd->variables.option_bits|= OPTION_BEGIN;
+  thd->server_status|= SERVER_STATUS_IN_TRANS;
+
+  rcode = wsrep->recv(wsrep, (void *)thd);
+  DBUG_PRINT("wsrep",("wsrep_repl returned: %d", rcode));
+
+  WSREP_INFO("applier thread exiting (code:%d)", rcode);
+
+  switch (rcode) {
+  case WSREP_OK:
+  case WSREP_NOT_IMPLEMENTED:
+  case WSREP_CONN_FAIL:
+    /* provider does not support slave operations / disconnected from group,
+     * just close applier thread */
+    break;
+  case WSREP_NODE_FAIL:
+    /* data inconsistency => SST is needed */
+    /* Note: we cannot just blindly restart replication here,
+     * SST might require server restart if storage engines must be
+     * initialized after SST */
+    WSREP_ERROR("node consistency compromised, aborting");
+    wsrep_kill_mysql(thd);
+    break;
+  case WSREP_WARNING:
+  case WSREP_TRX_FAIL:
+  case WSREP_TRX_MISSING:
+    /* these suggests a bug in provider code */
+    WSREP_WARN("bad return from recv() call: %d", rcode);
+    /* fall through to node shutdown */
+  case WSREP_FATAL:
+    /* Cluster connectivity is lost.
+     *
+     * If applier was killed on purpose (KILL_CONNECTION), we
+     * avoid mysql shutdown. This is because the killer will then handle
+     * shutdown processing (or replication restarting)
+     */
+    if (thd->killed != KILL_CONNECTION)
+    {
+      wsrep_kill_mysql(thd);
+    }
+    break;
+  }
+
+  mysql_mutex_lock(&LOCK_thread_count);
+  wsrep_close_applier(thd);
+  mysql_cond_broadcast(&COND_thread_count);
+  mysql_mutex_unlock(&LOCK_thread_count);
+
+  /*
+    Report every temporary table still attached to the applier. The list
+    is walked through TABLE::next: the previous loop re-read
+    thd->temporary_tables on each iteration without ever advancing or
+    removing the head, so a single leftover table made the thread spin
+    forever.
+   */
+  for (TABLE *tmp= thd->temporary_tables; tmp; tmp= tmp->next)
+  {
+    WSREP_WARN("Applier %lu, has temporary tables at exit: %s.%s",
+               thd->thread_id,
+               (tmp->s) ? tmp->s->db.str : "void",
+               (tmp->s) ? tmp->s->table_name.str : "void");
+  }
+  wsrep_return_from_bf_mode(thd, &shadow);
+  DBUG_VOID_RETURN;
+}
+
+/*
+  Starts `threads` applier threads, each running
+  wsrep_replication_process() through start_wsrep_THD().
+
+  Nothing is started while wsrep is not connected; if a cluster address
+  and a real provider are configured at that point, the situation is
+  logged as an error and asserted. A failed pthread_create() is only
+  logged as a warning.
+ */
+void wsrep_create_appliers(long threads)
+{
+  if (wsrep_connected)
+  {
+    pthread_t tid;
+    for (long started= 0; started < threads; ++started)
+    {
+      int const rc= pthread_create(&tid, &connection_attrib,
+                                   start_wsrep_THD,
+                                   (void*)wsrep_replication_process);
+      if (rc)
+        WSREP_WARN("Can't create thread to manage wsrep replication");
+    }
+    return;
+  }
+
+  /* see wsrep_replication_start() for the logic */
+  if (wsrep_cluster_address && strlen(wsrep_cluster_address) &&
+      wsrep_provider && strcasecmp(wsrep_provider, "none"))
+  {
+    WSREP_ERROR("Trying to launch slave threads before creating "
+                "connection at '%s'", wsrep_cluster_address);
+    assert(0);
+  }
+}
+/*
+  Body of the rollbacker thread (started through start_wsrep_THD() by
+  wsrep_create_rollbacker()).
+
+  Waits on COND_wsrep_rollback and, on each wake-up, drains the
+  wsrep_aborting_thd list: every queued THD that is not already ABORTED is
+  marked ABORTING and rolled back with wsrep_client_rollback(), with the
+  victim temporarily installed as the current THD.
+
+  LOCK_wsrep_rollback is held while the list is inspected and released
+  while a victim is being rolled back. The loop ends once thd->killed is
+  no longer NOT_KILLED.
+ */
+static void wsrep_rollback_process(THD *thd)
+{
+ DBUG_ENTER("wsrep_rollback_process");
+
+ mysql_mutex_lock(&LOCK_wsrep_rollback);
+ wsrep_aborting_thd= NULL;
+
+ while (thd->killed == NOT_KILLED) {
+ thd_proc_info(thd, "wsrep aborter idle");
+ thd->mysys_var->current_mutex= &LOCK_wsrep_rollback;
+ thd->mysys_var->current_cond= &COND_wsrep_rollback;
+
+ mysql_cond_wait(&COND_wsrep_rollback,&LOCK_wsrep_rollback);
+
+ WSREP_DEBUG("WSREP rollback thread wakes for signal");
+
+ mysql_mutex_lock(&thd->mysys_var->mutex);
+ thd_proc_info(thd, "wsrep aborter active");
+ thd->mysys_var->current_mutex= 0;
+ thd->mysys_var->current_cond= 0;
+ mysql_mutex_unlock(&thd->mysys_var->mutex);
+
+ /* check for false alarms */
+ if (!wsrep_aborting_thd)
+ {
+ WSREP_DEBUG("WSREP rollback thread has empty abort queue");
+ }
+ /* process all entries in the queue */
+ while (wsrep_aborting_thd) {
+ THD *aborting;
+ wsrep_aborting_thd_t next = wsrep_aborting_thd->next;
+ aborting = wsrep_aborting_thd->aborting_thd;
+ my_free(wsrep_aborting_thd);
+ wsrep_aborting_thd= next;
+ /*
+ * must release mutex, appliers may want to add more
+ * aborting thds in our work queue, while we rollback
+ */
+ mysql_mutex_unlock(&LOCK_wsrep_rollback);
+
+ mysql_mutex_lock(&aborting->LOCK_wsrep_thd);
+ if (aborting->wsrep_conflict_state== ABORTED)
+ {
+ WSREP_DEBUG("WSREP, thd already aborted: %llu state: %d",
+ (long long)aborting->real_id,
+ aborting->wsrep_conflict_state);
+
+ mysql_mutex_unlock(&aborting->LOCK_wsrep_thd);
+ mysql_mutex_lock(&LOCK_wsrep_rollback);
+ continue;
+ }
+ aborting->wsrep_conflict_state= ABORTING;
+
+ mysql_mutex_unlock(&aborting->LOCK_wsrep_thd);
+
+ set_current_thd(aborting);
+ aborting->store_globals();
+
+ mysql_mutex_lock(&aborting->LOCK_wsrep_thd);
+ wsrep_client_rollback(aborting);
+ WSREP_DEBUG("WSREP rollbacker aborted thd: (%lu %llu)",
+ aborting->thread_id, (long long)aborting->real_id);
+ mysql_mutex_unlock(&aborting->LOCK_wsrep_thd);
+
+ set_current_thd(thd);
+ thd->store_globals();
+
+ mysql_mutex_lock(&LOCK_wsrep_rollback);
+ }
+ }
+
+ mysql_mutex_unlock(&LOCK_wsrep_rollback);
+ sql_print_information("WSREP: rollbacker thread exiting");
+
+ DBUG_PRINT("wsrep",("wsrep rollbacker thread exiting"));
+ DBUG_VOID_RETURN;
+}
+
+/*
+  Starts the rollbacker thread (wsrep_rollback_process() run through
+  start_wsrep_THD()) when a provider other than "none" is configured.
+  A failed pthread_create() is only logged as a warning.
+ */
+void wsrep_create_rollbacker()
+{
+  /* no provider, or the "none" placeholder: nothing to start */
+  if (!wsrep_provider || !strcasecmp(wsrep_provider, "none"))
+    return;
+
+  pthread_t tid;
+  int const rc= pthread_create(&tid, &connection_attrib,
+                               start_wsrep_THD,
+                               (void*)wsrep_rollback_process);
+  if (rc)
+    WSREP_WARN("Can't create thread to manage wsrep rollback");
+}
+
+/*
+  Stores `safe` in the wsrep_PA_safe flag of the THD behind thd_ptr.
+  A NULL thd_ptr is ignored.
+ */
+void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe)
+{
+  if (!thd_ptr)
+    return;
+
+  static_cast<THD*>(thd_ptr)->wsrep_PA_safe = safe;
+}
+
+/*
+  Returns the wsrep_conflict_state of the THD behind thd_ptr, or -1 when
+  thd_ptr is NULL. With `sync` set the value is read under
+  thd->LOCK_wsrep_thd.
+ */
+int wsrep_thd_conflict_state(void *thd_ptr, my_bool sync)
+{
+  if (!thd_ptr)
+    return -1;
+
+  THD* const session= static_cast<THD*>(thd_ptr);
+
+  if (sync)
+    mysql_mutex_lock(&session->LOCK_wsrep_thd);
+
+  int const current= session->wsrep_conflict_state;
+
+  if (sync)
+    mysql_mutex_unlock(&session->LOCK_wsrep_thd);
+
+  return current;
+}
+
+/*
+  Returns the value of WSREP(thd) && WSREP_PROVIDER_EXISTS for the THD
+  behind thd_ptr; FALSE when thd_ptr is NULL.
+ */
+my_bool wsrep_thd_is_wsrep(void *thd_ptr)
+{
+  if (!thd_ptr)
+    return FALSE;
+
+  THD* thd = (THD*)thd_ptr;
+  my_bool const enabled = (WSREP(thd) && WSREP_PROVIDER_EXISTS);
+  return enabled;
+}
+
+/*
+  Tells whether the THD behind thd_ptr runs in brute-force mode, i.e. its
+  wsrep_exec_mode is REPL_RECV or TOTAL_ORDER. Returns FALSE for a NULL
+  thd_ptr or when wsrep_thd_is_wsrep() is false for it. With `sync` set
+  the mode is read under thd->LOCK_wsrep_thd.
+ */
+my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync)
+{
+  // THD can be BF only if provider exists
+  if (!thd_ptr || !wsrep_thd_is_wsrep(thd_ptr))
+    return FALSE;
+
+  THD* const session= static_cast<THD*>(thd_ptr);
+
+  if (sync)
+    mysql_mutex_lock(&session->LOCK_wsrep_thd);
+
+  my_bool const brute_force= ((session->wsrep_exec_mode == REPL_RECV) ||
+                              (session->wsrep_exec_mode == TOTAL_ORDER));
+
+  if (sync)
+    mysql_mutex_unlock(&session->LOCK_wsrep_thd);
+
+  return brute_force;
+}
+
+/*
+  Tells whether the wsrep_exec_mode of the THD behind thd_ptr is
+  REPL_RECV, TOTAL_ORDER or LOCAL_COMMIT. Returns FALSE for a NULL
+  thd_ptr. With `sync` set the mode is read under thd->LOCK_wsrep_thd.
+ */
+extern "C"
+my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync)
+{
+  if (!thd_ptr)
+    return FALSE;
+
+  THD* const session= static_cast<THD*>(thd_ptr);
+
+  if (sync)
+    mysql_mutex_lock(&session->LOCK_wsrep_thd);
+
+  bool const matches= ((session->wsrep_exec_mode == REPL_RECV) ||
+                       (session->wsrep_exec_mode == TOTAL_ORDER) ||
+                       (session->wsrep_exec_mode == LOCAL_COMMIT));
+
+  if (sync)
+    mysql_mutex_unlock(&session->LOCK_wsrep_thd);
+
+  return matches;
+}
+
+/*
+  Tells whether the wsrep_exec_mode of the THD behind thd_ptr is
+  LOCAL_STATE. Returns FALSE for a NULL thd_ptr. With `sync` set the mode
+  is read under thd->LOCK_wsrep_thd.
+ */
+extern "C"
+my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync)
+{
+  if (!thd_ptr)
+    return FALSE;
+
+  THD* const session= static_cast<THD*>(thd_ptr);
+
+  if (sync)
+    mysql_mutex_lock(&session->LOCK_wsrep_thd);
+
+  bool const local= (session->wsrep_exec_mode == LOCAL_STATE);
+
+  if (sync)
+    mysql_mutex_unlock(&session->LOCK_wsrep_thd);
+
+  return local;
+}
+
+/*
+  Asks the storage engine layer (ha_wsrep_abort_transaction()) to abort
+  the transaction of victim_thd on behalf of bf_thd.
+
+  @param bf_thd_ptr      THD requesting the abort; may be NULL
+  @param victim_thd_ptr  THD whose transaction is to be aborted; may be NULL
+  @param signal          passed through to ha_wsrep_abort_transaction()
+
+  The abort is attempted only when a victim is given and the requester
+  either satisfies WSREP(bf_thd), or runs in TOTAL_ORDER mode with
+  WSREP_ON set or with the RSU schema upgrade method selected. A victim
+  whose conflict state is already MUST_ABORT, ABORTED or ABORTING is left
+  alone.
+
+  @return always 1
+ */
+int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
+{
+  THD *victim_thd = (THD *) victim_thd_ptr;
+  THD *bf_thd = (THD *) bf_thd_ptr;
+  DBUG_ENTER("wsrep_abort_thd");
+
+  /*
+    The log calls below already treat bf_thd as possibly NULL, so the
+    TOTAL_ORDER alternative must not dereference it unguarded.
+    NOTE(review): this assumes WSREP() itself tolerates a NULL THD -
+    confirm against its definition.
+   */
+  if ( (WSREP(bf_thd) ||
+        ( bf_thd &&
+          (WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) &&
+          bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) &&
+       victim_thd)
+  {
+    if ((victim_thd->wsrep_conflict_state == MUST_ABORT) ||
+        (victim_thd->wsrep_conflict_state == ABORTED) ||
+        (victim_thd->wsrep_conflict_state == ABORTING))
+    {
+      WSREP_DEBUG("wsrep_abort_thd called by %llu with victim %llu already "
+                  "aborted. Ignoring.",
+                  (bf_thd) ? (long long)bf_thd->real_id : 0,
+                  (long long)victim_thd->real_id);
+      DBUG_RETURN(1);
+    }
+
+    WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ?
+                (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id);
+    ha_wsrep_abort_transaction(bf_thd, victim_thd, signal);
+  }
+  else
+  {
+    WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd);
+  }
+
+  DBUG_RETURN(1);
+}
+
+/*
+  Returns 1 when thd_ptr is non-NULL and the THD's in_lock_tables flag is
+  set, 0 otherwise.
+ */
+extern "C"
+int wsrep_thd_in_locking_session(void *thd_ptr)
+{
+  THD* const session= static_cast<THD*>(thd_ptr);
+
+  return (session && session->in_lock_tables) ? 1 : 0;
+}
+
+/*
+  Forwards to thd->mdl_context.wsrep_has_explicit_locks().
+  thd must not be NULL (asserted).
+ */
+bool wsrep_thd_has_explicit_locks(THD *thd)
+{
+  assert(thd);
+
+  bool const has_explicit= thd->mdl_context.wsrep_has_explicit_locks();
+  return has_explicit;
+}
diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h
new file mode 100644
index 00000000000..700e0f1cc56
--- /dev/null
+++ b/sql/wsrep_thd.h
@@ -0,0 +1,40 @@
+/* Copyright (C) 2013 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
+
+#ifndef WSREP_THD_H
+#define WSREP_THD_H
+
+#include "sql_class.h"
+
+int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff);
+void wsrep_client_rollback(THD *thd);
+void wsrep_replay_transaction(THD *thd);
+void wsrep_create_appliers(long threads);
+void wsrep_create_rollbacker();
+
+int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr,
+ my_bool signal);
+
+extern void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe);
+extern my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync);
+extern my_bool wsrep_thd_is_wsrep(void *thd_ptr);
+
+extern int wsrep_thd_conflict_state(void *thd_ptr, my_bool sync);
+//extern "C" my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync);
+extern "C" my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync);
+extern "C" my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync);
+extern "C" int wsrep_thd_in_locking_session(void *thd_ptr);
+
+#endif /* WSREP_THD_H */
diff --git a/sql/wsrep_utils.cc b/sql/wsrep_utils.cc
new file mode 100644
index 00000000000..719e8e6b473
--- /dev/null
+++ b/sql/wsrep_utils.cc
@@ -0,0 +1,556 @@
+/* Copyright 2010-2015 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//! @file some utility functions and classes not directly related to replication
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE // POSIX_SPAWN_USEVFORK flag
+#endif
+
+#include "wsrep_utils.h"
+#include "wsrep_mysqld.h"
+
+#include <sql_class.h>
+
+#include <spawn.h> // posix_spawn()
+#include <unistd.h> // pipe()
+#include <errno.h> // errno
+#include <string.h> // strerror()
+#include <sys/wait.h> // waitpid()
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h> // getaddrinfo()
+
+#ifdef HAVE_GETIFADDRS
+#include <net/if.h>
+#include <ifaddrs.h>
+#endif /* HAVE_GETIFADDRS */
+
+extern char** environ; // environment variables
+
+static wsp::string wsrep_PATH;
+
+/*
+  Puts `path` in front of the PATH entry of the process environment.
+
+  Nothing changes when `path` already occurs somewhere in the current PATH
+  entry. Otherwise a new "PATH=<path>:<old value>" string is allocated,
+  handed to wsrep_PATH and installed in environ. A missing PATH entry or a
+  failed allocation is logged as an error.
+ */
+void
+wsrep_prepend_PATH (const char* path)
+{
+  /* locate the PATH entry */
+  char** slot = environ;
+  while (*slot && strncmp (*slot, "PATH=", 5) != 0)
+  {
+    ++slot;
+  }
+
+  if (!*slot)
+  {
+    WSREP_ERROR ("Failed to find 'PATH' environment variable. "
+                 "State snapshot transfer may not be working.");
+    return;
+  }
+
+  char* const current (*slot);
+
+  if (strstr (current, path)) return; // path already there
+
+  /* current already includes the "PATH=" prefix */
+  size_t const needed (strlen(current) + strlen(":") + strlen(path) + 1);
+
+  char* const replacement (static_cast<char*>(malloc(needed)));
+
+  if (!replacement)
+  {
+    WSREP_ERROR ("Failed to allocate 'PATH' environment variable "
+                 "buffer of size %zu.", needed);
+    return;
+  }
+
+  snprintf (replacement, needed, "PATH=%s:%s", path,
+            current + strlen("PATH="));
+
+  wsrep_PATH.set (replacement);
+  *slot = replacement;
+}
+
+namespace wsp
+{
+
+/*
+  Shared part of the constructors: allocates a vector of len_ + 1 slots
+  and fills it with strdup() copies of the first len_ entries of e,
+  followed by a NULL terminator.
+
+  Returns false on success. On an allocation failure errno is stored in
+  errno_, the failure is logged and true is returned; the vector may then
+  be partially filled.
+ */
+bool
+env::ctor_common(char** e)
+{
+  env_ = static_cast<char**>(malloc((len_ + 1) * sizeof(char*)));
+
+  if (!env_)
+  {
+    errno_ = errno;
+    WSREP_ERROR("Failed to allocate env. var vector of length: %zu", len_);
+    return true;
+  }
+
+  for (size_t idx(0); idx < len_; ++idx)
+  {
+    assert(e[idx]); // caller should make sure about len_
+    char* const copy = strdup(e[idx]);
+    env_[idx] = copy;
+    if (!copy)
+    {
+      errno_ = errno;
+      WSREP_ERROR("Failed to allocate env. var: %s", e[idx]);
+      return true;
+    }
+  }
+
+  env_[len_] = NULL;
+  return false;
+}
+
+/*
+  Frees every string up to the first NULL slot, then the vector itself,
+  and resets env_ and len_.
+ */
+void
+env::dtor()
+{
+  if (env_)
+  {
+    /* don't need to go beyond the first NULL */
+    char** cursor = env_;
+    while (*cursor != NULL)
+    {
+      free(*cursor);
+      ++cursor;
+    }
+    free(env_);
+    env_ = NULL;
+  }
+  len_ = 0;
+}
+
+/*
+  Copies the NULL-terminated vector e, or the process environment when e
+  is NULL. On a failed copy the object is left empty with the error in
+  errno_.
+ */
+env::env(char** e)
+  : len_(0), env_(NULL), errno_(0)
+{
+  char** const source = e ? e : environ;
+
+  /* count the size of the vector */
+  for (; source[len_]; ++len_) {}
+
+  if (ctor_common(source))
+  {
+    dtor();
+  }
+}
+
+/* Copies the vector held by e; left empty on failure, as above. */
+env::env(const env& e)
+  : len_(e.len_), env_(0), errno_(0)
+{
+  if (ctor_common(e.env_))
+  {
+    dtor();
+  }
+}
+
+env::~env()
+{
+  dtor();
+}
+
+/*
+  Appends a strdup() copy of val to the vector and keeps it
+  NULL-terminated.
+
+  Returns errno_: it is updated from errno when growing the vector or
+  copying the string fails, and left as it was otherwise.
+ */
+int
+env::append(const char* val)
+{
+  char** const grown =
+    static_cast<char**>(realloc(env_, (len_ + 2)*sizeof(char*)));
+
+  if (!grown)
+  {
+    errno_ = errno;
+    return errno_;
+  }
+
+  env_ = grown;
+
+  /* on a failed strdup() this slot holds NULL and still ends the vector */
+  char* const copy = strdup(val);
+  env_[len_] = copy;
+
+  if (!copy)
+  {
+    errno_ = errno;
+    return errno_;
+  }
+
+  ++len_;
+  env_[len_] = NULL;
+
+  return errno_;
+}
+
+
+#define PIPE_READ 0
+#define PIPE_WRITE 1
+#define STDIN_FD 0
+#define STDOUT_FD 1
+
+#ifndef POSIX_SPAWN_USEVFORK
+# define POSIX_SPAWN_USEVFORK 0
+#endif
+
+/*
+  Starts "sh -c <cmd>" with posix_spawnp() and connects one end of a pipe
+  to the child.
+
+  @param cmd   command line; NULL or empty is rejected
+  @param type  "r": the parent reads the child's stdout through io_,
+               "w": the parent writes to the child's stdin through io_
+  @param env   environment for the child; NULL means the global environ
+
+  The child is started in a new process group with an empty signal mask
+  and default handling for HUP, INT, QUIT, PIPE, TERM and CHLD.
+
+  The outcome is left in the members: err_ is 0 and io_/pid_ are set when
+  the child was started and the stream opened; otherwise err_ holds the
+  error code (EINVAL for bad arguments) and io_ is NULL.
+ */
+process::process (const char* cmd, const char* type, char** env)
+  : str_(cmd ? strdup(cmd) : strdup("")), io_(NULL), err_(EINVAL), pid_(0)
+{
+  if (0 == str_)
+  {
+    /* cmd is NULL when strdup("") was the call that failed, so it must
+       not be passed to strlen() unchecked */
+    WSREP_ERROR ("Can't allocate command line of size: %zu",
+                 cmd ? strlen(cmd) : static_cast<size_t>(0));
+    err_ = ENOMEM;
+    return;
+  }
+
+  if (0 == strlen(str_))
+  {
+    WSREP_ERROR ("Can't start a process: null or empty command line.");
+    return;
+  }
+
+  if (NULL == type || (strcmp (type, "w") && strcmp(type, "r")))
+  {
+    WSREP_ERROR ("type argument should be either \"r\" or \"w\".");
+    return;
+  }
+
+  if (NULL == env) { env = environ; } // default to global environment
+
+  /* both slots start as "not open"; "{ -1, }" left the second one at 0 */
+  int pipe_fds[2] = { -1, -1 };
+  if (::pipe(pipe_fds))
+  {
+    err_ = errno;
+    WSREP_ERROR ("pipe() failed: %d (%s)", err_, strerror(err_));
+    return;
+  }
+
+  // which end of pipe will be returned to parent
+  int const parent_end (strcmp(type,"w") ? PIPE_READ : PIPE_WRITE);
+  int const child_end (parent_end == PIPE_READ ? PIPE_WRITE : PIPE_READ);
+  int const close_fd (parent_end == PIPE_READ ? STDOUT_FD : STDIN_FD);
+
+  char* const pargv[4] = { strdup("sh"), strdup("-c"), strdup(str_), NULL };
+  if (!(pargv[0] && pargv[1] && pargv[2]))
+  {
+    err_ = ENOMEM;
+    WSREP_ERROR ("Failed to allocate pargv[] array.");
+    goto cleanup_pipe;
+  }
+
+  posix_spawnattr_t attr;
+  err_ = posix_spawnattr_init (&attr);
+  if (err_)
+  {
+    WSREP_ERROR ("posix_spawnattr_init() failed: %d (%s)",
+                 err_, strerror(err_));
+    goto cleanup_pipe;
+  }
+
+  /* make sure that no signals are masked in child process */
+  sigset_t sigmask_empty; sigemptyset(&sigmask_empty);
+  err_ = posix_spawnattr_setsigmask(&attr, &sigmask_empty);
+  if (err_)
+  {
+    WSREP_ERROR ("posix_spawnattr_setsigmask() failed: %d (%s)",
+                 err_, strerror(err_));
+    goto cleanup_attr;
+  }
+
+  /* make sure the following signals are not ignored in child process */
+  sigset_t default_signals; sigemptyset(&default_signals);
+  sigaddset(&default_signals, SIGHUP);
+  sigaddset(&default_signals, SIGINT);
+  sigaddset(&default_signals, SIGQUIT);
+  sigaddset(&default_signals, SIGPIPE);
+  sigaddset(&default_signals, SIGTERM);
+  sigaddset(&default_signals, SIGCHLD);
+  err_ = posix_spawnattr_setsigdefault(&attr, &default_signals);
+  if (err_)
+  {
+    WSREP_ERROR ("posix_spawnattr_setsigdefault() failed: %d (%s)",
+                 err_, strerror(err_));
+    goto cleanup_attr;
+  }
+
+  err_ = posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF |
+                                          POSIX_SPAWN_SETSIGMASK |
+           /* start a new process group */ POSIX_SPAWN_SETPGROUP |
+                                          POSIX_SPAWN_USEVFORK);
+  if (err_)
+  {
+    WSREP_ERROR ("posix_spawnattr_setflags() failed: %d (%s)",
+                 err_, strerror(err_));
+    goto cleanup_attr;
+  }
+
+  posix_spawn_file_actions_t fact;
+  err_ = posix_spawn_file_actions_init (&fact);
+  if (err_)
+  {
+    WSREP_ERROR ("posix_spawn_file_actions_init() failed: %d (%s)",
+                 err_, strerror(err_));
+    goto cleanup_attr;
+  }
+
+  // close child's stdout|stdin depending on what we returning
+  err_ = posix_spawn_file_actions_addclose (&fact, close_fd);
+  if (err_)
+  {
+    WSREP_ERROR ("posix_spawn_file_actions_addclose() failed: %d (%s)",
+                 err_, strerror(err_));
+    goto cleanup_fact;
+  }
+
+  // substitute our pipe descriptor in place of the closed one
+  err_ = posix_spawn_file_actions_adddup2 (&fact,
+                                           pipe_fds[child_end], close_fd);
+  if (err_)
+  {
+    WSREP_ERROR ("posix_spawn_file_actions_addup2() failed: %d (%s)",
+                 err_, strerror(err_));
+    goto cleanup_fact;
+  }
+
+  err_ = posix_spawnp (&pid_, pargv[0], &fact, &attr, pargv, env);
+  if (err_)
+  {
+    WSREP_ERROR ("posix_spawnp(%s) failed: %d (%s)",
+                 pargv[2], err_, strerror(err_));
+    pid_ = 0; // just to make sure it was not messed up in the call
+    goto cleanup_fact;
+  }
+
+  io_ = fdopen (pipe_fds[parent_end], type);
+
+  if (io_)
+  {
+    pipe_fds[parent_end] = -1; // skip close on cleanup
+  }
+  else
+  {
+    err_ = errno;
+    WSREP_ERROR ("fdopen() failed: %d (%s)", err_, strerror(err_));
+  }
+
+cleanup_fact:
+  int err; // to preserve err_ code
+  err = posix_spawn_file_actions_destroy (&fact);
+  if (err)
+  {
+    WSREP_ERROR ("posix_spawn_file_actions_destroy() failed: %d (%s)\n",
+                 err, strerror(err));
+  }
+
+cleanup_attr:
+  err = posix_spawnattr_destroy (&attr);
+  if (err)
+  {
+    WSREP_ERROR ("posix_spawnattr_destroy() failed: %d (%s)",
+                 err, strerror(err));
+  }
+
+cleanup_pipe:
+  /* closes whatever is still open: always the child's end, and the
+     parent's end too unless io_ took it over */
+  if (pipe_fds[0] >= 0) close (pipe_fds[0]);
+  if (pipe_fds[1] >= 0) close (pipe_fds[1]);
+
+  free (pargv[0]);
+  free (pargv[1]);
+  free (pargv[2]);
+}
+
+/*
+  Closes the pipe if it is still open (with a warning, since the child
+  may still be running) and releases the stored command line.
+ */
+process::~process ()
+{
+  if (io_)
+  {
+    assert (pid_);
+    assert (str_);
+
+    WSREP_WARN("Closing pipe to child process: %s, PID(%ld) "
+               "which might still be running.", str_, (long)pid_);
+
+    int const rc = fclose (io_);
+    if (rc == -1)
+    {
+      err_ = errno;
+      WSREP_ERROR("fclose() failed: %d (%s)", err_, strerror(err_));
+    }
+  }
+
+  /* free(NULL) is a no-op, so no separate NULL check is needed */
+  free (const_cast<char*>(str_));
+}
+
+/*
+  Waits for the child to terminate and returns the resulting err_.
+
+  If no child was started, only an error is logged. If waitpid() fails,
+  err_ becomes errno. Otherwise err_ becomes the exit status (or errno /
+  ECHILD when the child did not exit normally); the shell codes 126, 127
+  and 143 are translated to EACCES, ENOENT and EINTR. After a successful
+  waitpid() pid_ is cleared and the pipe is closed.
+ */
+int
+process::wait ()
+{
+  if (!pid_)
+  {
+    assert (NULL == io_);
+    WSREP_ERROR("Command did not run: %s", str_);
+    return err_;
+  }
+
+  int status;
+  if (-1 == waitpid(pid_, &status, 0))
+  {
+    err_ = errno; assert (err_);
+    WSREP_ERROR("Waiting for process failed: %s, PID(%ld): %d (%s)",
+                str_, (long)pid_, err_, strerror (err_));
+    return err_;
+  }
+
+  // command completed, check exit status
+  if (WIFEXITED (status))
+  {
+    err_ = WEXITSTATUS (status);
+  }
+  else
+  { // command didn't complete with exit()
+    WSREP_ERROR("Process was aborted.");
+    err_ = errno ? errno : ECHILD;
+  }
+
+  if (err_)
+  {
+    /* Translate error codes to more meaningful */
+    if (126 == err_)      err_ = EACCES; /* Permission denied */
+    else if (127 == err_) err_ = ENOENT; /* No such file or directory */
+    else if (143 == err_) err_ = EINTR;  /* Subprocess killed */
+
+    WSREP_ERROR("Process completed with error: %s: %d (%s)",
+                str_, err_, strerror(err_));
+  }
+
+  pid_ = 0;
+  if (io_) fclose (io_);
+  io_ = NULL;
+
+  return err_;
+}
+
+/*
+  Creates a THD and installs it for the calling thread (store_globals()),
+  with binlogging switched off, wsrep_on set to `won`, all master_access
+  bits set and the lexer initialized.
+ */
+thd::thd (my_bool won) : init(), ptr(new THD)
+{
+  if (!ptr)
+    return;
+
+  ptr->thread_stack= (char*) &ptr;
+  ptr->store_globals();
+  ptr->variables.option_bits&= ~OPTION_BIN_LOG; // disable binlog
+  ptr->variables.wsrep_on = won;
+  ptr->security_ctx->master_access= ~(ulong)0;
+  lex_start(ptr);
+}
+
+/* Destroys the THD and clears the thread-specific THR_THD pointer. */
+thd::~thd ()
+{
+  if (!ptr)
+    return;
+
+  delete ptr;
+  my_pthread_setspecific_ptr (THR_THD, 0);
+}
+
+} // namespace wsp
+
+/*
+  Resolves addr with getaddrinfo() and classifies the first result.
+
+  Returns INADDR_NONE when resolution fails (the failure is logged). For
+  an IPv4 result the address is returned after htonl(). Any other result
+  is handled as IPv6: INADDR_ANY for the unspecified address,
+  INADDR_LOOPBACK for the loopback address and 0xdeadbeef for everything
+  else.
+ */
+unsigned int wsrep_check_ip (const char* const addr)
+{
+  struct addrinfo hints;
+  memset (&hints, 0, sizeof(hints));
+  hints.ai_family= AF_UNSPEC;
+  hints.ai_socktype= SOCK_STREAM;
+  hints.ai_flags= AI_PASSIVE/*|AI_ADDRCONFIG*/;
+
+  struct addrinfo *found;
+  int const rc = getaddrinfo(addr, NULL, &hints, &found);
+  if (0 != rc)
+  {
+    WSREP_ERROR ("getaddrinfo() failed on '%s': %d (%s)",
+                 addr, rc, gai_strerror(rc));
+    return INADDR_NONE;
+  }
+
+  unsigned int kind;
+  if (AF_INET == found->ai_family) /* IPv4 */
+  {
+    struct sockaddr_in* v4= (struct sockaddr_in*)found->ai_addr;
+    kind= htonl(v4->sin_addr.s_addr);
+  }
+  else /* IPv6 */
+  {
+    struct sockaddr_in6* v6= (struct sockaddr_in6*)found->ai_addr;
+    kind= IN6_IS_ADDR_UNSPECIFIED(&v6->sin6_addr) ? INADDR_ANY :
+          IN6_IS_ADDR_LOOPBACK(&v6->sin6_addr)    ? INADDR_LOOPBACK :
+                                                    0xdeadbeef;
+  }
+
+  freeaddrinfo (found);
+  return kind;
+}
+
+extern char* my_bind_addr_str;
+
+/*
+  Tries to find an address of this node that other nodes can reach and
+  writes it to buf as a NUL-terminated string.
+
+  Sources, in order:
+   1. my_bind_addr_str, unless it resolves to the wildcard address;
+   2. the host part of wsrep_node_address (everything before the first
+      ':');
+   3. with HAVE_GETIFADDRS, the first non-loopback IPv4 interface address.
+
+  @param buf      output buffer
+  @param buf_len  size of buf in bytes
+  @return length of the string written to buf, or 0 when nothing could be
+          determined or the address does not fit into buf
+ */
+size_t wsrep_guess_ip (char* buf, size_t buf_len)
+{
+  size_t ip_len = 0;
+
+  if (my_bind_addr_str && my_bind_addr_str[0] != '\0')
+  {
+    unsigned int const ip_type= wsrep_check_ip(my_bind_addr_str);
+
+    if (INADDR_NONE == ip_type) {
+      WSREP_ERROR("Networking not configured, cannot receive state "
+                  "transfer.");
+      return 0;
+    }
+
+    if (INADDR_ANY != ip_type) {
+      /*
+        strncpy() leaves buf without a terminating NUL when the source
+        has buf_len or more characters, and the strlen(buf) that followed
+        it then read past the buffer. Reject an address that does not
+        fit, as the wsrep_node_address branch below does.
+       */
+      size_t const bind_len = strlen(my_bind_addr_str);
+
+      if (bind_len >= buf_len) {
+        WSREP_WARN("default_ip(): buffer too short for bind address: "
+                   "%zu <= %zu", buf_len, bind_len);
+        return 0;
+      }
+
+      memcpy (buf, my_bind_addr_str, bind_len + 1); // includes the NUL
+      return bind_len;
+    }
+  }
+
+  // mysqld binds to all interfaces - try IP from wsrep_node_address
+  if (wsrep_node_address && wsrep_node_address[0] != '\0') {
+    const char* const colon_ptr = strchr(wsrep_node_address, ':');
+
+    if (colon_ptr)
+      ip_len = colon_ptr - wsrep_node_address;
+    else
+      ip_len = strlen(wsrep_node_address);
+
+    if (ip_len >= buf_len) {
+      WSREP_WARN("default_ip(): buffer too short: %zu <= %zd", buf_len, ip_len);
+      return 0;
+    }
+
+    memcpy (buf, wsrep_node_address, ip_len);
+    buf[ip_len] = '\0';
+    return ip_len;
+  }
+
+  /*
+    getifaddrs() is available at least on Linux since glib 2.3, FreeBSD,
+    MAC OSX, OpenSolaris, Solaris.
+
+    On platforms which do not support getifaddrs() this function returns
+    a failure and user is prompted to do manual configuration.
+  */
+#if HAVE_GETIFADDRS
+  struct ifaddrs *ifaddr, *ifa;
+  if (getifaddrs(&ifaddr) == 0)
+  {
+    for (ifa= ifaddr; ifa != NULL; ifa = ifa->ifa_next)
+    {
+      if (!ifa->ifa_addr || ifa->ifa_addr->sa_family != AF_INET) // TODO AF_INET6
+        continue;
+
+      // Skip loopback interfaces (like lo:127.0.0.1)
+      if (ifa->ifa_flags & IFF_LOOPBACK)
+        continue;
+
+      if (vio_getnameinfo(ifa->ifa_addr, buf, buf_len, NULL, 0, NI_NUMERICHOST))
+        continue;
+
+      freeifaddrs(ifaddr);
+      return strlen(buf);
+    }
+    freeifaddrs(ifaddr);
+  }
+#endif /* HAVE_GETIFADDRS */
+
+  return 0;
+}
diff --git a/sql/wsrep_utils.h b/sql/wsrep_utils.h
new file mode 100644
index 00000000000..7d864603c7f
--- /dev/null
+++ b/sql/wsrep_utils.h
@@ -0,0 +1,229 @@
+/* Copyright (C) 2013-2015 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
+
+#ifndef WSREP_UTILS_H
+#define WSREP_UTILS_H
+
+#include "wsrep_priv.h"
+
+unsigned int wsrep_check_ip (const char* addr);
+size_t wsrep_guess_ip (char* buf, size_t buf_len);
+
+namespace wsp {
+class node_status
+{
+public:
+ node_status() : status(WSREP_MEMBER_UNDEFINED) {}
+ void set(wsrep_member_status_t new_status,
+ const wsrep_view_info_t* view = 0)
+ {
+ if (status != new_status || 0 != view)
+ {
+ wsrep_notify_status(new_status, view);
+ status = new_status;
+ }
+ }
+ wsrep_member_status_t get() const { return status; }
+private:
+ wsrep_member_status_t status;
+};
+} /* namespace wsp */
+
+extern wsp::node_status local_status;
+
+namespace wsp {
+/* a class to manage env vars array */
+class env
+{
+private:
+ size_t len_;
+ char** env_;
+ int errno_;
+ bool ctor_common(char** e);
+ void dtor();
+ env& operator =(env);
+public:
+ explicit env(char** env);
+ explicit env(const env&);
+ ~env();
+ int append(const char* var); /* add a new env. var */
+ int error() const { return errno_; }
+ char** operator()() { return env_; }
+};
+
+/* A small class to run external programs. */
+class process
+{
+private:
+ const char* const str_;
+ FILE* io_;
+ int err_;
+ pid_t pid_;
+
+public:
+/*! @arg type is a pointer to a null-terminated string which must contain
+ either the letter 'r' for reading or the letter 'w' for writing.
+ @arg env optional null-terminated vector of environment variables
+ */
+ process (const char* cmd, const char* type, char** env);
+ ~process ();
+
+ FILE* pipe () { return io_; }
+ int error() { return err_; }
+ int wait ();
+ const char* cmd() { return str_; }
+};
+
+class thd
+{
+ class thd_init
+ {
+ public:
+ thd_init() { my_thread_init(); }
+ ~thd_init() { my_thread_end(); }
+ }
+ init;
+
+ thd (const thd&);
+ thd& operator= (const thd&);
+
+public:
+
+ thd(my_bool wsrep_on);
+ ~thd();
+ THD* const ptr;
+};
+
+class string // owns a malloc()ed buffer: set() adopts it, destructor frees it
+{
+  char* string_;
+  string(const string&); string& operator=(const string&); // copy = double free
+public:
+  string() : string_(0) {}
+  explicit string(size_t s) : string_(static_cast<char*>(malloc(s))) {}
+  char* operator()() { return string_; }
+  void set(char* str) { if (str != string_) free (string_); string_ = str; }
+  ~string() { set (0); }
+};
+
+#ifdef REMOVED
+class lock
+{
+ pthread_mutex_t* const mtx_;
+
+public:
+
+ lock (pthread_mutex_t* mtx) : mtx_(mtx)
+ {
+ int err = pthread_mutex_lock (mtx_);
+
+ if (err)
+ {
+ WSREP_ERROR("Mutex lock failed: %s", strerror(err));
+ abort();
+ }
+ }
+
+ virtual ~lock ()
+ {
+ int err = pthread_mutex_unlock (mtx_);
+
+ if (err)
+ {
+ WSREP_ERROR("Mutex unlock failed: %s", strerror(err));
+ abort();
+ }
+ }
+
+ inline void wait (pthread_cond_t* cond)
+ {
+ pthread_cond_wait (cond, mtx_);
+ }
+
+private:
+
+ lock (const lock&);
+ lock& operator=(const lock&);
+
+};
+
+class monitor
+{
+ int mutable refcnt;
+ pthread_mutex_t mutable mtx;
+ pthread_cond_t mutable cond;
+
+public:
+
+ monitor() : refcnt(0)
+ {
+ pthread_mutex_init (&mtx, NULL);
+ pthread_cond_init (&cond, NULL);
+ }
+
+ ~monitor()
+ {
+ pthread_mutex_destroy (&mtx);
+ pthread_cond_destroy (&cond);
+ }
+
+ void enter() const
+ {
+ lock l(&mtx);
+
+ while (refcnt)
+ {
+ l.wait(&cond);
+ }
+ refcnt++;
+ }
+
+ void leave() const
+ {
+ lock l(&mtx);
+
+ refcnt--;
+ if (refcnt == 0)
+ {
+ pthread_cond_signal (&cond);
+ }
+ }
+
+private:
+
+ monitor (const monitor&);
+ monitor& operator= (const monitor&);
+};
+
+class critical
+{
+ const monitor& mon;
+
+public:
+
+ critical(const monitor& m) : mon(m) { mon.enter(); }
+
+ ~critical() { mon.leave(); }
+
+private:
+
+ critical (const critical&);
+ critical& operator= (const critical&);
+};
+#endif
+
+} // namespace wsp
+
+#endif /* WSREP_UTILS_H */
diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc
new file mode 100644
index 00000000000..8a507711daf
--- /dev/null
+++ b/sql/wsrep_var.cc
@@ -0,0 +1,670 @@
+/* Copyright 2008-2015 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "wsrep_var.h"
+
+#include <mysqld.h>
+#include <sql_class.h>
+#include <sql_plugin.h>
+#include <set_var.h>
+#include <sql_acl.h>
+#include "wsrep_priv.h"
+#include "wsrep_thd.h"
+#include "wsrep_xid.h"
+#include <my_dir.h>
+#include <cstdio>
+#include <cstdlib>
+
+const char* wsrep_provider = 0;
+const char* wsrep_provider_options = 0;
+const char* wsrep_cluster_address = 0;
+const char* wsrep_cluster_name = 0;
+const char* wsrep_node_name = 0;
+const char* wsrep_node_address = 0;
+const char* wsrep_node_incoming_address = 0;
+const char* wsrep_start_position = 0;
+
+int wsrep_init_vars()
+{
+ wsrep_provider = my_strdup(WSREP_NONE, MYF(MY_WME));
+ wsrep_provider_options= my_strdup("", MYF(MY_WME));
+ wsrep_cluster_address = my_strdup("", MYF(MY_WME));
+ wsrep_cluster_name = my_strdup(WSREP_CLUSTER_NAME, MYF(MY_WME));
+ wsrep_node_name = my_strdup("", MYF(MY_WME));
+ wsrep_node_address = my_strdup("", MYF(MY_WME));
+ wsrep_node_incoming_address= my_strdup(WSREP_NODE_INCOMING_AUTO, MYF(MY_WME));
+ wsrep_start_position = my_strdup(WSREP_START_POSITION_ZERO, MYF(MY_WME));
+
+ global_system_variables.binlog_format=BINLOG_FORMAT_ROW;
+ return 0;
+}
+
+bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type)
+{
+ if (var_type == OPT_GLOBAL) {
+ // FIXME: this variable probably should be changed only per session
+ thd->variables.wsrep_on = global_system_variables.wsrep_on;
+ }
+ return false;
+}
+
+bool wsrep_causal_reads_update (sys_var *self, THD* thd, enum_var_type var_type)
+{
+ // wsrep_sync_wait should also be updated.
+ if (var_type == OPT_GLOBAL) {
+ if (global_system_variables.wsrep_causal_reads) {
+ global_system_variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ;
+ } else {
+ global_system_variables.wsrep_sync_wait &= ~WSREP_SYNC_WAIT_BEFORE_READ;
+ }
+ } else {
+ if (thd->variables.wsrep_causal_reads) {
+ thd->variables.wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ;
+ } else {
+ thd->variables.wsrep_sync_wait &= ~WSREP_SYNC_WAIT_BEFORE_READ;
+ }
+ }
+
+ return false;
+}
+
+bool wsrep_sync_wait_update (sys_var* self, THD* thd, enum_var_type var_type)
+{
+ // wsrep_causal_reads should also be updated.
+ if (var_type == OPT_GLOBAL) {
+ global_system_variables.wsrep_causal_reads=
+ global_system_variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ;
+ } else {
+ thd->variables.wsrep_causal_reads=
+ thd->variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ;
+ }
+ return false;
+}
+
+/* Check that start_str has the form "<UUID>:<seqno>". Returns 0 if so, else 1. */
+static int wsrep_start_position_verify (const char* start_str)
+{
+ size_t start_len;
+ wsrep_uuid_t uuid;
+ ssize_t uuid_len;
+ start_len = strlen (start_str);
+ if (start_len < 34) // NOTE(review): looks like a minimum-length bound - confirm
+ return 1;
+
+ uuid_len = wsrep_uuid_scan (start_str, start_len, &uuid);
+ if (uuid_len < 0 || (start_len - uuid_len) < 2) // need ':' plus >= 1 more char
+ return 1;
+
+ if (start_str[uuid_len] != ':') // separator should follow UUID
+ return 1;
+
+ char* endptr;
+ wsrep_seqno_t const seqno __attribute__((unused)) // to avoid GCC warnings
+ (strtoll(&start_str[uuid_len + 1], &endptr, 10));
+ // only endptr is used below: the parsed value itself is discarded
+ if (*endptr == '\0') return 0; // remaining string was seqno
+
+ return 1;
+}
+
+bool wsrep_start_position_check (sys_var *self, THD* thd, set_var* var)
+{
+ char start_pos_buf[FN_REFLEN];
+
+ if ((! var->save_result.string_value.str) ||
+ (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety
+ goto err;
+
+ memcpy(start_pos_buf, var->save_result.string_value.str,
+ var->save_result.string_value.length);
+ start_pos_buf[var->save_result.string_value.length]= 0;
+
+ if (!wsrep_start_position_verify(start_pos_buf)) return 0;
+
+err:
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ var->save_result.string_value.str ?
+ var->save_result.string_value.str : "NULL");
+ return 1;
+}
+
+static
+void wsrep_set_local_position(const char* const value, bool const sst)
+{
+ size_t const value_len = strlen(value);
+ wsrep_uuid_t uuid;
+ size_t const uuid_len = wsrep_uuid_scan(value, value_len, &uuid);
+ wsrep_seqno_t const seqno = strtoll(value + uuid_len + 1, NULL, 10);
+
+ if (sst) {
+ wsrep_sst_received (wsrep, uuid, seqno, NULL, 0);
+ } else {
+ // initialization
+ local_uuid = uuid;
+ local_seqno = seqno;
+ }
+}
+
+bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ WSREP_INFO ("wsrep_start_position var submitted: '%s'",
+ wsrep_start_position);
+ // since this value passed wsrep_start_position_check, don't check anything
+ // here
+ wsrep_set_local_position (wsrep_start_position, true);
+ return 0;
+}
+
+void wsrep_start_position_init (const char* val)
+{
+ if (NULL == val || wsrep_start_position_verify (val))
+ {
+ WSREP_ERROR("Bad initial value for wsrep_start_position: %s",
+ (val ? val : ""));
+ return;
+ }
+
+ wsrep_set_local_position (val, false);
+}
+
+/* Find "<opt_name>=<number>;" in opts and store the number in *opt_value.
+   Returns 0 on success, 1 otherwise (option absent, out of range, no memory). */
+static int get_provider_option_value(const char* opts,
+                                     const char* opt_name,
+                                     ulong* opt_value)
+{
+  int ret= 1;
+  ulong opt_value_tmp;
+  char *opt_value_str, *s, *opts_copy= opts ? my_strdup(opts, MYF(MY_WME)) : NULL;
+  if (opts_copy == NULL) return ret; // no input or allocation failure
+  if ((opt_value_str= strstr(opts_copy, opt_name)) == NULL) goto end;
+  opt_value_str= strtok_r(opt_value_str, "=", &s);
+  if (opt_value_str == NULL) goto end;
+  opt_value_str= strtok_r(NULL, ";", &s);
+  if (opt_value_str == NULL) goto end;
+
+  errno= 0; // strtoul() only sets errno: a stale ERANGE would fail valid input
+  opt_value_tmp= strtoul(opt_value_str, NULL, 10);
+  if (errno == ERANGE) goto end;
+  *opt_value= opt_value_tmp;
+  ret= 0;
+end:
+  my_free(opts_copy);
+  return ret;
+}
+
+static bool refresh_provider_options()
+{
+ WSREP_DEBUG("refresh_provider_options: %s",
+ (wsrep_provider_options) ? wsrep_provider_options : "null");
+ char* opts= wsrep->options_get(wsrep);
+ if (opts)
+ {
+ wsrep_provider_options_init(opts);
+ get_provider_option_value(wsrep_provider_options,
+ (char*)"repl.max_ws_size",
+ &wsrep_max_ws_size);
+ free(opts);
+ }
+ else
+ {
+ WSREP_ERROR("Failed to get provider options");
+ return true;
+ }
+ return false;
+}
+
+static int wsrep_provider_verify (const char* provider_str)
+{
+ MY_STAT f_stat;
+ char path[FN_REFLEN];
+
+ if (!provider_str || strlen(provider_str)== 0)
+ return 1;
+
+ if (!strcmp(provider_str, WSREP_NONE))
+ return 0;
+
+ if (!unpack_filename(path, provider_str))
+ return 1;
+
+ /* check that provider file exists */
+ memset(&f_stat, 0, sizeof(MY_STAT));
+ if (!my_stat(path, &f_stat, MYF(0)))
+ {
+ return 1;
+ }
+ return 0;
+}
+
+bool wsrep_provider_check (sys_var *self, THD* thd, set_var* var)
+{
+ char wsrep_provider_buf[FN_REFLEN];
+
+ if ((! var->save_result.string_value.str) ||
+ (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety
+ goto err;
+
+ memcpy(wsrep_provider_buf, var->save_result.string_value.str,
+ var->save_result.string_value.length);
+ wsrep_provider_buf[var->save_result.string_value.length]= 0;
+
+ if (!wsrep_provider_verify(wsrep_provider_buf)) return 0;
+
+err:
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ var->save_result.string_value.str ?
+ var->save_result.string_value.str : "NULL");
+ return 1;
+}
+
+bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ bool rcode= false;
+
+ bool wsrep_on_saved= thd->variables.wsrep_on;
+ thd->variables.wsrep_on= false;
+
+ WSREP_DEBUG("wsrep_provider_update: %s", wsrep_provider);
+
+ /* stop replication is heavy operation, and includes closing all client
+ connections. Closing clients may need to get LOCK_global_system_variables
+ at least in MariaDB.
+
+ Note: releasing LOCK_global_system_variables may cause race condition, if
+ there can be several concurrent clients changing wsrep_provider
+ */
+ mysql_mutex_unlock(&LOCK_global_system_variables);
+ wsrep_stop_replication(thd);
+ mysql_mutex_lock(&LOCK_global_system_variables);
+
+ if (wsrep_inited == 1)
+ wsrep_deinit(false);
+
+ char* tmp= strdup(wsrep_provider); // wsrep_init() rewrites provider
+ //when fails
+ if (wsrep_init())
+ {
+ my_error(ER_CANT_OPEN_LIBRARY, MYF(0), tmp);
+ rcode = true;
+ }
+ free(tmp);
+
+ // we sure don't want to use old address with new provider
+ wsrep_cluster_address_init(NULL);
+ wsrep_provider_options_init(NULL);
+
+ thd->variables.wsrep_on= wsrep_on_saved;
+
+ refresh_provider_options();
+
+ return rcode;
+}
+
+void wsrep_provider_init (const char* value)
+{
+ WSREP_DEBUG("wsrep_provider_init: %s -> %s",
+ (wsrep_provider) ? wsrep_provider : "null",
+ (value) ? value : "null");
+ if (NULL == value || wsrep_provider_verify (value))
+ {
+ WSREP_ERROR("Bad initial value for wsrep_provider: %s",
+ (value ? value : ""));
+ return;
+ }
+
+ if (wsrep_provider) my_free((void *)wsrep_provider);
+ wsrep_provider = my_strdup(value, MYF(0));
+}
+
+bool wsrep_provider_options_check(sys_var *self, THD* thd, set_var* var)
+{
+ return 0;
+}
+
+bool wsrep_provider_options_update(sys_var *self, THD* thd, enum_var_type type)
+{
+ wsrep_status_t ret= wsrep->options_set(wsrep, wsrep_provider_options);
+ if (ret != WSREP_OK)
+ {
+ WSREP_ERROR("Set options returned %d", ret);
+ refresh_provider_options();
+ return true;
+ }
+ return refresh_provider_options();
+}
+
+void wsrep_provider_options_init(const char* value) // NULL clears the value
+{
+  const char* const old= wsrep_provider_options; // may be value itself
+  wsrep_provider_options = (value) ? my_strdup(value, MYF(0)) : NULL;
+  if (old) my_free((void *)old); // after the copy; was leaked when aliased
+}
+
+static int wsrep_cluster_address_verify (const char* cluster_address_str)
+{
+ /* There is no predefined address format, it depends on provider. */
+ return 0;
+}
+
+bool wsrep_cluster_address_check (sys_var *self, THD* thd, set_var* var)
+{
+ char addr_buf[FN_REFLEN];
+
+ if ((! var->save_result.string_value.str) ||
+ (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety
+ goto err;
+
+ memcpy(addr_buf, var->save_result.string_value.str,
+ var->save_result.string_value.length);
+ addr_buf[var->save_result.string_value.length]= 0;
+
+ if (!wsrep_cluster_address_verify(addr_buf)) return 0;
+
+ err:
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ var->save_result.string_value.str ?
+ var->save_result.string_value.str : "NULL");
+ return 1;
+}
+
+/* Restart replication: stop it, then start it again. Always returns false. */
+bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ bool wsrep_on_saved= thd->variables.wsrep_on;
+ thd->variables.wsrep_on= false; // restored before returning
+ /* stop replication is heavy operation, and includes closing all client
+ connections. Closing clients may need to get LOCK_global_system_variables
+ at least in MariaDB.
+ Note: releasing LOCK_global_system_variables may cause race condition, if
+ there can be several concurrent clients changing wsrep_provider
+ */
+ mysql_mutex_unlock(&LOCK_global_system_variables);
+ wsrep_stop_replication(thd);
+
+ /*
+ Unlock and lock LOCK_wsrep_slave_threads to maintain lock order & avoid
+ any potential deadlock.
+ NOTE(review): neither mutex is locked in this function before being
+ unlocked, so both are presumably held by the caller - confirm.
+ */
+ mysql_mutex_unlock(&LOCK_wsrep_slave_threads);
+ mysql_mutex_lock(&LOCK_global_system_variables);
+ mysql_mutex_lock(&LOCK_wsrep_slave_threads);
+
+ if (wsrep_start_replication())
+ {
+ wsrep_create_rollbacker();
+ wsrep_create_appliers(wsrep_slave_threads);
+ }
+
+ thd->variables.wsrep_on= wsrep_on_saved;
+ return false;
+}
+
+void wsrep_cluster_address_init (const char* value)
+{
+ WSREP_DEBUG("wsrep_cluster_address_init: %s -> %s",
+ (wsrep_cluster_address) ? wsrep_cluster_address : "null",
+ (value) ? value : "null");
+
+ if (wsrep_cluster_address) my_free ((void*)wsrep_cluster_address);
+ wsrep_cluster_address = (value) ? my_strdup(value, MYF(0)) : NULL;
+}
+
+/* wsrep_cluster_name cannot be NULL or an empty string. */
+bool wsrep_cluster_name_check (sys_var *self, THD* thd, set_var* var)
+{
+ if (!var->save_result.string_value.str ||
+ (var->save_result.string_value.length == 0))
+ {
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ (var->save_result.string_value.str ?
+ var->save_result.string_value.str : "NULL"));
+ return 1;
+ }
+ return 0;
+}
+
+bool wsrep_cluster_name_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return 0;
+}
+
+bool wsrep_node_name_check (sys_var *self, THD* thd, set_var* var)
+{
+ // TODO: for now 'allow' 0-length string to be valid (default)
+ if (!var->save_result.string_value.str)
+ {
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ (var->save_result.string_value.str ?
+ var->save_result.string_value.str : "NULL"));
+ return 1;
+ }
+ return 0;
+}
+
+bool wsrep_node_name_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return 0;
+}
+
+// TODO: do something more elaborate, like checking connectivity
+bool wsrep_node_address_check (sys_var *self, THD* thd, set_var* var)
+{
+ char addr_buf[FN_REFLEN];
+
+ if ((! var->save_result.string_value.str) ||
+ (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety
+ goto err;
+
+ memcpy(addr_buf, var->save_result.string_value.str,
+ var->save_result.string_value.length);
+ addr_buf[var->save_result.string_value.length]= 0;
+
+ // TODO: for now 'allow' 0-length string to be valid (default)
+ return 0;
+
+err:
+ my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str,
+ var->save_result.string_value.str ?
+ var->save_result.string_value.str : "NULL");
+ return 1;
+}
+
+bool wsrep_node_address_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return 0;
+}
+
+/* Replace wsrep_node_address with a private copy of value (NULL clears it). */
+void wsrep_node_address_init (const char* value)
+{
+  const char* const old= wsrep_node_address; // value may alias it or be NULL
+  wsrep_node_address = (value) ? my_strdup(value, MYF(0)) : NULL;
+  if (old) my_free ((void*)old); // freed last, so an aliasing value stays valid
+}
+
+bool wsrep_slave_threads_check (sys_var *self, THD* thd, set_var* var)
+{
+ mysql_mutex_lock(&LOCK_wsrep_slave_threads);
+ wsrep_slave_count_change += (var->save_result.ulonglong_value -
+ wsrep_slave_threads);
+ mysql_mutex_unlock(&LOCK_wsrep_slave_threads);
+
+ return 0;
+}
+
+bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ if (wsrep_slave_count_change > 0)
+ {
+ wsrep_create_appliers(wsrep_slave_count_change);
+ wsrep_slave_count_change = 0;
+ }
+ return false;
+}
+
+bool wsrep_desync_check (sys_var *self, THD* thd, set_var* var)
+{
+ bool new_wsrep_desync= (bool) var->save_result.ulonglong_value;
+ if (wsrep_desync == new_wsrep_desync) {
+ if (new_wsrep_desync) {
+ push_warning (thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_VALUE_FOR_VAR,
+ "'wsrep_desync' is already ON.");
+ } else {
+ push_warning (thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_VALUE_FOR_VAR,
+ "'wsrep_desync' is already OFF.");
+ }
+ return false;
+ }
+ wsrep_status_t ret(WSREP_WARNING);
+ if (new_wsrep_desync) {
+ ret = wsrep->desync (wsrep);
+ if (ret != WSREP_OK) {
+ WSREP_WARN ("SET desync failed %d for schema: %s, query: %s", ret,
+ (thd->db ? thd->db : "(null)"),
+ thd->query());
+ my_error (ER_CANNOT_USER, MYF(0), "'desync'", thd->query());
+ return true;
+ }
+ } else {
+ ret = wsrep->resync (wsrep);
+ if (ret != WSREP_OK) {
+ WSREP_WARN ("SET resync failed %d for schema: %s, query: %s", ret,
+ (thd->db ? thd->db : "(null)"),
+ thd->query());
+ my_error (ER_CANNOT_USER, MYF(0), "'resync'", thd->query());
+ return true;
+ }
+ }
+ return false;
+}
+
+bool wsrep_desync_update (sys_var *self, THD* thd, enum_var_type type)
+{
+ return false;
+}
+
+/* Pass wsrep_max_ws_size to the provider as "repl.max_ws_size"; true on error. */
+bool wsrep_max_ws_size_update (sys_var *self, THD *thd, enum_var_type)
+{
+  char max_ws_size_opt[128];
+  my_snprintf(max_ws_size_opt, sizeof(max_ws_size_opt),
+              "repl.max_ws_size=%lu", wsrep_max_ws_size); // ulong, so not %d
+  wsrep_status_t ret= wsrep->options_set(wsrep, max_ws_size_opt);
+  if (ret != WSREP_OK) {
+    WSREP_ERROR("Set options returned %d", ret);
+    refresh_provider_options(); // re-read what the provider actually holds
+    return true;
+  }
+  return refresh_provider_options();
+}
+
+/*
+ * Status variables stuff below
+ */
+static inline void
+wsrep_assign_to_mysql (SHOW_VAR* mysql, wsrep_stats_var* wsrep)
+{
+ mysql->name = wsrep->name;
+ switch (wsrep->type) {
+ case WSREP_VAR_INT64:
+ mysql->value = (char*) &wsrep->value._int64;
+ mysql->type = SHOW_LONGLONG;
+ break;
+ case WSREP_VAR_STRING:
+ mysql->value = (char*) &wsrep->value._string;
+ mysql->type = SHOW_CHAR_PTR;
+ break;
+ case WSREP_VAR_DOUBLE:
+ mysql->value = (char*) &wsrep->value._double;
+ mysql->type = SHOW_DOUBLE;
+ break;
+ }
+}
+
+#if DYNAMIC
+// somehow this mysql status thing works only with statically allocated arrays.
+static SHOW_VAR* mysql_status_vars = NULL;
+static int mysql_status_len = -1;
+#else
+static SHOW_VAR mysql_status_vars[512 + 1];
+static const int mysql_status_len = 512;
+#endif
+
+static void export_wsrep_status_to_mysql(THD* thd)
+{
+ int wsrep_status_len, i;
+
+ wsrep_free_status(thd);
+
+ thd->wsrep_status_vars = wsrep->stats_get(wsrep);
+
+ if (!thd->wsrep_status_vars) {
+ return;
+ }
+
+ for (wsrep_status_len = 0;
+ thd->wsrep_status_vars[wsrep_status_len].name != NULL;
+ wsrep_status_len++) {
+ /* */
+ }
+
+#if DYNAMIC
+ if (wsrep_status_len != mysql_status_len) {
+ void* tmp = realloc (mysql_status_vars,
+ (wsrep_status_len + 1) * sizeof(SHOW_VAR));
+ if (!tmp) {
+
+ sql_print_error ("Out of memory for wsrep status variables."
+ "Number of variables: %d", wsrep_status_len);
+ return;
+ }
+
+ mysql_status_len = wsrep_status_len;
+ mysql_status_vars = (SHOW_VAR*)tmp;
+ }
+ /* @TODO: fix this: */
+#else
+ if (mysql_status_len < wsrep_status_len) wsrep_status_len= mysql_status_len;
+#endif
+
+ for (i = 0; i < wsrep_status_len; i++)
+ wsrep_assign_to_mysql (mysql_status_vars + i, thd->wsrep_status_vars + i);
+
+ mysql_status_vars[wsrep_status_len].name = NullS;
+ mysql_status_vars[wsrep_status_len].value = NullS;
+ mysql_status_vars[wsrep_status_len].type = SHOW_LONG;
+}
+
+int wsrep_show_status (THD *thd, SHOW_VAR *var, char *buff)
+{
+ export_wsrep_status_to_mysql(thd);
+ var->type= SHOW_ARRAY;
+ var->value= (char *) &mysql_status_vars;
+ return 0;
+}
+
+void wsrep_free_status (THD* thd)
+{
+ if (thd->wsrep_status_vars)
+ {
+ wsrep->stats_free (wsrep, thd->wsrep_status_vars);
+ thd->wsrep_status_vars = 0;
+ }
+}
diff --git a/sql/wsrep_var.h b/sql/wsrep_var.h
new file mode 100644
index 00000000000..f72df9d098a
--- /dev/null
+++ b/sql/wsrep_var.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2013 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
+
+#ifndef WSREP_VAR_H
+#define WSREP_VAR_H
+
+#define WSREP_CLUSTER_NAME "my_wsrep_cluster"
+#define WSREP_NODE_INCOMING_AUTO "AUTO"
+#define WSREP_START_POSITION_ZERO "00000000-0000-0000-0000-000000000000:-1"
+
+// MySQL variables funcs
+
+#include "sql_priv.h"
+class sys_var;
+class set_var;
+class THD;
+
+int wsrep_init_vars();
+
+#define CHECK_ARGS (sys_var *self, THD* thd, set_var *var)
+#define UPDATE_ARGS (sys_var *self, THD* thd, enum_var_type type)
+#define DEFAULT_ARGS (THD* thd, enum_var_type var_type)
+#define INIT_ARGS (const char* opt)
+
+extern bool wsrep_on_update UPDATE_ARGS;
+extern bool wsrep_causal_reads_update UPDATE_ARGS;
+extern bool wsrep_sync_wait_update UPDATE_ARGS;
+extern bool wsrep_start_position_check CHECK_ARGS;
+extern bool wsrep_start_position_update UPDATE_ARGS;
+extern void wsrep_start_position_init INIT_ARGS;
+
+extern bool wsrep_provider_check CHECK_ARGS;
+extern bool wsrep_provider_update UPDATE_ARGS;
+extern void wsrep_provider_init INIT_ARGS;
+
+extern bool wsrep_provider_options_check CHECK_ARGS;
+extern bool wsrep_provider_options_update UPDATE_ARGS;
+extern void wsrep_provider_options_init INIT_ARGS;
+
+extern bool wsrep_cluster_address_check CHECK_ARGS;
+extern bool wsrep_cluster_address_update UPDATE_ARGS;
+extern void wsrep_cluster_address_init INIT_ARGS;
+
+extern bool wsrep_cluster_name_check CHECK_ARGS;
+extern bool wsrep_cluster_name_update UPDATE_ARGS;
+
+extern bool wsrep_node_name_check CHECK_ARGS;
+extern bool wsrep_node_name_update UPDATE_ARGS;
+
+extern bool wsrep_node_address_check CHECK_ARGS;
+extern bool wsrep_node_address_update UPDATE_ARGS;
+extern void wsrep_node_address_init INIT_ARGS;
+
+extern bool wsrep_sst_method_check CHECK_ARGS;
+extern bool wsrep_sst_method_update UPDATE_ARGS;
+extern void wsrep_sst_method_init INIT_ARGS;
+
+extern bool wsrep_sst_receive_address_check CHECK_ARGS;
+extern bool wsrep_sst_receive_address_update UPDATE_ARGS;
+
+extern bool wsrep_sst_auth_check CHECK_ARGS;
+extern bool wsrep_sst_auth_update UPDATE_ARGS;
+extern void wsrep_sst_auth_init INIT_ARGS;
+
+extern bool wsrep_sst_donor_check CHECK_ARGS;
+extern bool wsrep_sst_donor_update UPDATE_ARGS;
+
+extern bool wsrep_slave_threads_check CHECK_ARGS;
+extern bool wsrep_slave_threads_update UPDATE_ARGS;
+
+extern bool wsrep_desync_check CHECK_ARGS;
+extern bool wsrep_desync_update UPDATE_ARGS;
+
+extern bool wsrep_max_ws_size_update UPDATE_ARGS;
+
+#endif /* WSREP_VAR_H */
diff --git a/sql/wsrep_xid.cc b/sql/wsrep_xid.cc
new file mode 100644
index 00000000000..056da5748b9
--- /dev/null
+++ b/sql/wsrep_xid.cc
@@ -0,0 +1,150 @@
+/* Copyright 2015 Codership Oy <http://www.codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+//! @file helpers that encode a wsrep position (UUID:seqno) in an XID and store/load it as the storage engine checkpoint
+
+#include "wsrep_xid.h"
+#include "sql_class.h"
+#include "wsrep_mysqld.h" // for logging macros
+
+/*
+ * WSREPXid
+ */
+
+#define WSREP_XID_PREFIX "WSREPXid"
+#define WSREP_XID_PREFIX_LEN MYSQL_XID_PREFIX_LEN
+#define WSREP_XID_UUID_OFFSET 8
+#define WSREP_XID_SEQNO_OFFSET (WSREP_XID_UUID_OFFSET + sizeof(wsrep_uuid_t))
+#define WSREP_XID_GTRID_LEN (WSREP_XID_SEQNO_OFFSET + sizeof(wsrep_seqno_t))
+
+void wsrep_xid_init(XID* xid, const wsrep_uuid_t& uuid, wsrep_seqno_t seqno)
+{
+ xid->formatID= 1;
+ xid->gtrid_length= WSREP_XID_GTRID_LEN;
+ xid->bqual_length= 0;
+ memset(xid->data, 0, sizeof(xid->data));
+ memcpy(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN);
+ memcpy(xid->data + WSREP_XID_UUID_OFFSET, &uuid, sizeof(wsrep_uuid_t));
+ memcpy(xid->data + WSREP_XID_SEQNO_OFFSET, &seqno, sizeof(wsrep_seqno_t));
+}
+
+/* Nonzero if xid_ptr points to an XID laid out the way wsrep_xid_init() does. */
+int wsrep_is_wsrep_xid(const void* xid_ptr)
+{
+  const XID* const candidate= static_cast<const XID*>(xid_ptr);
+  if (candidate->formatID != 1 || candidate->bqual_length != 0) return 0;
+  if (candidate->gtrid_length != WSREP_XID_GTRID_LEN) return 0;
+  return memcmp(candidate->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN) == 0;
+}
+
+/* Pointer to the UUID embedded in a wsrep XID (it points into xid.data), or
+   to WSREP_UUID_UNDEFINED when xid is not a wsrep XID. */
+const wsrep_uuid_t* wsrep_xid_uuid(const XID& xid)
+{
+  if (!wsrep_is_wsrep_xid(&xid)) return &WSREP_UUID_UNDEFINED;
+  const void* const uuid_bytes= xid.data + WSREP_XID_UUID_OFFSET;
+  return static_cast<const wsrep_uuid_t*>(uuid_bytes);
+}
+
+/* Extract the seqno stored in a wsrep XID by wsrep_xid_init().
+   Returns WSREP_SEQNO_UNDEFINED when xid is not a wsrep XID. */
+wsrep_seqno_t wsrep_xid_seqno(const XID& xid)
+{
+  // Anything not recognised by wsrep_is_wsrep_xid() carries no position.
+  if (!wsrep_is_wsrep_xid(&xid))
+    return WSREP_SEQNO_UNDEFINED;
+
+  // memcpy: the seqno sits at a byte offset inside xid.data.
+  wsrep_seqno_t position;
+  memcpy(&position, xid.data + WSREP_XID_SEQNO_OFFSET, sizeof(position));
+  return position;
+}
+
+static my_bool set_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg)
+{
+ XID* xid= static_cast<XID*>(arg);
+ handlerton* hton= plugin_data(plugin, handlerton *);
+
+ if (hton->db_type == DB_TYPE_INNODB)
+ {
+ const wsrep_uuid_t* uuid(wsrep_xid_uuid(*xid));
+ char uuid_str[40] = {0, };
+ wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str));
+ WSREP_DEBUG("Set WSREPXid for InnoDB: %s:%lld",
+ uuid_str, (long long)wsrep_xid_seqno(*xid));
+ hton->wsrep_set_checkpoint(hton, xid);
+ }
+
+ return FALSE;
+}
+
+void wsrep_set_SE_checkpoint(XID& xid)
+{
+ plugin_foreach(NULL, set_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, &xid);
+}
+
+void wsrep_set_SE_checkpoint(const wsrep_uuid_t& uuid, wsrep_seqno_t seqno)
+{
+ XID xid;
+ wsrep_xid_init(&xid, uuid, seqno);
+ wsrep_set_SE_checkpoint(xid);
+}
+
+static my_bool get_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg)
+{
+ XID* xid= reinterpret_cast<XID*>(arg);
+ handlerton* hton= plugin_data(plugin, handlerton *);
+
+ if (hton->db_type == DB_TYPE_INNODB)
+ {
+ hton->wsrep_get_checkpoint(hton, xid);
+ const wsrep_uuid_t* uuid(wsrep_xid_uuid(*xid));
+ char uuid_str[40] = {0, };
+ wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str));
+ WSREP_DEBUG("Read WSREPXid from InnoDB: %s:%lld",
+ uuid_str, (long long)wsrep_xid_seqno(*xid));
+ }
+
+ return FALSE;
+}
+
+void wsrep_get_SE_checkpoint(XID& xid)
+{
+ plugin_foreach(NULL, get_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, &xid);
+}
+
+void wsrep_get_SE_checkpoint(wsrep_uuid_t& uuid, wsrep_seqno_t& seqno)
+{
+ uuid= WSREP_UUID_UNDEFINED;
+ seqno= WSREP_SEQNO_UNDEFINED;
+
+ XID xid;
+ memset(&xid, 0, sizeof(xid));
+ xid.formatID= -1;
+
+ wsrep_get_SE_checkpoint(xid);
+
+ if (xid.formatID == -1) return; // nil XID
+
+ if (!wsrep_is_wsrep_xid(&xid))
+ {
+ WSREP_WARN("Read non-wsrep XID from storage engines.");
+ return;
+ }
+
+ uuid= *wsrep_xid_uuid(xid);
+ seqno= wsrep_xid_seqno(xid);
+}
diff --git a/sql/wsrep_xid.h b/sql/wsrep_xid.h
new file mode 100644
index 00000000000..8a43e49c733
--- /dev/null
+++ b/sql/wsrep_xid.h
@@ -0,0 +1,33 @@
+/* Copyright (C) 2015 Codership Oy <info@codership.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
+
+#ifndef WSREP_XID_H
+#define WSREP_XID_H
+
+#include <my_config.h>
+#include "../wsrep/wsrep_api.h"
+#include "handler.h" // XID typedef
+
+void wsrep_xid_init(xid_t*, const wsrep_uuid_t&, wsrep_seqno_t);
+int wsrep_is_wsrep_xid(const void* xid);
+const wsrep_uuid_t* wsrep_xid_uuid(const XID&);
+wsrep_seqno_t wsrep_xid_seqno(const XID&);
+
+//void wsrep_get_SE_checkpoint(XID&); /* uncomment if needed */
+void wsrep_get_SE_checkpoint(wsrep_uuid_t&, wsrep_seqno_t&);
+//void wsrep_set_SE_checkpoint(XID&); /* uncomment if needed */
+void wsrep_set_SE_checkpoint(const wsrep_uuid_t&, wsrep_seqno_t);
+
+#endif /* WSREP_XID_H */