diff options
author | Seppo Jaakola <seppo.jaakola@codership.com> | 2012-04-13 01:33:24 +0300 |
---|---|---|
committer | Seppo Jaakola <seppo.jaakola@codership.com> | 2012-04-13 01:33:24 +0300 |
commit | 2fc1ec43560b453b4694adbc1aac11f3f23b1761 (patch) | |
tree | 880c3875dae28574a90bf891ef901027723d21f6 | |
parent | 51c77ec5d406843bb8c8131f0687f4f75839d045 (diff) | |
download | mariadb-git-2fc1ec43560b453b4694adbc1aac11f3f23b1761.tar.gz |
Initial push of codership-wsrep API implementation for MariaDB.
Merge of:
lp:maria/5.5, #3334: http://bazaar.launchpad.net/~maria-captains/maria/5.5/revision/3334
lp:codership-mysql/5.5, #3725: http://bazaar.launchpad.net/~codership/codership-mysql/wsrep-5.5/revision/3725
80 files changed, 7198 insertions, 66 deletions
diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh index c9eb1ebc03f..b0d6f97284d 100755 --- a/BUILD/SETUP.sh +++ b/BUILD/SETUP.sh @@ -204,7 +204,7 @@ all_configs="$SSL_LIBRARY --with-plugins=max --with-plugin-ndbcluster --with-emb alpha_cflags="$check_cpu_cflags -Wa,-m$cpu_flag" amd64_cflags="$check_cpu_cflags" amd64_cxxflags="" # If dropping '--with-big-tables', add here "-DBIG_TABLES" -pentium_cflags="$check_cpu_cflags" +pentium_cflags="$check_cpu_cflags -m32" pentium64_cflags="$check_cpu_cflags -m64" ppc_cflags="$check_cpu_cflags" sparc_cflags="" diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b6d1acb45f..1e137382da4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -137,6 +137,7 @@ INCLUDE(plugin) INCLUDE(install_macros) INCLUDE(install_layout) INCLUDE(mysql_add_executable) +INCLUDE(wsrep) # Handle options OPTION(DISABLE_SHARED @@ -176,6 +177,12 @@ MARK_AS_ADVANCED(ENABLED_LOCAL_INFILE) OPTION(WITH_FAST_MUTEXES "Compile with fast mutexes" OFF) MARK_AS_ADVANCED(WITH_FAST_MUTEXES) +OPTION(WITH_INNODB_DISALLOW_WRITES "InnoDB freeze writes patch from Google" ${WITH_WSREP}) +IF (WITH_INNODB_DISALLOW_WRITES) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWITH_INNODB_DISALLOW_WRITES") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWITH_INNODB_DISALLOW_WRITES") +ENDIF() + # Set DBUG_OFF and other optional release-only flags for non-debug project types FOREACH(BUILD_TYPE RELEASE RELWITHDEBINFO MINSIZEREL) FOREACH(LANG C CXX) @@ -284,6 +291,9 @@ ADD_SUBDIRECTORY(vio) ADD_SUBDIRECTORY(regex) ADD_SUBDIRECTORY(mysys) ADD_SUBDIRECTORY(libmysql) +IF(WITH_WSREP) +ADD_SUBDIRECTORY(wsrep) +ENDIF() ADD_SUBDIRECTORY(extra) IF(NOT WITHOUT_SERVER) @@ -369,7 +379,7 @@ IF(NOT INSTALL_LAYOUT MATCHES "RPM") INSTALL(FILES README DESTINATION ${INSTALL_DOCREADMEDIR} COMPONENT Readme) INSTALL(FILES ${CMAKE_BINARY_DIR}/Docs/INFO_SRC ${CMAKE_BINARY_DIR}/Docs/INFO_BIN DESTINATION ${INSTALL_DOCDIR}) IF(UNIX) - INSTALL(FILES Docs/INSTALL-BINARY DESTINATION ${INSTALL_DOCREADMEDIR} COMPONENT Readme) + 
INSTALL(FILES Docs/INSTALL-BINARY Docs/README-wsrep DESTINATION ${INSTALL_DOCREADMEDIR} COMPONENT Readme) ENDIF() # MYSQL_DOCS_LOCATON is used in "make dist", points to the documentation directory SET(MYSQL_DOCS_LOCATION "" CACHE PATH "Location from where documentation is copied") @@ -377,6 +387,7 @@ IF(NOT INSTALL_LAYOUT MATCHES "RPM") INSTALL(DIRECTORY Docs/ DESTINATION ${INSTALL_DOCDIR} COMPONENT Documentation PATTERN "INSTALL-BINARY" EXCLUDE + PATTERN "README-wsrep" EXCLUDE PATTERN "Makefile.*" EXCLUDE PATTERN "glibc*" EXCLUDE PATTERN "linuxthreads.txt" EXCLUDE diff --git a/cmake/configure.pl b/cmake/configure.pl index 69f973c41fb..e8cfa0b1261 100644 --- a/cmake/configure.pl +++ b/cmake/configure.pl @@ -200,6 +200,16 @@ foreach my $option (@ARGV) $cmakeargs = $cmakeargs." -DMYSQL_DATADIR=".substr($option,14); next; } + if ($option =~ /layout=/) + { + $cmakeargs = $cmakeargs." -DINSTALL_LAYOUT=".substr($option,7); + next; + } + if ($option =~ /with-unix-socket-path=/) + { + $cmakeargs = $cmakeargs." -DMYSQL_UNIX_ADDR=".substr($option,22); + next; + } if ($option =~ /mysql-maintainer-mode/) { $cmakeargs = $cmakeargs." -DMYSQL_MAINTAINER_MODE=" . 
diff --git a/include/thr_lock.h b/include/thr_lock.h index 3f7a5ca988f..4551a3160ff 100644 --- a/include/thr_lock.h +++ b/include/thr_lock.h @@ -20,6 +20,15 @@ #ifdef __cplusplus extern "C" { #endif +#ifdef WITH_WSREP +#include <my_sys.h> + typedef int (* wsrep_thd_is_brute_force_fun)(void *); + typedef int (* wsrep_abort_thd_fun)(void *, void *, my_bool); + typedef int (* wsrep_on_fun)(void *); + void wsrep_thr_lock_init( + wsrep_thd_is_brute_force_fun bf_fun, wsrep_abort_thd_fun abort_fun, + my_bool debug, my_bool convert_LOCK_to_trx, wsrep_on_fun on_fun); +#endif #include <my_pthread.h> #include <my_list.h> @@ -95,6 +104,10 @@ typedef struct st_thr_lock_info { pthread_t thread; my_thread_id thread_id; +#ifdef WITH_WSREP + void *mysql_thd; // THD pointer + my_bool in_lock_tables; // true, if inside locking session +#endif } THR_LOCK_INFO; diff --git a/mysys/default.c b/mysys/default.c index c7ac0d89462..f331668de9f 100644 --- a/mysys/default.c +++ b/mysys/default.c @@ -87,6 +87,12 @@ static char my_defaults_extra_file_buffer[FN_REFLEN]; static my_bool defaults_already_read= FALSE; +#ifdef WITH_WSREP +/* The only purpose of this global array is to hold full name of my.cnf + * which seems to be otherwise unavailable */ +char wsrep_defaults_file[FN_REFLEN + 10]={0,}; +#endif /* WITH_WREP */ + /* Which directories are searched for options (and in which order) */ #define MAX_DEFAULT_DIRS 6 @@ -803,6 +809,10 @@ static int search_default_file_with_ext(Process_option_func opt_handler, if (!(fp= mysql_file_fopen(key_file_cnf, name, O_RDONLY, MYF(0)))) return 1; /* Ignore wrong files */ +#ifdef WITH_WSREP + strncpy(wsrep_defaults_file, name, sizeof(wsrep_defaults_file) - 1); +#endif /* WITH_WSREP */ + while (mysql_file_fgets(buff, sizeof(buff) - 1, fp)) { line++; diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c index e99956f9c8f..2979244962d 100644 --- a/mysys/thr_lock.c +++ b/mysys/thr_lock.c @@ -94,7 +94,24 @@ be any number of TL_WRITE_CONCURRENT_INSERT locks aktive at 
the same time. my_bool thr_lock_inited=0; ulong locks_immediate = 0L, locks_waited = 0L; enum thr_lock_type thr_upgraded_concurrent_insert_lock = TL_WRITE; - +#ifdef WITH_WSREP +static wsrep_thd_is_brute_force_fun wsrep_thd_is_brute_force= NULL; +static wsrep_abort_thd_fun wsrep_abort_thd= NULL; +static my_bool wsrep_debug; +static my_bool wsrep_convert_LOCK_to_trx; +static wsrep_on_fun wsrep_on = NULL; + +void wsrep_thr_lock_init( + wsrep_thd_is_brute_force_fun bf_fun, wsrep_abort_thd_fun abort_fun, + my_bool debug, my_bool convert_LOCK_to_trx, wsrep_on_fun on_fun +) { + wsrep_thd_is_brute_force = bf_fun; + wsrep_abort_thd = abort_fun; + wsrep_debug = debug; + wsrep_convert_LOCK_to_trx= convert_LOCK_to_trx; + wsrep_on = on_fun; +} +#endif /* The following constants are only for debug output */ #define MAX_THREADS 1000 #define MAX_LOCKS 1000 @@ -1147,6 +1164,108 @@ static void sort_locks(THR_LOCK_DATA **data,uint count) } } +#ifdef WITH_WSREP +/* + * If brute force applier would need to wait for a thr lock, + * it needs to make sure that it will get the lock without (too much) + * delay. + * We identify here the owners of blocking locks and ask them to + * abort. We then put our lock request in the first place in the + * wait queue. When lock holders abort (one by one) the lock release + * algorithm should grant the lock to us. We rely on this and proceed + * to wait_for_locks(). + * wsrep_break_locks() should be called in all the cases, where lock + * wait would happen. + * + * TODO: current implementation might not cover all possible lock wait + * situations. This needs an review still. + * TODO: lock release, might favor some other lock (instead our bf). + * This needs an condition to check for bf locks first. 
+ * TODO: we still have a debug fprintf, this should be removed + */ +static inline my_bool +wsrep_break_lock( + THR_LOCK_DATA *data, struct st_lock_list *lock_queue1, + struct st_lock_list *lock_queue2, struct st_lock_list *wait_queue) +{ + if (wsrep_on(data->owner->mysql_thd) && + wsrep_thd_is_brute_force && + wsrep_thd_is_brute_force(data->owner->mysql_thd)) + { + THR_LOCK_DATA *holder; + + /* if locking session conversion to transaction has been enabled, + we know that this conflicting lock must be read lock and furthermore, + lock holder is read-only. It is safe to wait for him. + */ +#ifdef TODO + if (wsrep_convert_LOCK_to_trx && + (THD*)(data->owner->mysql_thd)->in_lock_tables) + { + if (wsrep_debug) + fprintf(stderr,"WSREP wsrep_break_lock read lock untouched\n"); + return FALSE; + } +#endif + if (wsrep_debug) + fprintf(stderr,"WSREP wsrep_break_lock aborting locks\n"); + + /* aborting lock holder(s) here */ + for (holder=(lock_queue1) ? lock_queue1->data : NULL; + holder; + holder=holder->next) + { + if (!wsrep_thd_is_brute_force(holder->owner->mysql_thd)) + { + wsrep_abort_thd(data->owner->mysql_thd, + holder->owner->mysql_thd, FALSE); + } + else + { + if (wsrep_debug) + fprintf(stderr,"WSREP wsrep_break_lock skipping BF lock conflict\n"); + return FALSE; + } + } + for (holder=(lock_queue2) ? 
lock_queue2->data : NULL; + holder; + holder=holder->next) + { + if (!wsrep_thd_is_brute_force(holder->owner->mysql_thd)) + { + wsrep_abort_thd(data->owner->mysql_thd, + holder->owner->mysql_thd, FALSE); + } + else + { + if (wsrep_debug) + fprintf(stderr,"WSREP wsrep_break_lock skipping BF lock conflict\n"); + return FALSE; + } + } + + /* Add our lock to the head of the wait queue */ + if (*(wait_queue->last)==wait_queue->data) + { + wait_queue->last=&data->next; + assert(wait_queue->data==0); + } + else + { + assert(wait_queue->data!=0); + wait_queue->data->prev=&data->next; + } + data->next=wait_queue->data; + data->prev=&wait_queue->data; + wait_queue->data=data; + data->cond=get_cond(); + + statistic_increment(locks_immediate,&THR_LOCK_lock); + return TRUE; + } + return FALSE; +} +#endif enum enum_thr_lock_result thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_INFO *owner, diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index a454313d966..78456be59d4 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -307,6 +307,9 @@ IF(WIN32) INSTALL_SCRIPT(${CMAKE_CURRENT_BINARY_DIR}/${file}.pl COMPONENT Server_Scripts) ENDFOREACH() ELSE() + IF(WITH_WSREP) + SET(WSREP_BINARIES wsrep_sst_mysqldump wsrep_sst_rsync) + ENDIF() # On Unix, most of the files end up in the bin directory SET(mysql_config_COMPONENT COMPONENT Development) SET(BIN_SCRIPTS @@ -324,6 +327,7 @@ ELSE() mysqldumpslow mysqld_multi mysqld_safe + ${WSREP_BINARIES} ) FOREACH(file ${BIN_SCRIPTS}) IF(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${file}.sh) diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 22c51f3ce23..f74931388bb 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -13,6 +13,10 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +IF(WITH_WSREP) + SET(WSREP_INCLUDES ${CMAKE_SOURCE_DIR}/wsrep) +ENDIF() + INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/include 
${CMAKE_SOURCE_DIR}/sql @@ -20,6 +24,7 @@ ${CMAKE_SOURCE_DIR}/regex ${ZLIB_INCLUDE_DIR} ${SSL_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/sql +${WSREP_INCLUDES} ) SET(GEN_SOURCES @@ -35,6 +40,18 @@ ADD_DEFINITIONS(-DMYSQL_SERVER -DHAVE_EVENT_SCHEDULER -DHAVE_POOL_OF_THREADS) IF(SSL_DEFINES) ADD_DEFINITIONS(${SSL_DEFINES}) ENDIF() +IF(WITH_WSREP) + SET(WSREP_SOURCES + wsrep_check_opts.cc + wsrep_hton.cc + wsrep_mysqld.cc + wsrep_notify.cc + wsrep_sst.cc + wsrep_utils.cc + wsrep_var.cc + ) + SET(WSREP_LIB wsrep) +ENDIF() SET (SQL_SOURCE ../sql-common/client.c derror.cc des_key_file.cc @@ -86,6 +103,7 @@ SET (SQL_SOURCE threadpool_common.cc ../sql-common/mysql_async.c sql_logger.cc + ${WSREP_SOURCES} ${GEN_SOURCES} ${MYSYS_LIBWRAP_SOURCE} ) @@ -105,6 +123,7 @@ DTRACE_INSTRUMENT(sql) TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS} mysys dbug strings vio regex ${LIBWRAP} ${LIBCRYPT} ${LIBDL} + ${WSREP_LIB} ${SSL_LIBRARIES}) IF(WIN32) @@ -200,7 +219,6 @@ IF (NOT ${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR}) ENDIF() ENDIF() - INCLUDE(${CMAKE_SOURCE_DIR}/cmake/bison.cmake) RUN_BISON( ${CMAKE_CURRENT_SOURCE_DIR}/sql_yacc.yy diff --git a/sql/events.cc b/sql/events.cc index 8b4bab9e3a6..208914a4e6d 100644 --- a/sql/events.cc +++ b/sql/events.cc @@ -1145,7 +1145,19 @@ end: close_mysql_tables(thd); DBUG_RETURN(ret); } +#ifdef WITH_WSREP +int wsrep_create_event_query(THD *thd, uchar** buf, uint* buf_len) +{ + String log_query; + if (create_query_string(thd, &log_query)) + { + WSREP_WARN("events create string failed: %s", thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +#endif /* WITH_WSREP */ /** @} (End of group Event_Scheduler) */ diff --git a/sql/handler.cc b/sql/handler.cc index 0310180759c..c04759b196f 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -52,6 +52,9 @@ #include "../storage/maria/ha_maria.h" #endif +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* While we have 
legacy_db_type, we have this array to check for dups and to find handlerton from legacy_db_type. @@ -1192,7 +1195,11 @@ int ha_commit_trans(THD *thd, bool all) { /* Free resources and perform other cleanup even for 'empty' transactions. */ if (is_real_trans) - thd->transaction.cleanup(); +#ifdef WITH_WSREP + thd->transaction.cleanup(thd); +#else + thd->transaction.cleanup(); +#endif /* WITH_WSREP */ DBUG_RETURN(0); } @@ -1220,7 +1227,12 @@ int ha_commit_trans(THD *thd, bool all) mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE, MDL_EXPLICIT); +#ifdef WITH_WSREP + if (!WSREP(thd) && + thd->mdl_context.acquire_lock(&mdl_request, +#else if (thd->mdl_context.acquire_lock(&mdl_request, +#endif /* WITH_WSREP */ thd->variables.lock_wait_timeout)) { ha_rollback_trans(thd, all); @@ -1267,6 +1279,19 @@ int ha_commit_trans(THD *thd, bool all) err= ht->prepare(ht, thd, all); status_var_increment(thd->status_var.ha_prepare_count); if (err) +#ifdef WITH_WSREP + if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP) + { + error= 1; + /* avoid sending error, if we need to replay */ + if (thd->wsrep_conflict_state!= MUST_REPLAY) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), err); + } + } + else + /* not wsrep hton, bail to native mysql behavior */ +#endif my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); if (err) @@ -1277,6 +1302,13 @@ int ha_commit_trans(THD *thd, bool all) } DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); +#ifdef WITH_WSREP + if (!error && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid)) + { + // xid was rewritten by wsrep + xid= wsrep_xid_seqno(&thd->transaction.xid_state.xid); + } +#endif // WITH_WSREP if (!is_real_trans) { error= commit_one_phase_2(thd, all, trans, is_real_trans); @@ -1363,6 +1395,18 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) int error= 0; Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; DBUG_ENTER("commit_one_phase_2"); +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + char info[64]= { 
0, }; + snprintf (info, sizeof(info) - 1, "ha_commit_one_phase(%lld)", + (long long)thd->wsrep_trx_seqno); +#else + const char info[]="ha_commit_one_phase()"; +#endif /* WSREP_PROC_INFO */ + char* tmp_info= NULL; + if (WSREP(thd)) tmp_info= (char *)thd_proc_info(thd, info); +#endif /* WITH_WSREP */ + if (ha_info) { for (; ha_info; ha_info= ha_info_next) @@ -1391,7 +1435,14 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) } /* Free resources and perform other cleanup even for 'empty' transactions. */ if (is_real_trans) +#ifdef WITH_WSREP + thd->transaction.cleanup(thd); +#else thd->transaction.cleanup(); +#endif /* WITH_WSREP */ +#ifdef WITH_WSREP + if (WSREP(thd)) thd_proc_info(thd, tmp_info); +#endif /* WITH_WSREP */ DBUG_RETURN(error); } @@ -1466,7 +1517,11 @@ int ha_rollback_trans(THD *thd, bool all) } /* Always cleanup. Even if nht==0. There may be savepoints. */ if (is_real_trans) +#ifdef WITH_WSREP + thd->transaction.cleanup(thd); +#else thd->transaction.cleanup(); +#endif /* WITH_WSREP */ if (all) thd->transaction_rollback_request= FALSE; @@ -1631,7 +1686,13 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, got, hton_name(hton)->str); for (int i=0; i < got; i ++) { +#ifdef WITH_WSREP + my_xid x=(wsrep_is_wsrep_xid(&info->list[i]) ? 
+ wsrep_xid_seqno(&info->list[i]) : + info->list[i].get_my_xid()); +#else my_xid x=info->list[i].get_my_xid(); +#endif /* WITH_WSREP */ if (!x) // not "mine" - that is generated by external TM { #ifndef DBUG_OFF @@ -2635,7 +2696,12 @@ int handler::update_auto_increment() variables->auto_increment_increment); auto_inc_intervals_count++; /* Row-based replication does not need to store intervals in binlog */ +#ifdef WITH_WSREP + if (((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()) && + !thd->is_current_stmt_binlog_format_row()) +#else if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row()) +#endif /* WITH_WSREP */ thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(), auto_inc_interval_for_cur_row.values(), variables->auto_increment_increment); @@ -4821,7 +4887,11 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table) return (thd->is_current_stmt_binlog_format_row() && table->s->cached_row_logging_check && (thd->variables.option_bits & OPTION_BIN_LOG) && +#ifdef WITH_WSREP + ((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open())); +#else mysql_bin_log.is_open()); +#endif } @@ -5142,6 +5212,41 @@ void signal_log_not_needed(struct handlerton, char *log_file) } +#ifdef WITH_WSREP +/** + @details + This function makes the storage engine to force the victim transaction + to abort. Currently, only innodb has this functionality, but any SE + implementing the wsrep API should provide this service to support + multi-master operation. 
+ + @param bf_thd brute force THD asking for the abort + @param victim_thd victim THD to be aborted + + @return + always 0 +*/ + +int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) +{ + DBUG_ENTER("ha_wsrep_abort_transaction"); + if (!WSREP(bf_thd)) { + DBUG_RETURN(0); + } + + handlerton *hton= installed_htons[DB_TYPE_INNODB]; + if (hton && hton->wsrep_abort_transaction) + { + hton->wsrep_abort_transaction(hton, bf_thd, victim_thd, signal); + } + else + { + WSREP_WARN("cannot abort InnoDB transaction"); + } + + DBUG_RETURN(0); +} +#endif /* WITH_WSREP */ #ifdef TRANS_LOG_MGM_EXAMPLE_CODE /* Example of transaction log management functions based on assumption that logs diff --git a/sql/handler.h b/sql/handler.h index d56e3242ddd..4ac7202587e 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -355,6 +355,7 @@ enum legacy_db_type DB_TYPE_MARIA, /** Performance schema engine. */ DB_TYPE_PERFORMANCE_SCHEMA, + DB_TYPE_WSREP, DB_TYPE_FIRST_DYNAMIC=42, DB_TYPE_DEFAULT=127 // Must be last }; @@ -1042,6 +1043,10 @@ struct handlerton const char *wild, bool dir, List<LEX_STRING> *files); int (*table_exists_in_engine)(handlerton *hton, THD* thd, const char *db, const char *name); + int (*wsrep_abort_transaction)(handlerton *hton, THD *bf_thd, + THD *victim_thd, my_bool signal); + int (*wsrep_set_checkpoint)(handlerton *hton, const XID* xid); + int (*wsrep_get_checkpoint)(handlerton *hton, XID* xid); uint32 license; /* Flag for Engine License */ /* Optional clauses in the CREATE/ALTER TABLE @@ -2885,6 +2890,9 @@ bool key_uses_partial_cols(TABLE *table, uint keyno); extern const char *ha_row_type[]; extern MYSQL_PLUGIN_IMPORT const char *tx_isolation_names[]; extern MYSQL_PLUGIN_IMPORT const char *binlog_format_names[]; +#ifdef WITH_WSREP +extern MYSQL_PLUGIN_IMPORT const char *wsrep_binlog_format_names[]; +#endif /* WITH_WSREP */ extern TYPELIB tx_isolation_typelib; extern const char *myisam_stats_method_names[]; extern ulong total_ha, total_ha_2pc; @@ 
-2980,6 +2988,9 @@ int ha_enable_transaction(THD *thd, bool on); int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv); int ha_savepoint(THD *thd, SAVEPOINT *sv); int ha_release_savepoint(THD *thd, SAVEPOINT *sv); +#ifdef WITH_WSREP +int ha_wsrep_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal); +#endif /* WITH_WSREP */ /* these are called by storage engines */ void trans_register_ha(THD *thd, bool all, handlerton *ht); @@ -3010,6 +3021,9 @@ int ha_binlog_end(THD *thd); #define ha_binlog_wait(a) do {} while (0) #define ha_binlog_end(a) do {} while (0) #endif +#ifdef WITH_WSREP +void wsrep_brute_force_aborts(); +#endif const char *get_canonical_filename(handler *file, const char *path, char *tmp_path); diff --git a/sql/item_func.cc b/sql/item_func.cc index 92431a552c4..c56909d4335 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -2583,7 +2583,19 @@ void Item_func_rand::seed_random(Item *arg) TODO: do not do reinit 'rand' for every execute of PS/SP if args[0] is a constant. */ +#ifdef WITH_WSREP + uint32 tmp; + if (WSREP(current_thd)) + { + if (current_thd->wsrep_exec_mode==REPL_RECV) + tmp= current_thd->wsrep_rand; + else + tmp= current_thd->wsrep_rand= (uint32) arg->val_int(); + } else + tmp= (uint32) arg->val_int(); +#else uint32 tmp= (uint32) arg->val_int(); +#endif /* WITH_WSREP */ my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L), (uint32) (tmp*0x10000001L)); } diff --git a/sql/lock.cc b/sql/lock.cc index a7029548493..bbaa51dbbae 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -84,6 +84,10 @@ #include <hash.h> #include <assert.h> +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ + /** @defgroup Locking Locking @{ @@ -314,6 +318,9 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags) /* Copy the lock data array. thr_multi_lock() reorders its contents. 
*/ memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks, sql_lock->lock_count * sizeof(*sql_lock->locks)); +#ifdef WITH_WSREP + thd->lock_info.in_lock_tables= thd->in_lock_tables; +#endif /* Lock on the copied half of the lock data array. */ /* Lock on the copied half of the lock data array. */ rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks + sql_lock->lock_count, @@ -323,7 +330,11 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags) (void) unlock_external(thd, sql_lock->table, sql_lock->table_count); end: +#ifdef WITH_WSREP + thd_proc_info(thd, "mysql_lock_tables(): unlocking tables II"); +#else /* WITH_WSREP */ thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ if (thd->killed) { @@ -336,6 +347,9 @@ end: my_error(rc, MYF(0)); thd->set_time_after_lock(); +#ifdef WITH_WSREP + thd_proc_info(thd, "exit mysqld_lock_tables()"); +#endif /* WITH_WSREP */ DBUG_RETURN(rc); } @@ -1045,11 +1059,15 @@ void Global_read_lock::unlock_global_read_lock(THD *thd) { thd->mdl_context.release_lock(m_mdl_blocks_commits_lock); m_mdl_blocks_commits_lock= NULL; +#ifdef WITH_WSREP + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + wsrep->resume(wsrep); +#endif /* WITH_WSREP */ } thd->mdl_context.release_lock(m_mdl_global_shared_lock); m_mdl_global_shared_lock= NULL; m_state= GRL_NONE; - + DBUG_VOID_RETURN; } @@ -1077,9 +1095,39 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd) If we didn't succeed lock_global_read_lock(), or if we already suceeded make_global_read_lock_block_commit(), do nothing. 
*/ + +#ifdef WITH_WSREP + if (m_mdl_blocks_commits_lock) + { + WSREP_DEBUG("GRL was in block commit mode when entering " + "make_global_read_lock_block_commit"); + thd->mdl_context.release_lock(m_mdl_blocks_commits_lock); + m_mdl_blocks_commits_lock= NULL; + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + wsrep->resume(wsrep); + m_state= GRL_ACQUIRED; + } +#endif /* WITH_WSREP */ + if (m_state != GRL_ACQUIRED) DBUG_RETURN(0); +#ifdef WITH_WSREP + long long ret = wsrep->pause(wsrep); + if (ret >= 0) + { + wsrep_locked_seqno= ret; + } + else if (ret != -ENOSYS) /* -ENOSYS - no provider */ + { + WSREP_ERROR("Failed to pause provider: %lld (%s)", -ret, strerror(-ret)); + + /* m_mdl_blocks_commits_lock is always NULL here */ + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + my_error(ER_LOCK_DEADLOCK, MYF(0)); + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ mdl_request.init(MDL_key::COMMIT, "", "", MDL_SHARED, MDL_EXPLICIT); if (thd->mdl_context.acquire_lock(&mdl_request, diff --git a/sql/log.cc b/sql/log.cc index 577297fa1a4..ee7d548d81a 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -51,6 +51,9 @@ #include "sql_plugin.h" #include "rpl_handler.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ #include "debug_sync.h" /* max size of the log message */ @@ -486,6 +489,9 @@ private: }; handlerton *binlog_hton; +#ifdef WITH_WSREP +extern handlerton *wsrep_hton; +#endif bool LOGGER::is_log_table_enabled(uint log_table_type) { @@ -500,6 +506,134 @@ bool LOGGER::is_log_table_enabled(uint log_table_type) } } +#ifdef WITH_WSREP +IO_CACHE * get_trans_log(THD * thd) +{ + binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*) + thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + { + return cache_mngr->get_binlog_cache_log(true); + } + else + { + WSREP_DEBUG("binlog cache not initialized, conn :%ld", thd->thread_id); + return NULL; + } +} + + +bool wsrep_trans_cache_is_empty(THD *thd) +{ + bool res= TRUE; + + if (thd_sql_command((const THD*) thd) != SQLCOM_SELECT) + 
res= FALSE; + else + { + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + { + res= cache_mngr->trx_cache.empty(); + } + } + return res; +} + +void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end) +{ + thd->binlog_flush_pending_rows_event(stmt_end); +} +void thd_binlog_trx_reset(THD * thd) +{ + /* + todo: fix autocommit select to not call the caller + */ + if (thd_get_ha_data(thd, binlog_hton) != NULL) + { + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) cache_mngr->reset(TRUE, TRUE); + } + thd->clear_binlog_table_maps(); +} + +void thd_binlog_rollback_stmt(THD * thd) +{ + WSREP_DEBUG("thd_binlog_rollback_stmt :%ld", thd->thread_id); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF); +} +/* + Write the contents of a cache to memory buffer. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. 
+ */ + +int wsrep_write_cache(IO_CACHE *cache, uchar **buf, uint *buf_len) +{ + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + return ER_ERROR_ON_WRITE; + uint length= my_b_bytes_in_cache(cache); + long long total_length = 0; + uchar *buf_ptr = NULL; + + do + { + /* bail out if buffer grows too large + This is a temporary fix to avoid flooding replication + TODO: remove this check for 0.7.4 release + */ + if (total_length > wsrep_max_ws_size) + { + WSREP_WARN("transaction size limit (%lld) exceeded: %lld", + wsrep_max_ws_size, total_length); + if (reinit_io_cache(cache, WRITE_CACHE, 0, 0, 0)) + { + WSREP_WARN("failed to initialize io-cache"); + } + if (buf_ptr) my_free(*buf); + *buf_len = 0; + return ER_ERROR_ON_WRITE; + } + if (total_length > 0) + { + *buf_len += length; + *buf = (uchar *)my_realloc(*buf, total_length+length, MYF(0)); + if (!*buf) + { + WSREP_ERROR("io cache write problem: %d %d", *buf_len, length); + return ER_ERROR_ON_WRITE; + } + buf_ptr = *buf+total_length; + } + else + { + if (buf_ptr != NULL) + { + WSREP_ERROR("io cache alloc error: %d %d", *buf_len, length); + my_free(*buf); + } + if (length > 0) + { + *buf = (uchar *) my_malloc(length, MYF(0)); + buf_ptr = *buf; + *buf_len = length; + } + } + total_length += length; + + memcpy(buf_ptr, cache->read_pos, length); + cache->read_pos=cache->read_end; + } while ((cache->file >= 0) && (length= my_b_fill(cache))); + + return 0; +} +#endif /* Check if a given table is opened log table */ int check_if_log_table(size_t db_len, const char *db, size_t table_name_len, @@ -1536,7 +1670,11 @@ binlog_trans_log_savepos(THD *thd, my_off_t *pos) thd->binlog_setup_trx_data(); binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); +#ifdef WITH_WSREP + DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()); +#else DBUG_ASSERT(mysql_bin_log.is_open()); +#endif *pos= cache_mngr->trx_cache.get_byte_position(); DBUG_PRINT("return", ("*pos: %lu", 
(ulong) *pos)); DBUG_VOID_RETURN; @@ -1584,7 +1722,16 @@ binlog_trans_log_truncate(THD *thd, my_off_t pos) int binlog_init(void *p) { binlog_hton= (handlerton *)p; +#ifdef WITH_WSREP + if (WSREP_ON) + binlog_hton->state= SHOW_OPTION_YES; + else + { +#endif /* WITH_WSREP */ binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO; +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ binlog_hton->db_type=DB_TYPE_BINLOG; binlog_hton->savepoint_offset= sizeof(my_off_t); binlog_hton->close_connection= binlog_close_connection; @@ -1840,6 +1987,9 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) DBUG_ENTER("binlog_commit"); binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); +#ifdef WITH_WSREP + if (!cache_mngr) DBUG_RETURN(0); +#endif /* WITH_WSREP */ DBUG_PRINT("debug", ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", @@ -1896,6 +2046,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) int error= 0; binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); +#ifdef WITH_WSREP + if (!cache_mngr) DBUG_RETURN(0); +#endif /* WITH_WSREP */ DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", YESNO(all), @@ -1924,8 +2077,12 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) cache_mngr->reset(false, true); DBUG_RETURN(error); } - +#ifdef WITH_WSREP + if (!wsrep_emulate_bin_log && + mysql_bin_log.check_write_error(thd)) +#else if (mysql_bin_log.check_write_error(thd)) +#endif { /* "all == true" means that a "rollback statement" triggered the error and @@ -1955,12 +2112,12 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) if (ending_trans(thd, all) && ((thd->variables.option_bits & OPTION_KEEP_LOG) || (trans_has_updated_non_trans_table(thd) && - thd->variables.binlog_format == BINLOG_FORMAT_STMT) || + 
WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT) || (cache_mngr->trx_cache.changes_to_non_trans_temp_table() && - thd->variables.binlog_format == BINLOG_FORMAT_MIXED) || + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED) || (trans_has_updated_non_trans_table(thd) && ending_single_stmt_trans(thd,all) && - thd->variables.binlog_format == BINLOG_FORMAT_MIXED))) + WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_MIXED))) error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr); /* Truncate the cache if: @@ -1974,9 +2131,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) else if (ending_trans(thd, all) || (!(thd->variables.option_bits & OPTION_KEEP_LOG) && (!stmt_has_updated_non_trans_table(thd) || - thd->variables.binlog_format != BINLOG_FORMAT_STMT) && + WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_STMT) && (!cache_mngr->trx_cache.changes_to_non_trans_temp_table() || - thd->variables.binlog_format != BINLOG_FORMAT_MIXED))) + WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_MIXED))) error= binlog_truncate_trx_cache(thd, cache_mngr, all); } @@ -2070,7 +2227,9 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv) binlog_trans_log_savepos(thd, (my_off_t*) sv); /* Write it to the binary log */ - +#ifdef WITH_WSREP + if (wsrep_emulate_bin_log) DBUG_RETURN(0); +#endif /* WITH_WSREP */ String log_query; if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")) || log_query.append("`") || @@ -2092,7 +2251,12 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) non-transactional table. Otherwise, truncate the binlog cache starting from the SAVEPOINT command. 
*/ +#ifdef WITH_WSREP + if (!wsrep_emulate_bin_log && + unlikely(trans_has_updated_non_trans_table(thd) || +#else if (unlikely(trans_has_updated_non_trans_table(thd) || +#endif (thd->variables.option_bits & OPTION_KEEP_LOG))) { String log_query; @@ -4720,6 +4884,7 @@ int THD::binlog_setup_trx_data() DBUG_RETURN(0); } + /* Function to start a statement and optionally a transaction for the binary log. @@ -4839,7 +5004,12 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional, table->s->table_map_id)); /* Pre-conditions */ +#ifdef WITH_WSREP + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + (WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open())); +#else DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +#endif DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); Table_map_log_event @@ -4976,7 +5146,11 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, bool is_transactional) { DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)"); +#ifdef WITH_WSREP + DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()); +#else DBUG_ASSERT(mysql_bin_log.is_open()); +#endif DBUG_PRINT("enter", ("event: 0x%lx", (long) event)); int error= 0; @@ -5056,7 +5230,11 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) mostly called if is_open() *was* true a few instructions before, but it could have changed since. 
*/ +#ifdef WITH_WSREP + if ((WSREP(thd) && wsrep_emulate_bin_log) || is_open()) +#else if (likely(is_open())) +#endif { my_off_t UNINIT_VAR(my_org_b_tell); #ifdef HAVE_REPLICATION @@ -5237,6 +5415,35 @@ err: } } +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_incremental_data_collection && + (wsrep_emulate_bin_log || mysql_bin_log.is_open())) + { + DBUG_ASSERT(thd->wsrep_trx_handle.trx_id != (unsigned long)-1); + if (!error) + { + IO_CACHE* cache= get_trans_log(thd); + uchar* buf= NULL; + uint buf_len= 0; + + if (wsrep_emulate_bin_log) + thd->binlog_flush_pending_rows_event(false); + error= wsrep_write_cache(cache, &buf, &buf_len); + if (!error && buf_len > 0) + { + wsrep_status_t rc= wsrep->append_data(wsrep, + &thd->wsrep_trx_handle, + buf, buf_len); + if (rc != WSREP_OK) + { + sql_print_warning("WSREP: append_data() returned %d", rc); + error= 1; + } + } + if (buf_len) my_free(buf); + } + } +#endif /* WITH_WSREP */ DBUG_RETURN(error); } @@ -5329,6 +5536,14 @@ int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge) { int error= 0; DBUG_ENTER("MYSQL_BIN_LOG::rotate"); +#ifdef WITH_WSREP + if (WSREP_ON && wsrep_to_isolation) + { + WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d", + wsrep_to_isolation); + DBUG_RETURN(0); + } +#endif //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log); *check_purge= false; @@ -5797,6 +6012,9 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, group_commit_entry entry; DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog"); +#ifdef WITH_WSREP + if (wsrep_emulate_bin_log) DBUG_RETURN(0); +#endif /* WITH_WSREP */ entry.thd= thd; entry.cache_mngr= cache_mngr; entry.error= 0; @@ -7426,7 +7644,13 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all, binlog_cache_mngr *cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); - +#ifdef WITH_WSREP + if (!cache_mngr) + { + WSREP_DEBUG("Skipping empty log_xid: %s", thd->query()); + DBUG_RETURN(1); + } +#endif /* WITH_WSREP */ 
cache_mngr->using_xa= TRUE; cache_mngr->xa_xid= xid; err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid); diff --git a/sql/log.h b/sql/log.h index e8f59801683..2bc4d0e49d7 100644 --- a/sql/log.h +++ b/sql/log.h @@ -94,7 +94,7 @@ public: int log_and_order(THD *thd, my_xid xid, bool all, bool need_prepare_ordered, bool need_commit_ordered) { - DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); + //DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); return 1; } int unlog(ulong cookie, my_xid xid) { return 0; } @@ -267,6 +267,12 @@ enum enum_log_state { LOG_OPENED, LOG_CLOSED, LOG_TO_BE_OPENED }; (mmap+fsync is two times faster than write+fsync) */ +#ifdef WITH_WSREP +extern my_bool wsrep_emulate_bin_log; +Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event); +#endif class MYSQL_LOG { public: @@ -846,12 +852,30 @@ public: }; enum enum_binlog_format { + /* + statement-based except for cases where only row-based can work (UUID() + etc): + */ BINLOG_FORMAT_MIXED= 0, ///< statement if safe, otherwise row - autodetected BINLOG_FORMAT_STMT= 1, ///< statement-based BINLOG_FORMAT_ROW= 2, ///< row-based BINLOG_FORMAT_UNSPEC=3 ///< thd_binlog_format() returns it when binlog is closed }; +#ifdef WITH_WSREP +IO_CACHE * get_trans_log(THD * thd); +bool wsrep_trans_cache_is_empty(THD *thd); +void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end); +void thd_binlog_trx_reset(THD * thd); +void thd_binlog_rollback_stmt(THD * thd); +int wsrep_write_cache(IO_CACHE *cache, uchar **buf, uint *buf_len); + +#define WSREP_FORMAT(my_format) \ + ((wsrep_forced_binlog_format != BINLOG_FORMAT_UNSPEC) ? 
\ + wsrep_forced_binlog_format : my_format) +#else +#define WSREP_FORMAT(my_format) my_format +#endif int query_error_code(THD *thd, bool not_killed); uint purge_log_get_error_code(int res); diff --git a/sql/log_event.cc b/sql/log_event.cc index cec0785a088..05340fff03a 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -46,6 +46,9 @@ #include "transaction.h" #include <my_dir.h> +#if WITH_WSREP +#include "wsrep_mysqld.h" +#endif #endif /* MYSQL_CLIENT */ #include <base64.h> @@ -2769,7 +2772,9 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, master_data_written(0) { time_t end_time; - +#ifdef WITH_WSREP + thd->wsrep_PA_safe= false; +#endif /* WITH_WSREP */ memset(&user, 0, sizeof(user)); memset(&host, 0, sizeof(host)); @@ -6983,7 +6988,14 @@ err: end_io_cache(&file); if (fd >= 0) mysql_file_close(fd, MYF(0)); +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "exit Create_file_log_event::do_apply_event()"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ return error != 0; } #endif /* defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) */ @@ -7154,7 +7166,14 @@ int Append_block_log_event::do_apply_event(Relay_log_info const *rli) err: if (fd >= 0) mysql_file_close(fd, MYF(0)); +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "exit Append_block_log_event::do_apply_event()"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ DBUG_RETURN(error); } #endif @@ -8097,7 +8116,17 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) if (open_and_lock_tables(thd, rli->tables_to_lock, FALSE, 0)) { +#ifdef WITH_WSREP + uint actual_error= ER_SERVER_SHUTDOWN; + if (WSREP(thd) && !thd->is_fatal_error) + { + sql_print_information("WSREP, BF applier interrupted in log_event.cc"); + } + else + actual_error= thd->stmt_da->sql_errno(); +#else uint actual_error= thd->stmt_da->sql_errno(); +#endif if (thd->is_slave_error || 
thd->is_fatal_error) { /* @@ -9842,8 +9871,23 @@ int Write_rows_log_event::do_exec_row(const Relay_log_info *const rli) { DBUG_ASSERT(m_table != NULL); +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + char info[64]; + info[sizeof(info) - 1] = '\0'; + snprintf(info, sizeof(info) - 1, "Write_rows_log_event::write_row(%lld)", + (long long) thd->wsrep_trx_seqno); + const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL; +#else + const char* tmp = (WSREP(thd)) ? + thd_proc_info(thd,"Write_rows_log_event::write_row()") : NULL; +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ int error= write_row(rli, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT); +#ifdef WITH_WSREP + if (WSREP(thd)) thd_proc_info(thd, tmp); +#endif /* WITH_WSREP */ if (error && !thd->is_error()) { DBUG_ASSERT(0); @@ -10496,14 +10540,39 @@ int Delete_rows_log_event::do_exec_row(const Relay_log_info *const rli) int error; DBUG_ASSERT(m_table != NULL); +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + char info[64]; + info[sizeof(info) - 1] = '\0'; + snprintf(info, sizeof(info) - 1, "Delete_rows_log_event::find_row(%lld)", + (long long) thd->wsrep_trx_seqno); + const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL; +#else + const char* tmp = (WSREP(thd)) ? 
+ thd_proc_info(thd,"Delete_rows_log_event::find_row()") : NULL; +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ if (!(error= find_row(rli))) { /* Delete the record found, located in record[0] */ +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + snprintf(info, sizeof(info) - 1, + "Delete_rows_log_event::ha_delete_row(%lld)", + (long long) thd->wsrep_trx_seqno); + if (WSREP(thd)) thd_proc_info(thd, info); +#else + if (WSREP(thd)) thd_proc_info(thd,"Delete_rows_log_event::ha_delete_row()"); +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ error= m_table->file->ha_delete_row(m_table->record[0]); m_table->file->ha_index_or_rnd_end(); } +#ifdef WITH_WSREP + if (WSREP(thd)) thd_proc_info(thd, tmp); +#endif /* WITH_WSREP */ return error; } @@ -10617,6 +10686,18 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli) { DBUG_ASSERT(m_table != NULL); +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + char info[64]; + info[sizeof(info) - 1] = '\0'; + snprintf(info, sizeof(info) - 1, "Update_rows_log_event::find_row(%lld)", + (long long) thd->wsrep_trx_seqno); + const char* tmp = (WSREP(thd)) ? thd_proc_info(thd, info) : NULL; +#else + const char* tmp = (WSREP(thd)) ? 
+ thd_proc_info(thd,"Update_rows_log_event::find_row()") : NULL; +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ int error= find_row(rli); if (error) { @@ -10643,6 +10724,17 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli) store_record(m_table,record[1]); m_curr_row= m_curr_row_end; +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + snprintf(info, sizeof(info) - 1, + "Update_rows_log_event::unpack_current_row(%lld)", + (long long) thd->wsrep_trx_seqno); + if (WSREP(thd)) thd_proc_info(thd, info); +#else + if (WSREP(thd)) + thd_proc_info(thd,"Update_rows_log_event::unpack_current_row()"); +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ /* this also updates m_curr_row_end */ if ((error= unpack_current_row(rli))) goto err; @@ -10661,10 +10753,23 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli) DBUG_DUMP("new values", m_table->record[0], m_table->s->reclength); #endif +#ifdef WITH_WSREP +#ifdef WSREP_PROC_INFO + snprintf(info, sizeof(info) - 1, + "Update_rows_log_event::ha_update_row(%lld)", + (long long) thd->wsrep_trx_seqno); + if (WSREP(thd)) thd_proc_info(thd, info); +#else + if (WSREP(thd)) thd_proc_info(thd,"Update_rows_log_event::ha_update_row()"); +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ error= m_table->file->ha_update_row(m_table->record[1], m_table->record[0]); if (error == HA_ERR_RECORD_IS_THE_SAME) error= 0; +#ifdef WITH_WSREP + if (WSREP(thd)) thd_proc_info(thd, tmp); +#endif /* WITH_WSREP */ err: m_table->file->ha_index_or_rnd_end(); return error; @@ -10749,6 +10854,50 @@ void Incident_log_event::pack_info(Protocol *protocol) protocol->store(buf, bytes, &my_charset_bin); } #endif +#if WITH_WSREP && !defined(MYSQL_CLIENT) +Format_description_log_event *wsrep_format_desc; // TODO: free them at the end +/* + read the first event from (*buf). The size of the (*buf) is (*buf_len). 
+ At the end (*buf) is shifted to point to the following event or NULL and + (*buf_len) will be changed to account just being read bytes of the 1st event. +*/ +Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event) +{ + DBUG_ENTER("wsrep_read_log_event"); + char *head= (*arg_buf); + + uint data_len = uint4korr(head + EVENT_LEN_OFFSET); + char *buf= (*arg_buf); + const char *error= 0; + Log_event *res= 0; +#ifndef max_allowed_packet + THD *thd=current_thd; + uint max_allowed_packet= thd ? thd->variables.max_allowed_packet : ~(ulong)0; +#endif + + if (data_len > max_allowed_packet) + { + error = "Event too big"; + goto err; + } + + res= Log_event::read_log_event(buf, data_len, &error, description_event, FALSE); + +err: + if (!res) + { + DBUG_ASSERT(error != 0); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %d, event_type: %d", + error,data_len,head[EVENT_TYPE_OFFSET]); + } + (*arg_buf)+= data_len; + (*arg_buf_len)-= data_len; + DBUG_RETURN(res); +} +#endif #ifdef MYSQL_CLIENT diff --git a/sql/mdl.cc b/sql/mdl.cc index ca552a540b9..8ff420e4f50 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -20,7 +20,17 @@ #include <mysqld_error.h> #include <mysql/plugin.h> #include <mysql/service_thd_wait.h> - +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd); +extern "C" char *wsrep_thd_query(THD *thd); +void sql_print_information(const char *format, ...) + ATTRIBUTE_FORMAT(printf, 1, 2); + +extern bool +wsrep_grant_mdl_exception(MDL_context *requestor_ctx, + MDL_ticket *ticket); +#endif /* WITH_WSREP */ #ifdef HAVE_PSI_INTERFACE static PSI_mutex_key key_MDL_map_mutex; static PSI_mutex_key key_MDL_wait_LOCK_wait_status; @@ -1222,11 +1232,54 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) called by other threads. 
*/ DBUG_ASSERT(ticket->get_lock()); +#ifdef WITH_WSREP + if ((this == &(ticket->get_lock()->m_waiting)) && + wsrep_thd_is_brute_force((void *)(ticket->get_ctx()->get_thd()))) + { + Ticket_iterator itw(ticket->get_lock()->m_waiting); + Ticket_iterator itg(ticket->get_lock()->m_granted); + + MDL_ticket *waiting, *granted; + MDL_ticket *prev=NULL; + bool added= false; + + while ((waiting= itw++) && !added) + { + if (!wsrep_thd_is_brute_force((void *)(waiting->get_ctx()->get_thd()))) + { + WSREP_DEBUG("MDL add_ticket inserted before: %lu %s", + wsrep_thd_thread_id(waiting->get_ctx()->get_thd()), + wsrep_thd_query(waiting->get_ctx()->get_thd())); + m_list.insert_after(prev, ticket); + added= true; + } + prev= waiting; + } + if (!added) m_list.push_back(ticket); + + while ((granted= itg++)) + { + if (granted->get_ctx() != ticket->get_ctx() && + granted->is_incompatible_when_granted(ticket->get_type())) + { + if (!wsrep_grant_mdl_exception(ticket->get_ctx(), granted)) + { + WSREP_DEBUG("MDL victim killed at add_ticket"); + } + } + } + } + else + { +#endif /* WITH_WSREP */ /* Add ticket to the *back* of the queue to ensure fairness among requests with the same priority. */ m_list.push_back(ticket); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ m_bitmap|= MDL_BIT(ticket->get_type()); } @@ -1463,7 +1516,6 @@ MDL_object_lock::m_waiting_incompatible[MDL_TYPE_END] = 0 }; - /** Check if request for the metadata lock can be satisfied given its current state. 
@@ -1486,6 +1538,9 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, bool can_grant= FALSE; bitmap_t waiting_incompat_map= incompatible_waiting_types_bitmap()[type_arg]; bitmap_t granted_incompat_map= incompatible_granted_types_bitmap()[type_arg]; +#ifdef WITH_WSREP + bool wsrep_can_grant= TRUE; +#endif /* WITH_WSREP */ /* New lock request can be satisfied iff: - There are no incompatible types of satisfied requests @@ -1507,12 +1562,51 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, { if (ticket->get_ctx() != requestor_ctx && ticket->is_incompatible_when_granted(type_arg)) +#ifdef WITH_WSREP + { + if (wsrep_thd_is_brute_force((void *)(requestor_ctx->get_thd())) && + key.mdl_namespace() == MDL_key::GLOBAL) + { + WSREP_DEBUG("global lock granted for BF: %lu %s", + wsrep_thd_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + can_grant = true; + } + else if (!wsrep_grant_mdl_exception(requestor_ctx, ticket)) + { + wsrep_can_grant= FALSE; + } + else + { + can_grant= TRUE; + } + } +#else break; +#endif /* WITH_WSREP */ } +#ifdef WITH_WSREP + if ((ticket == NULL) && wsrep_can_grant) +#else if (ticket == NULL) /* Incompatible locks are our own. 
*/ +#endif /* WITH_WSREP */ + can_grant= TRUE; } } +#ifdef WITH_WSREP + else + { + if (wsrep_thd_is_brute_force((void *)(requestor_ctx->get_thd())) && + key.mdl_namespace() == MDL_key::GLOBAL) + { + WSREP_DEBUG("global lock granted for BF (waiting queue): %lu %s", + wsrep_thd_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + can_grant = true; + } + } +#endif /* WITH_WSREP */ return can_grant; } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 7394ce5c931..2251663c94a 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -72,6 +72,10 @@ #include "scheduler.h" #include <waiting_threads.h> #include "debug_sync.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +ulong wsrep_running_threads = 0; // # of currently running wsrep threads +#endif #include "sql_callback.h" #include "threadpool.h" @@ -360,6 +364,9 @@ static DYNAMIC_ARRAY all_options; /* Global variables */ +#ifdef WITH_WSREP +ulong my_bind_addr; +#endif /* WITH_WSREP */ bool opt_bin_log, opt_ignore_builtin_innodb= 0; my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table= 0, opt_help= 0, opt_abort; ulonglong log_output_options; @@ -452,6 +459,10 @@ ulong opt_binlog_rows_event_max_size; my_bool opt_master_verify_checksum= 0; my_bool opt_slave_sql_verify_checksum= 1; const char *binlog_format_names[]= {"MIXED", "STATEMENT", "ROW", NullS}; +#ifdef WITH_WSREP +const char *wsrep_binlog_format_names[]= + {"MIXED", "STATEMENT", "ROW", "NONE", NullS}; +#endif /*WITH_WSREP */ #ifdef HAVE_INITGROUPS volatile sig_atomic_t calling_initgroups= 0; /**< Used in SIGSEGV handler. 
*/ #endif @@ -671,6 +682,21 @@ pthread_attr_t connection_attrib; mysql_mutex_t LOCK_server_started; mysql_cond_t COND_server_started; +#ifdef WITH_WSREP +mysql_mutex_t LOCK_wsrep_ready; +mysql_cond_t COND_wsrep_ready; +mysql_mutex_t LOCK_wsrep_sst; +mysql_cond_t COND_wsrep_sst; +mysql_mutex_t LOCK_wsrep_sst_init; +mysql_cond_t COND_wsrep_sst_init; +mysql_mutex_t LOCK_wsrep_rollback; +mysql_cond_t COND_wsrep_rollback; +wsrep_aborting_thd_t wsrep_aborting_thd= NULL; +mysql_mutex_t LOCK_wsrep_replaying; +mysql_cond_t COND_wsrep_replaying; +int wsrep_replaying= 0; +static void wsrep_close_threads(THD* thd); +#endif int mysqld_server_started= 0; File_parser_dummy_hook file_parser_dummy_hook; @@ -740,6 +766,11 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids, key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data, key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc; +#ifdef WITH_WSREP +PSI_mutex_key key_LOCK_wsrep_rollback, key_LOCK_wsrep_thd, + key_LOCK_wsrep_replaying, key_LOCK_wsrep_ready, key_LOCK_wsrep_sst, + key_LOCK_wsrep_sst_thread, key_LOCK_wsrep_sst_init; +#endif PSI_mutex_key key_RELAYLOG_LOCK_index; PSI_mutex_key key_LOCK_stats, @@ -810,6 +841,16 @@ static PSI_mutex_info all_server_mutexes[]= { &key_LOCK_thread_count, "LOCK_thread_count", PSI_FLAG_GLOBAL}, { &key_LOCK_logger_service, "logger_service_file_st::lock", PSI_FLAG_GLOBAL}, +#ifdef WITH_WSREP + { &key_LOCK_wsrep_ready, "LOCK_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_LOCK_wsrep_sst_init, "LOCK_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_rollback, "LOCK_wsrep_rollback", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_thd, "THD::LOCK_wsrep_thd", 0}, + { &key_LOCK_wsrep_replaying, "LOCK_wsrep_replaying", PSI_FLAG_GLOBAL}, +#endif { &key_PARTITION_LOCK_auto_inc, 
"HA_DATA_PARTITION::LOCK_auto_inc", 0} }; @@ -847,6 +888,11 @@ PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond, key_TABLE_SHARE_cond, key_user_level_lock_cond, key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache, key_BINLOG_COND_queue_busy; +#ifdef WITH_WSREP +PSI_cond_key key_COND_wsrep_rollback, key_COND_wsrep_thd, + key_COND_wsrep_replaying, key_COND_wsrep_ready, key_COND_wsrep_sst, + key_COND_wsrep_sst_init, key_COND_wsrep_sst_thread; +#endif /* WITH_WSREP */ PSI_cond_key key_RELAYLOG_update_cond, key_COND_wakeup_ready; PSI_cond_key key_RELAYLOG_COND_queue_busy; PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy; @@ -888,6 +934,15 @@ static PSI_cond_info all_server_conds[]= { &key_user_level_lock_cond, "User_level_lock::cond", 0}, { &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL}, { &key_COND_thread_cache, "COND_thread_cache", PSI_FLAG_GLOBAL}, +#ifdef WITH_WSREP + { &key_COND_wsrep_ready, "COND_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst, "COND_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst_init, "COND_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_COND_wsrep_rollback, "COND_wsrep_rollback", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_thd, "THD::COND_wsrep_thd", 0}, + { &key_COND_wsrep_replaying, "COND_wsrep_replaying", PSI_FLAG_GLOBAL}, +#endif { &key_COND_flush_thread_cache, "COND_flush_thread_cache", PSI_FLAG_GLOBAL} }; @@ -1413,6 +1468,11 @@ static void close_connections(void) if (tmp->slave_thread) continue; +#ifdef WITH_WSREP + /* skip wsrep system threads as well */ + if (WSREP(tmp) && (tmp->wsrep_exec_mode==REPL_RECV || tmp->wsrep_applier)) + continue; +#endif tmp->killed= KILL_SERVER_HARD; MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp)); mysql_mutex_lock(&tmp->LOCK_thd_data); @@ -1476,6 +1536,33 @@ static void close_connections(void) close_connection(tmp,ER_SERVER_SHUTDOWN); } #endif +#ifdef WITH_WSREP + /* + * TODO: this code 
block may turn out redundant. wsrep->disconnect() + * should terminate slave threads gracefully, and we don't need + * to signal them here. + * The code here makes sure mysqld will not hang during shutdown + * even if wsrep provider has problems in shutting down. + */ + if (WSREP(tmp) && tmp->wsrep_exec_mode==REPL_RECV) + { + sql_print_information("closing wsrep system thread"); + tmp->killed= KILL_CONNECTION; + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp)); + if (tmp->mysys_var) + { + tmp->mysys_var->abort=1; + mysql_mutex_lock(&tmp->mysys_var->mutex); + if (tmp->mysys_var->current_cond) + { + mysql_mutex_lock(tmp->mysys_var->current_mutex); + mysql_cond_broadcast(tmp->mysys_var->current_cond); + mysql_mutex_unlock(tmp->mysys_var->current_mutex); + } + mysql_mutex_unlock(&tmp->mysys_var->mutex); + } + } +#endif DBUG_PRINT("quit",("Unlocking LOCK_thread_count")); mysql_mutex_unlock(&LOCK_thread_count); } @@ -1636,8 +1723,14 @@ static void __cdecl kill_server(int sig_ptr) } } #endif +#ifdef WITH_WSREP + if (WSREP_ON) wsrep_stop_replication(NULL); +#endif close_connections(); +#ifdef WITH_WSREP + if (WSREP_ON) wsrep_deinit(); +#endif if (sig != MYSQL_KILL_SIGNAL && sig != 0) unireg_abort(1); /* purecov: inspected */ @@ -1732,6 +1825,21 @@ extern "C" void unireg_abort(int exit_code) usage(); if (exit_code) sql_print_error("Aborting\n"); +#ifdef WITH_WSREP + if (wsrep) + { + /* This is an abort situation, we cannot expect to gracefully close all + * wsrep threads here, we can only disconnect from service */ + wsrep_close_client_connections(FALSE); + shutdown_in_progress= 1; + THD* thd(0); + wsrep->disconnect(wsrep); + WSREP_INFO("Service disconnected."); + wsrep_close_threads(thd); /* this won't close all threads */ + sleep(1); /* so give some time to exit for those which can */ + WSREP_INFO("Some threads may fail to exit."); + } +#endif // WITH_WSREP clean_up(!opt_abort && (exit_code || !opt_bootstrap)); /* purecov: inspected */ 
DBUG_PRINT("quit",("done with cleanup in unireg_abort")); mysqld_exit(exit_code); @@ -1932,6 +2040,18 @@ static void clean_up_mutexes() mysql_cond_destroy(&COND_thread_count); mysql_cond_destroy(&COND_thread_cache); mysql_cond_destroy(&COND_flush_thread_cache); +#ifdef WITH_WSREP + (void) mysql_mutex_destroy(&LOCK_wsrep_ready); + (void) mysql_cond_destroy(&COND_wsrep_ready); + (void) mysql_mutex_destroy(&LOCK_wsrep_sst); + (void) mysql_cond_destroy(&COND_wsrep_sst); + (void) mysql_mutex_destroy(&LOCK_wsrep_sst_init); + (void) mysql_cond_destroy(&COND_wsrep_sst_init); + (void) mysql_mutex_destroy(&LOCK_wsrep_rollback); + (void) mysql_cond_destroy(&COND_wsrep_rollback); + (void) mysql_mutex_destroy(&LOCK_wsrep_replaying); + (void) mysql_cond_destroy(&COND_wsrep_replaying); +#endif mysql_mutex_destroy(&LOCK_server_started); mysql_cond_destroy(&COND_server_started); mysql_mutex_destroy(&LOCK_prepare_ordered); @@ -2350,7 +2470,11 @@ static void network_init(void) @note For the connection that is doing shutdown, this is called twice */ +#ifdef WITH_WSREP +void close_connection(THD *thd, uint sql_errno, bool lock) +#else void close_connection(THD *thd, uint sql_errno) +#endif { DBUG_ENTER("close_connection"); @@ -3477,7 +3601,11 @@ static int init_common_variables() compile_time_assert(sizeof(com_status_vars)/sizeof(com_status_vars[0]) - 1 == SQLCOM_END + 8); #endif - +#ifdef WITH_WSREP + /* This is a protection against mutually incompatible option values. 
*/ + if (WSREP_ON && wsrep_check_opts (remaining_argc, remaining_argv)) + return 1; +#endif /* WITH_WSREP */ if (get_options(&remaining_argc, &remaining_argv)) return 1; set_server_version(); @@ -3864,6 +3992,23 @@ static int init_thread_environment() sql_print_error("Can't create thread-keys"); return 1; } +#ifdef WITH_WSREP + mysql_mutex_init(key_LOCK_wsrep_ready, + &LOCK_wsrep_ready, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_ready, &COND_wsrep_ready, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst, + &LOCK_wsrep_sst, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst, &COND_wsrep_sst, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst_init, + &LOCK_wsrep_sst_init, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_init, &COND_wsrep_sst_init, NULL); + mysql_mutex_init(key_LOCK_wsrep_rollback, + &LOCK_wsrep_rollback, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_rollback, &COND_wsrep_rollback, NULL); + mysql_mutex_init(key_LOCK_wsrep_replaying, + &LOCK_wsrep_replaying, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_replaying, &COND_wsrep_replaying, NULL); +#endif return 0; } @@ -4106,7 +4251,12 @@ static int init_server_components() sql_print_warning("You need to use --log-bin to make " "--log-slave-updates work."); } + +#ifdef WITH_WSREP + if (!WSREP_ON && !opt_bin_log && binlog_format_used) +#else if (!opt_bin_log && binlog_format_used) +#endif sql_print_warning("You need to use --log-bin to make " "--binlog-format work."); @@ -4131,6 +4281,31 @@ will be ignored as the --log-bin option is not defined."); } #endif +/* WSREP BEFORE */ +#ifdef WITH_WSREP + // add basedir/bin to PATH to resolve wsrep script names + char* const tmp_path((char*)alloca(strlen(mysql_home) + strlen("/bin") + 1)); + if (tmp_path) + { + strcpy(tmp_path, mysql_home); + strcat(tmp_path, "/bin"); + wsrep_prepend_PATH(tmp_path); + } + else + { + WSREP_ERROR("Could not append %s/bin to PATH", mysql_home); + } + + if (opt_bootstrap) + { + 
wsrep_provider_init(WSREP_NONE); + if (wsrep_init()) unireg_abort(1); + } + else if (!wsrep_recovery && wsrep_init_first()) + { + wsrep_init_startup(true); + } +#endif /* WITH_WSREP */ if (opt_bin_log) { /* Reports an error and aborts, if the --log-bin's path @@ -4336,11 +4511,30 @@ a file name for --log-bin-index option", opt_binlog_index_name); internal_tmp_table_max_key_segments= myisam_max_key_segments(); #endif +#ifdef WITH_WSREP + if (!opt_bin_log) + { + wsrep_emulate_bin_log= 1; + } +#endif + tc_log= (total_ha_2pc > 1 ? (opt_bin_log ? (TC_LOG *) &mysql_bin_log : +#ifdef WITH_WSREP + (WSREP_ON ? + (TC_LOG *) &tc_log_dummy : + (TC_LOG *) &tc_log_mmap)) : +#else (TC_LOG *) &tc_log_mmap) : - (TC_LOG *) &tc_log_dummy); - +#endif + (TC_LOG *) &tc_log_dummy); +#ifdef WITH_WSREP + WSREP_DEBUG("Initial TC log open: %s", + (tc_log == &mysql_bin_log) ? "binlog" : + (tc_log == &tc_log_mmap) ? "mmap" : + (tc_log == &tc_log_dummy) ? "dummy" : "unknown" + ); +#endif if (tc_log->open(opt_bin_log ? 
opt_bin_logname : opt_tc_log_file)) { sql_print_error("Can't init tc log"); @@ -4397,8 +4591,6 @@ a file name for --log-bin-index option", opt_binlog_index_name); init_global_client_stats(); DBUG_RETURN(0); } - - #ifndef EMBEDDED_LIBRARY static void create_shutdown_thread() @@ -4417,6 +4609,397 @@ static void create_shutdown_thread() #endif /* EMBEDDED_LIBRARY */ +#ifdef WITH_WSREP +typedef void (*wsrep_thd_processor_fun)(THD *); + +pthread_handler_t start_wsrep_THD(void *arg) +{ + THD *thd; + wsrep_thd_processor_fun processor= (wsrep_thd_processor_fun)arg; + + DBUG_ENTER("start_wsrep_THD"); + if (my_thread_init()) + { + WSREP_ERROR("Could not initialize thread"); + DBUG_RETURN(NULL); + } + + if (!(thd= new THD(true))) + { + DBUG_RETURN(NULL); + } + mysql_mutex_lock(&LOCK_thread_count); + thd->thread_id=thread_id++; + + thd->real_id=pthread_self(); // Keep purify happy + thread_count++; + thread_created++; + threads.append(thd); + + my_net_init(&thd->net,(st_vio*) 0); + + DBUG_PRINT("wsrep",(("creating thread %lld"), (long long)thd->thread_id)); + thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); + (void) mysql_mutex_unlock(&LOCK_thread_count); + + /* from bootstrap()... */ + thd->bootstrap=1; + thd->max_client_packet_length= thd->net.max_packet; + thd->security_ctx->master_access= ~(ulong)0; + + /* from handle_one_connection... 
*/ + pthread_detach_this_thread(); + + mysql_thread_set_psi_id(thd->thread_id); + thd->thr_create_utime= microsecond_interval_timer(); + if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0)) + { + close_connection(thd, ER_OUT_OF_RESOURCES, 1); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + + DBUG_RETURN(NULL); + } + +// </5.1.17> + /* + handle_one_connection() is normally the only way a thread would + start and would always be on the very high end of the stack , + therefore, the thread stack always starts at the address of the + first local variable of handle_one_connection, which is thd. We + need to know the start of the stack so that we could check for + stack overruns. + */ + DBUG_PRINT("wsrep", ("handle_one_connection called by thread %lld\n", + (long long)thd->thread_id)); + /* now that we've called my_thread_init(), it is safe to call DBUG_* */ + + thd->thread_stack= (char*) &thd; + if (thd->store_globals()) + { + close_connection(thd, ER_OUT_OF_RESOURCES, 1); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + delete thd; + + DBUG_RETURN(NULL); + } + + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + thd->security_ctx->skip_grants(); + + /* handle_one_connection() again... 
*/ + //thd->version= refresh_version; + thd->proc_info= 0; + thd->command= COM_SLEEP; + thd->set_time(); + thd->init_for_queries(); + + mysql_mutex_lock(&LOCK_connection_count); + ++connection_count; + mysql_mutex_unlock(&LOCK_connection_count); + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads++; + mysql_cond_signal(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + processor(thd); + + close_connection(thd, 0, 1); + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads--; + mysql_cond_signal(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + // Note: We can't call THD destructor without crashing + // if plugins have not been initialized. However, in most of the + // cases this means that pre SE initialization SST failed and + // we are going to exit anyway. + if (plugins_are_initialized) + { + net_end(&thd->net); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 1)); + } + else + { + // TODO: lightweight cleanup to get rid of: + // 'Error in my_thread_global_end(): 2 threads didn't exit' + // at server shutdown + } + DBUG_RETURN(NULL); +} + +void wsrep_create_rollbacker() +{ + if (wsrep_provider && strcasecmp(wsrep_provider, "none")) + { + pthread_t hThread; + /* create rollbacker */ + if (pthread_create( &hThread, &connection_attrib, + start_wsrep_THD, (void*)wsrep_rollback_process)) + WSREP_WARN("Can't create thread to manage wsrep rollback"); + } +} + +void wsrep_create_appliers(long threads) +{ + if (!wsrep_connected) + { + /* see wsrep_replication_start() for the logic */ + if (wsrep_cluster_address && strlen(wsrep_cluster_address) && + wsrep_provider && strcasecmp(wsrep_provider, "none")) + { + WSREP_ERROR("Trying to launch slave threads before creating " + "connection at '%s'", wsrep_cluster_address); + assert(0); + } + return; + } + + long wsrep_threads=0; + pthread_t hThread; + while (wsrep_threads++ < threads) { + if (pthread_create( + &hThread, &connection_attrib, + start_wsrep_THD, 
(void*)wsrep_replication_process)) + WSREP_WARN("Can't create thread to manage wsrep replication"); + } +} +/**/ +static bool abort_replicated(THD *thd) +{ + bool ret_code= false; + if (thd->wsrep_query_state== QUERY_COMMITTING) + { + if (wsrep_debug) WSREP_INFO("aborting replicated trx: %lu", thd->real_id); + + (void)wsrep_abort_thd(thd, thd, TRUE); + ret_code= true; + } + return ret_code; +} +/**/ +static inline bool is_client_connection(THD *thd) +{ +#if REMOVE +// REMOVE THIS LATER (lp:777201). Below we had to add an explicit check for +// wsrep_applier since wsrep_exec_mode didn't seem to always work +if (thd->wsrep_applier && thd->wsrep_exec_mode != REPL_RECV) +WSREP_WARN("applier has wsrep_exec_mode = %d", thd->wsrep_exec_mode); + + if ( thd->slave_thread || /* declared as mysql slave */ + thd->system_thread || /* declared as system thread */ + !thd->vio_ok() || /* server internal thread */ + thd->wsrep_exec_mode==REPL_RECV || /* applier or replaying thread */ + thd->wsrep_applier || /* wsrep slave applier */ + !thd->variables.wsrep_on) /* client, but fenced outside wsrep */ + return false; + + return true; +#else + return (thd->wsrep_client_thread && thd->variables.wsrep_on); +#endif /* REMOVE */ +} + +static bool have_client_connections() +{ + THD *tmp; + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION) + { + (void)abort_replicated(tmp); + return true; + } + } + return false; +} + +/* + returns the number of wsrep appliers running. 
+ However, the caller (thd parameter) is not taken in account + */ +static int have_wsrep_appliers(THD *thd) +{ + int ret= 0; + THD *tmp; + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + ret+= (tmp != thd && tmp->wsrep_applier); + } + return ret; +} + +static void wsrep_close_thread(THD *thd) +{ + thd->killed= KILL_CONNECTION; + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (thd)); + if (thd->mysys_var) + { + thd->mysys_var->abort=1; + mysql_mutex_lock(&thd->mysys_var->mutex); + if (thd->mysys_var->current_cond) + { + mysql_mutex_lock(thd->mysys_var->current_mutex); + mysql_cond_broadcast(thd->mysys_var->current_cond); + mysql_mutex_unlock(thd->mysys_var->current_mutex); + } + mysql_mutex_unlock(&thd->mysys_var->mutex); + } +} + +void wsrep_close_client_connections(my_bool wait_to_end) +{ + /* + First signal all threads that it's time to die + */ + + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + bool kill_cached_threads_saved= kill_cached_threads; + kill_cached_threads= true; // prevent future threads caching + mysql_cond_broadcast(&COND_thread_cache); // tell cached threads to die + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + /* We skip slave threads & scheduler on this first loop through. 
*/ + if (!is_client_connection(tmp)) + continue; + + /* replicated transactions must be skipped */ + if (abort_replicated(tmp)) + continue; + + WSREP_DEBUG("closing connection %ld", tmp->thread_id); + wsrep_close_thread(tmp); + } + mysql_mutex_unlock(&LOCK_thread_count); + + if (thread_count) + sleep(2); // Give threads time to die + + mysql_mutex_lock(&LOCK_thread_count); + /* + Force remaining threads to die by closing the connection to the client + */ + + I_List_iterator<THD> it2(threads); + while ((tmp=it2++)) + { +#ifndef __bsdi__ // Bug in BSDI kernel + if (is_client_connection(tmp) && !abort_replicated(tmp)) + { + WSREP_INFO("SST kill local trx: %ld",tmp->thread_id); + close_connection(tmp,0,0); + } +#endif + } + + DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count)); + if (wsrep_debug) + WSREP_INFO("waiting for client connections to close: %u", thread_count); + + while (wait_to_end && have_client_connections()) + { + mysql_cond_wait(&COND_thread_count, &LOCK_thread_count); + DBUG_PRINT("quit",("One thread died (count=%u)", thread_count)); + } + + kill_cached_threads= kill_cached_threads_saved; + + mysql_mutex_unlock(&LOCK_thread_count); + + /* All client connection threads have now been aborted */ +} + +void wsrep_close_applier(THD *thd) +{ + if (wsrep_debug) + WSREP_INFO("closing applier %ld", thd->thread_id); + + wsrep_close_thread(thd); +} + +static void wsrep_close_threads(THD *thd) +{ + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + /* We skip slave threads & scheduler on this first loop through. 
*/ + if (tmp->wsrep_applier && tmp != thd) + { + if (wsrep_debug) + WSREP_INFO("closing wsrep thread %ld", tmp->thread_id); + wsrep_close_thread (tmp); + + } + } + + mysql_mutex_unlock(&LOCK_thread_count); +} + +void wsrep_wait_appliers_close(THD *thd) +{ + /* Wait for wsrep appliers to gracefully exit */ + mysql_mutex_lock(&LOCK_thread_count); + while (have_wsrep_appliers(thd) > 1) + // 1 is for rollbacker thread which needs to be killed explicitly. + // This gotta be fixed in a more elegant manner if we gonna have arbitrary + // number of non-applier wsrep threads. + { + mysql_cond_wait(&COND_thread_count,&LOCK_thread_count); + DBUG_PRINT("quit",("One applier died (count=%u)",thread_count)); + } + mysql_mutex_unlock(&LOCK_thread_count); + /* Now kill remaining wsrep threads: rollbacker */ + wsrep_close_threads (thd); + /* and wait for them to die */ + mysql_mutex_lock(&LOCK_thread_count); + while (have_wsrep_appliers(thd) > 0) + { + mysql_cond_wait(&COND_thread_count,&LOCK_thread_count); + DBUG_PRINT("quit",("One thread died (count=%u)",thread_count)); + } + mysql_mutex_unlock(&LOCK_thread_count); + + /* All wsrep applier threads have now been aborted. However, if this thread + is also applier, we are still running... + */ +} + +void wsrep_kill_mysql(THD *thd) +{ + if (mysqld_server_started) + { + if (!shutdown_in_progress) + { + WSREP_INFO("starting shutdown"); + kill_mysql(); + } + } + else + { + unireg_abort(1); + } +} +#endif /* WITH_WSREP */ #if (defined(_WIN32) || defined(HAVE_SMEM)) && !defined(EMBEDDED_LIBRARY) static void handle_connections_methods() @@ -4873,6 +5456,33 @@ int mysqld_main(int argc, char **argv) if (Events::init(opt_noacl || opt_bootstrap)) unireg_abort(1); +/* WSREP AFTER */ +#ifdef WITH_WSREP + wsrep_SE_initialized(); + if (opt_bootstrap) + { + /*! 
bootstrap wsrep init was taken care of above */ + } + else if (wsrep_recovery) + { + select_thread_in_use= 0; + wsrep_recover(); + unireg_abort(0); + } + else if (wsrep_init_first()) + { + /*! in case of no SST wsrep waits in view handler callback */ + wsrep_SE_init_grab(); + wsrep_SE_init_done(); + /*! in case of SST wsrep waits for wsrep->sst_received */ + wsrep_sst_continue(); + } + else + { + wsrep_init_startup (false); + } + wsrep_create_appliers(wsrep_slave_threads - 1); +#endif /* WITH_WSREP */ if (opt_bootstrap) { select_thread_in_use= 0; // Allow 'kill' to work @@ -4930,6 +5540,9 @@ int mysqld_main(int argc, char **argv) #ifdef EXTRA_DEBUG2 sql_print_error("Before Lock_thread_count"); #endif +#ifdef WITH_WSREP + WSREP_DEBUG("Before Lock_thread_count"); +#endif mysql_mutex_lock(&LOCK_thread_count); DBUG_PRINT("quit", ("Got thread_count mutex")); select_thread_in_use=0; // For close_connections @@ -5196,6 +5809,9 @@ static void bootstrap(MYSQL_FILE *file) DBUG_ENTER("bootstrap"); THD *thd= new THD; +#ifdef WITH_WSREP + thd->variables.wsrep_on= 0; +#endif thd->bootstrap=1; my_net_init(&thd->net,(st_vio*) 0); thd->max_client_packet_length= thd->net.max_packet; @@ -5320,7 +5936,11 @@ void create_thread_to_handle_connection(THD *thd) my_snprintf(error_message_buff, sizeof(error_message_buff), ER_THD(thd, ER_CANT_CREATE_THREAD), error); net_send_error(thd, ER_CANT_CREATE_THREAD, error_message_buff, NULL); +#ifdef WITH_WSREP + close_connection(thd, ER_OUT_OF_RESOURCES ,0); +#else close_connection(thd, ER_OUT_OF_RESOURCES); +#endif /* WITH_WSREP */ mysql_mutex_lock(&LOCK_thread_count); delete thd; mysql_mutex_unlock(&LOCK_thread_count); @@ -5363,7 +5983,11 @@ static void create_new_thread(THD *thd) mysql_mutex_unlock(&LOCK_connection_count); DBUG_PRINT("error",("Too many connections")); + #ifdef WITH_WSREP + close_connection(thd, ER_CON_COUNT_ERROR, 1); +#else close_connection(thd, ER_CON_COUNT_ERROR); +#endif /* WITH_WSREP */ 
statistic_increment(denied_connections, &LOCK_status); delete thd; DBUG_VOID_RETURN; @@ -5744,7 +6368,11 @@ pthread_handler_t handle_connections_namedpipes(void *arg) if (!(thd->net.vio= vio_new_win32pipe(hConnectedPipe)) || my_net_init(&thd->net, thd->net.vio)) { +#ifdef WITH_WSREP + close_connection(thd, ER_OUT_OF_RESOURCES, 1); +#else close_connection(thd, ER_OUT_OF_RESOURCES); +#endif delete thd; continue; } @@ -5939,7 +6567,11 @@ pthread_handler_t handle_connections_shared_memory(void *arg) event_conn_closed)) || my_net_init(&thd->net, thd->net.vio)) { +#ifdef WITH_WSREP + close_connection(thd, ER_OUT_OF_RESOURCES, 1); +#else close_connection(thd, ER_OUT_OF_RESOURCES); +#endif errmsg= 0; goto errorconn; } @@ -7022,6 +7654,19 @@ SHOW_VAR status_vars[]= { #ifdef ENABLED_PROFILING {"Uptime_since_flush_status",(char*) &show_flushstatustime, SHOW_FUNC}, #endif +#ifdef WITH_WSREP + {"wsrep_connected", (char*) &wsrep_connected, SHOW_BOOL}, + {"wsrep_ready", (char*) &wsrep_ready, SHOW_BOOL}, + {"wsrep_cluster_state_uuid", (char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR}, + {"wsrep_cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG}, + {"wsrep_cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR}, + {"wsrep_cluster_size", (char*) &wsrep_cluster_size, SHOW_LONG}, + {"wsrep_local_index", (char*) &wsrep_local_index, SHOW_LONG}, + {"wsrep_provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR}, + {"wsrep_provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR}, + {"wsrep_provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR}, + {"wsrep", (char*) &wsrep_show_status, SHOW_FUNC}, +#endif {NullS, NullS, SHOW_LONG} }; @@ -7345,6 +7990,10 @@ static int mysql_init_variables(void) tmpenv = DEFAULT_MYSQL_HOME; (void) strmake(mysql_home, tmpenv, sizeof(mysql_home)-1); #endif +#ifdef WITH_WSREP + if (WSREP_ON && wsrep_init_vars()) + return 1; +#endif return 0; } @@ -7620,6 +8269,23 @@ mysqld_get_one_option(int optid, case 
OPT_LOWER_CASE_TABLE_NAMES: lower_case_table_names_used= 1; break; +#ifdef WITH_WSREP + case OPT_WSREP_PROVIDER: + wsrep_provider_init (argument); + break; + case OPT_WSREP_PROVIDER_OPTIONS: + wsrep_provider_options_init (argument); + break; + case OPT_WSREP_CLUSTER_ADDRESS: + wsrep_cluster_address_init (argument); + break; + case OPT_WSREP_START_POSITION: + wsrep_start_position_init (argument); + break; + case OPT_WSREP_SST_AUTH: + wsrep_sst_auth_init (argument); + break; +#endif #if defined(ENABLED_DEBUG_SYNC) case OPT_DEBUG_SYNC_TIMEOUT: /* diff --git a/sql/mysqld.h b/sql/mysqld.h index 988584f779a..99093e1e83b 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -53,7 +53,11 @@ typedef Bitmap<((MAX_INDEXES+7)/8*8)> key_map; /* Used for finding keys */ some places */ /* Function prototypes */ void kill_mysql(void); +#ifdef WITH_WSREP +void close_connection(THD *thd, uint sql_errno= 0, bool lock=1); +#else void close_connection(THD *thd, uint sql_errno= 0); +#endif void handle_connection_in_main_thread(THD *thd); void create_thread_to_handle_connection(THD *thd); void unlink_thd(THD *thd); @@ -219,6 +223,10 @@ extern pthread_key(MEM_ROOT**,THR_MALLOC); extern PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool; #endif /* HAVE_MMAP */ +#ifdef WITH_WSREP +extern PSI_mutex_key key_LOCK_wsrep_thd; +extern PSI_cond_key key_COND_wsrep_thd; +#endif /* HAVE_MMAP */ #ifdef HAVE_OPENSSL extern PSI_mutex_key key_LOCK_des_key_file; @@ -406,6 +414,14 @@ enum options_mysqld OPT_WANT_CORE, OPT_ENGINE_CONDITION_PUSHDOWN, OPT_LOG_ERROR, +#ifdef WITH_WSREP + OPT_WSREP_PROVIDER, + OPT_WSREP_PROVIDER_OPTIONS, + OPT_WSREP_CLUSTER_ADDRESS, + OPT_WSREP_START_POSITION, + OPT_WSREP_SST_AUTH, + OPT_WSREP_RECOVER, +#endif OPT_MAX_LONG_DATA_SIZE }; #endif @@ -554,4 +570,5 @@ extern uint internal_tmp_table_max_key_segments; extern uint volatile global_disable_checkpoint; extern my_bool opt_help; + #endif /* MYSQLD_INCLUDED */ diff --git a/sql/protocol.cc b/sql/protocol.cc 
index 63b945f7078..4a7ea88c402 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -485,6 +485,14 @@ static uchar *net_store_length_fast(uchar *packet, uint length) void Protocol::end_statement() { +#ifdef WITH_WSREP + /*sanity check, can be removed before 1.0 release */ + if (WSREP(thd) && thd->wsrep_conflict_state== REPLAYING) + { + WSREP_ERROR("attempting net_end_statement while replaying"); + return; + } +#endif DBUG_ENTER("Protocol::end_statement"); DBUG_ASSERT(! thd->stmt_da->is_sent); bool error= FALSE; diff --git a/sql/set_var.h b/sql/set_var.h index c074f3f4399..85faaaadd73 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -235,6 +235,9 @@ public: int check(THD *thd); int update(THD *thd); int light_check(THD *thd); +#ifdef WITH_WSREP + int wsrep_store_variable(THD *thd); +#endif }; @@ -315,6 +318,9 @@ extern sys_var *Sys_autocommit_ptr; CHARSET_INFO *get_old_charset_by_name(const char *old_name); +#ifdef WITH_WSREP +int sql_set_wsrep_variables(THD *thd, List<set_var_base> *var_list); +#endif int sys_var_init(); int sys_var_add_options(DYNAMIC_ARRAY *long_options, int parse_flags); void sys_var_end(void); diff --git a/sql/slave.cc b/sql/slave.cc index 56f9c14703c..833820203b0 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -53,6 +53,9 @@ // Create_file_log_event, // Format_description_log_event +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif #ifdef HAVE_REPLICATION #include "rpl_tblmap.h" @@ -3487,6 +3490,11 @@ pthread_handler_t handle_slave_sql(void *arg) #endif DBUG_ASSERT(rli->sql_thd == thd); +#ifdef WITH_WSREP + thd->wsrep_exec_mode= LOCAL_STATE; + /* synchronize with wsrep replication */ + wsrep_ready_wait (); +#endif DBUG_PRINT("master_info",("log_file_name: %s position: %s", rli->group_master_log_name, llstr(rli->group_master_log_pos,llbuff))); diff --git a/sql/sp.cc b/sql/sp.cc index 29195234a5a..700a8c54fbf 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -2277,3 +2277,37 @@ sp_load_for_information_schema(THD *thd, TABLE *proc_table, String *db, 
thd->lex= old_lex; return sp; } +#ifdef WITH_WSREP +int wsrep_create_sp(THD *thd, uchar** buf, uint* buf_len) +{ + String log_query; + sp_head *sp = thd->lex->sphead; + ulong saved_mode= thd->variables.sql_mode; + String retstr(64); + retstr.set_charset(system_charset_info); + + log_query.set_charset(system_charset_info); + + if (sp->m_type == TYPE_ENUM_FUNCTION) + { + sp_returns_type(thd, retstr, sp); + } + + if (!create_string(thd, &log_query, + sp->m_type, + (sp->m_explicit_name ? sp->m_db.str : NULL), + (sp->m_explicit_name ? sp->m_db.length : 0), + sp->m_name.str, sp->m_name.length, + sp->m_params.str, sp->m_params.length, + retstr.c_ptr(), retstr.length(), + sp->m_body.str, sp->m_body.length, + sp->m_chistics, &(thd->lex->definer->user), + &(thd->lex->definer->host), + saved_mode)) + { + WSREP_WARN("SP create string failed: %s", thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +#endif /* WITH_WSREP */ diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc index c6c02773286..c4468ee8793 100644 --- a/sql/sql_alter.cc +++ b/sql/sql_alter.cc @@ -17,7 +17,9 @@ #include "sql_table.h" // mysql_alter_table, // mysql_exchange_partition #include "sql_alter.h" - +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ bool Alter_table_statement::execute(THD *thd) { LEX *lex= thd->lex; @@ -97,6 +99,17 @@ bool Alter_table_statement::execute(THD *thd) thd->enable_slow_log= opt_log_slow_admin_statements; +#ifdef WITH_WSREP +TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl); + + if ((!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, first_table)) && + wsrep_to_isolation_begin(thd, first_table->db, first_table->table_name)) + { + WSREP_WARN("ALTER TABLE isolation failure"); + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ result= mysql_alter_table(thd, select_lex->db, lex->name.str, &create_info, first_table, @@ -105,5 +118,7 @@ bool 
Alter_table_statement::execute(THD *thd) select_lex->order_list.first, lex->ignore, lex->online); +#ifdef WITH_WSREP +#endif /* WITH_WSREP */ DBUG_RETURN(result); } diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 9298d3ccb72..d0404693405 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -60,6 +60,9 @@ #include <io.h> #endif +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif // WITH_WSREP bool No_such_table_error_handler::handle_condition(THD *, @@ -4218,7 +4221,7 @@ thr_lock_type read_lock_type_for_table(THD *thd, */ bool log_on= mysql_bin_log.is_open() && thd->variables.sql_log_bin; ulong binlog_format= thd->variables.binlog_format; - if ((log_on == FALSE) || (binlog_format == BINLOG_FORMAT_ROW) || + if ((log_on == FALSE) || (WSREP_FORMAT(binlog_format) == BINLOG_FORMAT_ROW) || (table_list->table->s->table_category == TABLE_CATEGORY_LOG) || (table_list->table->s->table_category == TABLE_CATEGORY_PERFORMANCE) || !(is_update_query(prelocking_ctx->sql_command) || @@ -5075,7 +5078,14 @@ restart: } err: +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "exit open_tables()"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ free_root(&new_frm_mem, MYF(0)); // Free pre-alloced block if (error && *table_to_open) @@ -5518,7 +5528,14 @@ end: trans_rollback_stmt(thd); close_thread_tables(thd); } +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "End opening table"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ DBUG_RETURN(table); } @@ -5738,7 +5755,7 @@ bool lock_tables(THD *thd, TABLE_LIST *tables, uint count, We can solve these problems in mixed mode by switching to binlogging if at least one updated table is used by sub-statement */ - if (thd->variables.binlog_format != BINLOG_FORMAT_ROW && tables && + if (WSREP_FORMAT(thd->variables.binlog_format) != BINLOG_FORMAT_ROW && tables && 
has_write_table_with_auto_increment(thd->lex->first_not_own_table())) thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS); } @@ -9169,7 +9186,19 @@ bool mysql_notify_thread_having_shared_lock(THD *thd, THD *in_use, (e.g. see partitioning code). */ if (!thd_table->needs_reopen()) +#ifdef WITH_WSREP + { + signalled|= mysql_lock_abort_for_thread(thd, thd_table); + if (thd && WSREP(thd) && wsrep_thd_is_brute_force((void *)thd)) + { + WSREP_DEBUG("remove_table_from_cache: %llu", + (unsigned long long) thd->real_id); + wsrep_abort_thd((void *)thd, (void *)in_use, FALSE); + } + } +#else signalled|= mysql_lock_abort_for_thread(thd, thd_table); +#endif } mysql_mutex_unlock(&in_use->LOCK_thd_data); } diff --git a/sql/sql_builtin.cc.in b/sql/sql_builtin.cc.in index 63850650ac9..2de475b0a76 100644 --- a/sql/sql_builtin.cc.in +++ b/sql/sql_builtin.cc.in @@ -25,7 +25,11 @@ extern #endif builtin_maria_plugin @mysql_mandatory_plugins@ @mysql_optional_plugins@ - builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin; + builtin_maria_binlog_plugin, +#ifdef WITH_WSREP + builtin_wsrep_plugin@mysql_plugin_defs@, +#endif /* WITH_WSREP */ + builtin_maria_mysql_password_plugin; struct st_maria_plugin *mysql_optional_plugins[]= { @@ -35,5 +39,8 @@ struct st_maria_plugin *mysql_optional_plugins[]= struct st_maria_plugin *mysql_mandatory_plugins[]= { builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin, +#ifdef WITH_WSREP + builtin_wsrep_plugin@mysql_plugin_defs@, +#endif /* WITH_WSREP */ @mysql_mandatory_plugins@ 0 }; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d7d0c8d3f68..5b63201a910 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -62,6 +62,9 @@ #include "debug_sync.h" #include "sql_parse.h" // is_update_query #include "sql_callback.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif #include "sql_connect.h" /* @@ -695,6 +698,137 @@ char *thd_security_context(THD *thd, char *buffer, unsigned int length, return buffer; } +#ifdef 
WITH_WSREP +extern "C" int wsrep_on(void *thd) +{ + return (int)(WSREP(((THD*)thd))); +} +extern "C" bool wsrep_thd_is_wsrep_on(THD *thd) +{ + return thd->variables.wsrep_on; +} + +extern "C" bool wsrep_consistency_check(void *thd) +{ + return ((THD*)thd)->wsrep_consistency_check; +} + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode) +{ + thd->wsrep_exec_mode= mode; +} +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state) +{ + thd->wsrep_query_state= state; +} +extern "C" void wsrep_thd_set_conflict_state( + THD *thd, enum wsrep_conflict_state state) +{ + thd->wsrep_conflict_state= state; +} + + +extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd) +{ + return thd->wsrep_exec_mode; +} +extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd) +{ + return thd->wsrep_query_state; +} +extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd) +{ + return thd->wsrep_conflict_state; +} + +extern "C" wsrep_trx_handle_t* wsrep_thd_trx_handle(THD *thd) +{ + return &thd->wsrep_trx_handle; +} + +extern "C"void wsrep_thd_LOCK(THD *thd) +{ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); +} +extern "C"void wsrep_thd_UNLOCK(THD *thd) +{ + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); +} +extern "C" time_t wsrep_thd_query_start(THD *thd) +{ + return thd->query_start(); +} +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd) +{ + return thd->wsrep_rand; +} +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd) +{ + return thd->thread_id; +} +extern "C" wsrep_seqno_t wsrep_thd_trx_seqno(THD *thd) +{ + return thd->wsrep_trx_seqno; +} +extern "C" query_id_t wsrep_thd_query_id(THD *thd) +{ + return thd->query_id; +} +extern "C" char *wsrep_thd_query(THD *thd) +{ + return thd->query(); +} +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd) +{ + return thd->wsrep_last_query_id; +} +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id) +{ + thd->wsrep_last_query_id= id; +} 
+extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) +{ + if (signal) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->awake(KILL_QUERY); + mysql_mutex_unlock(&thd->LOCK_thd_data); + } + else + { + mysql_mutex_lock(&LOCK_wsrep_replaying); + mysql_cond_broadcast(&COND_wsrep_replaying); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } +} + +extern "C" int +wsrep_trx_order_before(void *thd1, void *thd2) +{ + if (((THD*)thd1)->wsrep_trx_seqno < ((THD*)thd2)->wsrep_trx_seqno) { + WSREP_DEBUG("BF conflict, order: %lld %lld\n", + (long long)((THD*)thd1)->wsrep_trx_seqno, + (long long)((THD*)thd2)->wsrep_trx_seqno); + return 1; + } + WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n", + (long long)((THD*)thd1)->wsrep_trx_seqno, + (long long)((THD*)thd2)->wsrep_trx_seqno); + return 0; +} +extern "C" int +wsrep_trx_is_aborting(void *thd_ptr) +{ + if (thd_ptr) { + if ((((THD *)thd_ptr)->wsrep_conflict_state == MUST_ABORT) || + (((THD *)thd_ptr)->wsrep_conflict_state == ABORTING)) { + return 1; + } + } + return 0; +} +#endif /** Implementation of Drop_table_error_handler::handle_condition(). 
@@ -723,7 +857,11 @@ bool Drop_table_error_handler::handle_condition(THD *thd, } +#ifdef WITH_WSREP +THD::THD(bool is_applier) +#else THD::THD() +#endif :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION, /* statement id */ 0), rli_fake(0), @@ -750,6 +888,10 @@ THD::THD() bootstrap(0), derived_tables_processing(FALSE), spcont(NULL), +#ifdef WITH_WSREP + wsrep_applier(is_applier), + wsrep_client_thread(0), +#endif m_parser_state(NULL), #if defined(ENABLED_DEBUG_SYNC) debug_sync_control(0), @@ -849,6 +991,20 @@ THD::THD() command=COM_CONNECT; *scramble= '\0'; +#ifdef WITH_WSREP + mysql_mutex_init(key_LOCK_wsrep_thd, &LOCK_wsrep_thd, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_thd, &COND_wsrep_thd, NULL); + wsrep_trx_handle.trx_id= -1; + wsrep_trx_handle.opaque= NULL; + //wsrep_retry_autocommit= ::wsrep_retry_autocommit; + wsrep_retry_counter= 0; + wsrep_PA_safe = true; + wsrep_seqno_changed= false; + wsrep_retry_query = NULL; + wsrep_retry_query_len = 0; + wsrep_retry_command = COM_CONNECT; + wsrep_consistency_check = false; +#endif /* Call to init() below requires fully initialized Open_tables_state. */ reset_open_tables_state(this); @@ -881,6 +1037,13 @@ THD::THD() my_rnd_init(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id); substitute_null_with_insert_id = FALSE; thr_lock_info_init(&lock_info); /* safety: will be reset after start */ +#ifdef WITH_WSREP + lock_info.mysql_thd= (void *)this; + lock_info.in_lock_tables= false; +#ifdef WSREP_PROC_INFO + wsrep_info[sizeof(wsrep_info) - 1] = '\0'; /* make sure it is 0-terminated */ +#endif /* WSREP_PROC_INFO */ +#endif /* WITH_WSREP */ m_internal_handler= NULL; m_binlog_invoker= FALSE; @@ -1182,7 +1345,19 @@ void THD::init(void) reset_current_stmt_binlog_format_row(); bzero((char *) &status_var, sizeof(status_var)); bzero((char *) &org_status_var, sizeof(org_status_var)); - +#ifdef WITH_WSREP + wsrep_exec_mode= wsrep_applier ? 
REPL_RECV : LOCAL_STATE; + wsrep_conflict_state= NO_CONFLICT; + wsrep_query_state= QUERY_IDLE; + wsrep_last_query_id= 0; + wsrep_trx_seqno= 0; + wsrep_converted_lock_session= false; + wsrep_retry_counter= 0; + wsrep_rli= NULL; + wsrep_PA_safe= true; + wsrep_seqno_changed= false; + wsrep_consistency_check = false; +#endif if (variables.sql_log_bin) variables.option_bits|= OPTION_BIN_LOG; else @@ -1376,6 +1551,12 @@ THD::~THD() mysql_mutex_unlock(&LOCK_thd_data); add_to_status(&global_status_var, &status_var); +#ifdef WITH_WSREP + mysql_mutex_lock(&LOCK_wsrep_thd); + mysql_mutex_unlock(&LOCK_wsrep_thd); + mysql_mutex_destroy(&LOCK_wsrep_thd); + if (wsrep_rli) delete wsrep_rli; +#endif /* Close connection */ #ifndef EMBEDDED_LIBRARY if (net.vio) @@ -1790,6 +1971,13 @@ void THD::cleanup_after_query() /* reset table map for multi-table update */ table_map_for_update= 0; m_binlog_invoker= FALSE; +#ifdef WITH_WSREP + if (TOTAL_ORDER == wsrep_exec_mode) + { + wsrep_exec_mode = LOCAL_STATE; + } + //wsrep_trx_seqno = 0; +#endif /* WITH_WSREP */ DBUG_VOID_RETURN; } @@ -2205,6 +2393,13 @@ bool sql_exchange::escaped_given(void) bool select_send::send_result_set_metadata(List<Item> &list, uint flags) { bool res; +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_retry_query) + { + WSREP_DEBUG("skipping select metadata"); + return FALSE; + } +#endif /* WITH_WSREP */ if (!(res= thd->protocol->send_result_set_metadata(&list, flags))) is_result_set_started= 1; return res; @@ -3911,8 +4106,13 @@ extern "C" int thd_non_transactional_update(const MYSQL_THD thd) extern "C" int thd_binlog_format(const MYSQL_THD thd) { +#ifdef WITH_WSREP + if (((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()) && + (thd->variables.option_bits & OPTION_BIN_LOG)) +#else if (mysql_bin_log.is_open() && (thd->variables.option_bits & OPTION_BIN_LOG)) - return (int) thd->variables.binlog_format; +#endif + return (int) WSREP_FORMAT(thd->variables.binlog_format); else return BINLOG_FORMAT_UNSPEC; } 
@@ -4467,7 +4667,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) binlog by filtering rules. */ if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) && - !(variables.binlog_format == BINLOG_FORMAT_STMT && + !(WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT && !binlog_filter->db_ok(db))) { /* @@ -4631,7 +4831,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) */ my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0)); } - else if (variables.binlog_format == BINLOG_FORMAT_ROW && + else if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW && sqlcom_can_generate_row_events(this)) { /* @@ -4660,7 +4860,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) else { /* binlog_format = STATEMENT */ - if (variables.binlog_format == BINLOG_FORMAT_STMT) + if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT) { if (lex->is_stmt_row_injection()) { @@ -4677,7 +4877,14 @@ int THD::decide_logging_format(TABLE_LIST *tables) 5. Error: Cannot modify table that uses a storage engine limited to row-logging when binlog_format = STATEMENT */ +#ifdef WITH_WSREP + if (!WSREP(this) || wsrep_exec_mode == LOCAL_STATE) + { +#endif /* WITH_WSREP */ my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), ""); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ } else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0) { @@ -4725,7 +4932,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) "and binlog_filter->db_ok(db) = %d", mysql_bin_log.is_open(), (variables.option_bits & OPTION_BIN_LOG), - variables.binlog_format, + WSREP_FORMAT(variables.binlog_format), binlog_filter->db_ok(db))); #endif @@ -4979,7 +5186,13 @@ int THD::binlog_write_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) { +#ifdef WITH_WSREP + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + ((WSREP(this) && wsrep_emulate_bin_log) || + mysql_bin_log.is_open())); +#else 
DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +#endif /* Pack records into format for transfer. We are allocating more @@ -5009,7 +5222,13 @@ int THD::binlog_update_row(TABLE* table, bool is_trans, const uchar *before_record, const uchar *after_record) { +#ifdef WITH_WSREP + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + ((WSREP(this) && wsrep_emulate_bin_log) + || mysql_bin_log.is_open())); +#else DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +#endif size_t const before_maxlen = max_row_length(table, before_record); size_t const after_maxlen = max_row_length(table, after_record); @@ -5054,7 +5273,13 @@ int THD::binlog_delete_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) { +#ifdef WITH_WSREP + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + ((WSREP(this) && wsrep_emulate_bin_log) + || mysql_bin_log.is_open())); +#else DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +#endif /* Pack records into format for transfer. We are allocating more @@ -5085,7 +5310,11 @@ int THD::binlog_remove_pending_rows_event(bool clear_maps, { DBUG_ENTER("THD::binlog_remove_pending_rows_event"); +#ifdef WITH_WSREP + if (!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open())) +#else if (!mysql_bin_log.is_open()) +#endif DBUG_RETURN(0); mysql_bin_log.remove_pending_rows_event(this, is_transactional); @@ -5104,7 +5333,11 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional) mode: it might be the case that we left row-based mode before flushing anything (e.g., if we have explicitly locked tables). 
*/ +#ifdef WITH_WSREP + if (!(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open())) +#else if (!mysql_bin_log.is_open()) +#endif DBUG_RETURN(0); /* @@ -5224,8 +5457,12 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg, DBUG_ENTER("THD::binlog_query"); DBUG_PRINT("enter", ("qtype: %s query: '%-.*s'", show_query_type(qtype), (int) query_len, query_arg)); +#ifdef WITH_WSREP + DBUG_ASSERT(query_arg && (WSREP_EMULATE_BINLOG(this) + || mysql_bin_log.is_open())); +#else DBUG_ASSERT(query_arg && mysql_bin_log.is_open()); - +#endif /* If we are not in prelocked mode, mysql_unlock_tables() will be called after this binlog_query(), so we have to flush the pending diff --git a/sql/sql_class.h b/sql/sql_class.h index c6c46975076..96a67fccaef 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -20,6 +20,32 @@ #define SQL_CLASS_INCLUDED /* Classes in mysql */ +#ifdef WITH_WSREP +#include "../wsrep/wsrep_api.h" +//#include "wsrep_mysqld.h" + enum wsrep_exec_mode { + LOCAL_STATE, + REPL_RECV, + TOTAL_ORDER, + LOCAL_COMMIT, + }; + enum wsrep_query_state { + QUERY_IDLE, + QUERY_EXEC, + QUERY_COMMITTING, + QUERY_EXITING, + QUERY_ROLLINGBACK, + }; + enum wsrep_conflict_state { + NO_CONFLICT, + MUST_ABORT, + ABORTING, + ABORTED, + MUST_REPLAY, + REPLAYING, + RETRY_AUTOCOMMIT, + }; +#endif #ifdef USE_PRAGMA_INTERFACE #pragma interface /* gcc class implementation */ @@ -45,6 +71,15 @@ THR_LOCK_INFO */ +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +struct wsrep_thd_shadow { + ulonglong options; + enum wsrep_exec_mode wsrep_exec_mode; + Vio *vio; + ulong tx_isolation; +}; +#endif class Reprepare_observer; class Relay_log_info; @@ -572,6 +607,11 @@ typedef struct system_variables ulong wt_timeout_short, wt_deadlock_search_depth_short; ulong wt_timeout_long, wt_deadlock_search_depth_long; +#ifdef WITH_WSREP + my_bool wsrep_on; + my_bool wsrep_causal_reads; + ulong wsrep_retry_autocommit; +#endif double long_query_time_double; } SV; @@ -965,6 +1005,9 @@ 
struct st_savepoint { /** State of metadata locks before this savepoint was set. */ MDL_savepoint mdl_savepoint; }; +#ifdef WITH_WSREP +void wsrep_cleanup_transaction(THD *thd); // THD.transactions.cleanup calls it +#endif enum xa_states {XA_NOTR=0, XA_ACTIVE, XA_IDLE, XA_PREPARED, XA_ROLLBACK_ONLY}; extern const char *xa_state_names[]; @@ -1781,7 +1824,7 @@ public: int is_current_stmt_binlog_format_row() const { DBUG_ASSERT(current_stmt_binlog_format == BINLOG_FORMAT_STMT || current_stmt_binlog_format == BINLOG_FORMAT_ROW); - return current_stmt_binlog_format == BINLOG_FORMAT_ROW; + return (WSREP_FORMAT((ulong)current_stmt_binlog_format) == BINLOG_FORMAT_ROW); } private: @@ -1839,7 +1882,11 @@ public: */ CHANGED_TABLE_LIST* changed_tables; MEM_ROOT mem_root; // Transaction-life memory allocation pool +#ifdef WITH_WSREP + void cleanup(THD *thd) +#else void cleanup() +#endif { changed_tables= 0; savepoints= 0; @@ -1852,6 +1899,11 @@ public: if (!xid_state.rm_error) xid_state.xid.null(); free_root(&mem_root,MYF(MY_KEEP_PREALLOC)); +#ifdef WITH_WSREP + // Todo: convert into a plugin method + // wsrep's post-commit. 
LOCAL_COMMIT designates wsrep's commit was ok + if (WSREP(thd)) wsrep_cleanup_transaction(thd); +#endif /* WITH_WSREP */ } my_bool is_active() { @@ -2314,6 +2366,31 @@ public: query_id_t first_query_id; } binlog_evt_union; +#ifdef WITH_WSREP + const bool wsrep_applier; /* dedicated slave applier thread */ + bool wsrep_client_thread; /* to identify client threads*/ + enum wsrep_exec_mode wsrep_exec_mode; + query_id_t wsrep_last_query_id; + enum wsrep_query_state wsrep_query_state; + enum wsrep_conflict_state wsrep_conflict_state; + mysql_mutex_t LOCK_wsrep_thd; + mysql_cond_t COND_wsrep_thd; + wsrep_seqno_t wsrep_trx_seqno; + uint32 wsrep_rand; + Relay_log_info* wsrep_rli; + bool wsrep_converted_lock_session; + wsrep_trx_handle_t wsrep_trx_handle; + bool wsrep_seqno_changed; +#ifdef WSREP_PROC_INFO + char wsrep_info[128]; /* string for dynamic proc info */ +#endif /* WSREP_PROC_INFO */ + ulong wsrep_retry_counter; // of autocommit + bool wsrep_PA_safe; + char* wsrep_retry_query; + size_t wsrep_retry_query_len; + enum enum_server_command wsrep_retry_command; + bool wsrep_consistency_check; +#endif /* WITH_WSREP */ /** Internal parser state. Note that since the parser is not re-entrant, we keep only one parser @@ -2345,7 +2422,11 @@ public: /* Debug Sync facility. See debug_sync.cc. */ struct st_debug_sync_control *debug_sync_control; #endif /* defined(ENABLED_DEBUG_SYNC) */ +#ifdef WITH_WSREP + THD(bool is_applier = false); +#else THD(); +#endif ~THD(); void init(void); @@ -2741,7 +2822,7 @@ public: tests fail and so force them to propagate the lex->binlog_row_based_if_mixed upwards to the caller. 
*/ - if ((variables.binlog_format == BINLOG_FORMAT_MIXED) && + if ((WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_MIXED) && (in_sub_stmt == 0)) set_current_stmt_binlog_format_row(); @@ -2783,7 +2864,7 @@ public: show_system_thread(system_thread))); if (in_sub_stmt == 0) { - if (variables.binlog_format == BINLOG_FORMAT_ROW) + if (WSREP_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW) set_current_stmt_binlog_format_row(); else if (temporary_tables == NULL) clear_current_stmt_binlog_format_row(); diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 7157b1b109f..e96068484eb 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -44,6 +44,9 @@ HASH global_index_stats; extern mysql_mutex_t LOCK_global_user_client_stats; extern mysql_mutex_t LOCK_global_table_stats; extern mysql_mutex_t LOCK_global_index_stats; +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* Get structure for logging connection data for the current user @@ -97,6 +100,9 @@ int get_or_create_user_conn(THD *thd, const char *user, } thd->user_connect=uc; uc->connections++; +#ifdef WITH_WSREP + thd->wsrep_client_thread= 1; +#endif /* WITH_WSREP */ end: mysql_mutex_unlock(&LOCK_user_conn); return return_val; @@ -977,7 +983,11 @@ bool setup_connection_thread_globals(THD *thd) { if (thd->store_globals()) { +#ifdef WITH_WSREP + close_connection(thd, ER_OUT_OF_RESOURCES, 1); +#else close_connection(thd, ER_OUT_OF_RESOURCES); +#endif statistic_increment(aborted_connects,&LOCK_status); MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 0)); return 1; // Error @@ -1050,6 +1060,17 @@ bool login_connection(THD *thd) void end_connection(THD *thd) { NET *net= &thd->net; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + wsrep_status_t rcode= wsrep->free_connection(wsrep, thd->thread_id); + if (rcode) { + WSREP_WARN("wsrep failed to free connection context: %lu, code: %d", + thd->thread_id, rcode); + } + } + thd->wsrep_client_thread= 0; +#endif plugin_thdvar_cleanup(thd); if (thd->user_connect) @@ 
-1205,7 +1226,11 @@ void do_handle_one_connection(THD *thd_arg) if (MYSQL_CALLBACK_ELSE(thd->scheduler, init_new_connection_thread, (), 0)) { +#ifdef WITH_WSREP + close_connection(thd, ER_OUT_OF_RESOURCES, 1); +#else close_connection(thd, ER_OUT_OF_RESOURCES); +#endif statistic_increment(aborted_connects,&LOCK_status); MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 0)); return; @@ -1254,9 +1279,21 @@ void do_handle_one_connection(THD *thd_arg) break; } end_connection(thd); - + +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXITING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif end_thread: +#ifdef WITH_WSREP + close_connection(thd, 0, 1); +#else close_connection(thd); +#endif if (thd->userstat_running) update_global_user_stats(thd, create_user, time(NULL)); diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index ff88bf7c0f8..7fc623daf88 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -410,7 +410,11 @@ cleanup: /* See similar binlogging code in sql_update.cc, for comments */ if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { +#ifdef WITH_WSREP + if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open())) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (error < 0) @@ -861,7 +865,11 @@ void multi_delete::abort_result_set() /* there is only side effects; to binlog with the error */ +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); /* possible error of writing binary log is ignored deliberately */ @@ -1037,7 +1045,11 @@ bool multi_delete::send_eof() } if ((local_error == 0) || thd->transaction.stmt.modified_non_trans_table) { +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (local_error == 0) diff --git 
a/sql/sql_insert.cc b/sql/sql_insert.cc index 54f94ce78c1..a4eb1e8996f 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -1019,7 +1019,11 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, thd->transaction.stmt.modified_non_trans_table || was_insert_delayed) { +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (error <= 0) @@ -3113,6 +3117,11 @@ bool Delayed_insert::handle_inserts(void) mysql_cond_broadcast(&cond_client); // If waiting clients } } +#ifdef WITH_WSREP + if (WSREP((&thd))) + thd_proc_info(&thd, "insert done"); + else +#endif /* WITH_WSREP */ thd_proc_info(&thd, 0); mysql_mutex_unlock(&mutex); @@ -3597,8 +3606,13 @@ bool select_insert::send_eof() events are in the transaction cache and will be written when ha_autocommit_or_rollback() is issued below. */ +#ifdef WITH_WSREP + if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) && + (!error || thd->transaction.stmt.modified_non_trans_table)) +#else if (mysql_bin_log.is_open() && (!error || thd->transaction.stmt.modified_non_trans_table)) +#endif { int errcode= 0; if (!error) @@ -3681,7 +3695,11 @@ void select_insert::abort_result_set() { if (!can_rollback_data()) thd->transaction.all.modified_non_trans_table= TRUE; +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); /* error of writing binary log is ignored */ @@ -4072,7 +4090,11 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) /* show_database */ TRUE); DBUG_ASSERT(result == 0); /* store_create_info() always return 0 */ +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); result= thd->binlog_query(THD::STMT_QUERY_TYPE, diff --git a/sql/sql_lex.cc 
b/sql/sql_lex.cc index b2dfae5ded4..070bcdfcfd4 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1548,6 +1548,17 @@ int lex_one_token(void *arg, void *yythd) } else { +#ifdef WITH_WSREP + if (version == 99997 && thd->wsrep_exec_mode == LOCAL_STATE) + { + WSREP_DEBUG("consistency check: %s", thd->query()); + thd->wsrep_consistency_check= TRUE; + lip->yySkipn(5); + lip->set_echo(TRUE); + state=MY_LEX_START; + break; /* Do not treat contents as a comment. */ + } +#endif /* WITH_WSREP */ /* Patch and skip the conditional comment to avoid it being propagated infinitely (eg. to a slave). diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index e0b2acd199d..a955b434319 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -103,6 +103,24 @@ #include "../storage/maria/ha_maria.h" #endif +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#include "rpl_rli.h" + +extern Format_description_log_event *wsrep_format_desc; +#define WSREP_MYSQL_DB (char *)"mysql" + +#define WSREP_TO_ISOLATION_BEGIN(db_, table_) \ + if (WSREP(thd) && wsrep_to_isolation_begin(thd, db_, table_)) goto error; + +#define WSREP_TO_ISOLATION_END \ + if (WSREP(thd) || (thd && thd->wsrep_exec_mode==TOTAL_ORDER)) \ + wsrep_to_isolation_end(thd); + +#else +#define WSREP_TO_ISOLATION_BEGIN(db_, table_) +#define WSREP_TO_ISOLATION_END +#endif /* WITH_WSREP */ /** @defgroup Runtime_Environment Runtime Environment @{ @@ -436,6 +454,13 @@ bool is_log_table_write_query(enum enum_sql_command command) return (sql_command_flags[command] & CF_WRITE_LOGS_COMMAND) != 0; } +#ifdef WITH_WSREP +bool is_show_query(enum enum_sql_command command) +{ + DBUG_ASSERT(command >= 0 && command <= SQLCOM_END); + return (sql_command_flags[command] & CF_STATUS_COMMAND) != 0; +} +#endif void execute_init_command(THD *thd, LEX_STRING *init_command, mysql_rwlock_t *var_lock) { @@ -617,8 +642,12 @@ void do_handle_bootstrap(THD *thd) if (my_thread_init() || thd->store_globals()) { #ifndef EMBEDDED_LIBRARY +#ifdef WITH_WSREP + 
close_connection(thd, ER_OUT_OF_RESOURCES, 1); +#else close_connection(thd, ER_OUT_OF_RESOURCES); #endif +#endif thd->fatal_error(); goto end; } @@ -692,7 +721,26 @@ bool do_command(THD *thd) NET *net= &thd->net; enum enum_server_command command; DBUG_ENTER("do_command"); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_IDLE; + if (thd->wsrep_conflict_state==MUST_ABORT) + { + thd->wsrep_conflict_state= ABORTING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + trans_rollback(thd); + thd->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. */ + thd->mdl_context.release_transactional_locks(); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state= ABORTED; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* indicator of uninitialized lex => normal flow of errors handling (see my_message_sql) @@ -738,11 +786,57 @@ bool do_command(THD *thd) */ DEBUG_SYNC(thd, "before_do_command_net_read"); +#ifdef WITH_WSREP + if (WSREP(thd)) { + packet_length= my_net_read(net); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + /* these THD's are aborted or are aborting during being idle */ + if (thd->wsrep_conflict_state == ABORTING) + { + while (thd->wsrep_conflict_state == ABORTING) { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + my_sleep(1000); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + } + thd->store_globals(); + } + else if (thd->wsrep_conflict_state == ABORTED) + { + thd->store_globals(); + } + + thd->wsrep_query_state= QUERY_EXEC; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + if ((WSREP(thd) && packet_length == packet_error) || + (!WSREP(thd) && (packet_length= my_net_read(net)) == packet_error)) +#else if ((packet_length= my_net_read(net)) == packet_error) +#endif { DBUG_PRINT("info",("Got error %d reading command from socket %s", net->error, vio_description(net->vio))); +#ifdef WITH_WSREP + if (WSREP(thd)) { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + 
if (thd->wsrep_conflict_state == MUST_ABORT) + { + DBUG_PRINT("wsrep",("aborted for wsrep rollback: %lu", thd->real_id)); + thd->wsrep_conflict_state= ABORTING; + + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + trans_rollback(thd); + thd->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. */ + thd->mdl_context.release_transactional_locks(); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state= ABORTED; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* Check if we can continue without closing the connection */ @@ -788,12 +882,54 @@ bool do_command(THD *thd) vio_description(net->vio), command, command_name[command].str)); +#ifdef WITH_WSREP + if (WSREP(thd)) { + /* + * bail out if DB snapshot has not been installed. We however, + * allow queries "SET" and "SHOW", they are trapped later in execute_command + */ + if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready && + command != COM_QUERY && + command != COM_PING && + command != COM_QUIT && + command != COM_PROCESS_INFO && + command != COM_PROCESS_KILL && + command != COM_SET_OPTION && + command != COM_SHUTDOWN && + command != COM_SLEEP && + command != COM_STATISTICS && + command != COM_TIME && + command != COM_END + ) { + my_error(ER_UNKNOWN_COM_ERROR, MYF(0), + "WSREP has not yet prepared node for application use"); + thd->protocol->end_statement(); + return_value= FALSE; + goto out; + } + } +#endif /* Restore read timeout value */ my_net_set_read_timeout(net, thd->variables.net_read_timeout); DBUG_ASSERT(packet_length); return_value= dispatch_command(command, thd, packet+1, (uint) (packet_length-1)); - +#ifdef WITH_WSREP + if (WSREP(thd)) { + while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + return_value= dispatch_command(command, thd, thd->wsrep_retry_query, + thd->wsrep_retry_query_len); + } + } + if (thd->wsrep_retry_query) + { + my_free(thd->wsrep_retry_query); + thd->wsrep_retry_query = NULL; + 
thd->wsrep_retry_query_len = 0; + thd->wsrep_retry_command = COM_CONNECT; + } +#endif out: DBUG_RETURN(return_value); } @@ -867,6 +1003,34 @@ static my_bool deny_updates_if_read_only_option(THD *thd, DBUG_RETURN(FALSE); } +#ifdef WITH_WSREP +static my_bool wsrep_read_only_option(THD *thd, TABLE_LIST *all_tables) +{ + int opt_readonly_saved = opt_readonly; + ulong flag_saved = (ulong)(thd->security_ctx->master_access & SUPER_ACL); + + opt_readonly = 0; + thd->security_ctx->master_access &= ~SUPER_ACL; + + my_bool ret = !deny_updates_if_read_only_option(thd, all_tables); + + opt_readonly = opt_readonly_saved; + thd->security_ctx->master_access |= flag_saved; + + return ret; +} + +static void wsrep_copy_query(THD *thd) +{ + thd->wsrep_retry_command = thd->command; + thd->wsrep_retry_query_len = thd->query_length(); + thd->wsrep_retry_query = (char *)my_malloc( + thd->wsrep_retry_query_len + 1, MYF(0)); + thd->wsrep_retry_command = thd->command; + strcpy(thd->wsrep_retry_query, thd->query()); + thd->wsrep_retry_query[thd->wsrep_retry_query_len] = '\0'; +} +#endif /* WITH_WSREP */ /** Perform one connection-level (COM_XXXX) command. 
@@ -896,6 +1060,50 @@ bool dispatch_command(enum enum_server_command command, THD *thd, DBUG_ENTER("dispatch_command"); DBUG_PRINT("info", ("command: %d", command)); +#ifdef WITH_WSREP + bool is_autocommit= false; + + if (WSREP(thd)) { + if (!thd->in_multi_stmt_transaction_mode()) + { + thd->wsrep_PA_safe= true; + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXEC; + if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + thd->wsrep_conflict_state= NO_CONFLICT; + } + + is_autocommit= !thd->in_multi_stmt_transaction_mode() && + thd->wsrep_conflict_state == NO_CONFLICT && + !thd->wsrep_applier && + wsrep_read_only_option(thd, thd->lex->query_tables); + + if (thd->wsrep_conflict_state== MUST_ABORT) + { + thd->wsrep_conflict_state= ABORTING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + trans_rollback(thd); + thd->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. */ + thd->mdl_context.release_transactional_locks(); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state= ABORTED; + } + if (thd->wsrep_conflict_state== ABORTED) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + WSREP_DEBUG("Deadlock error for: %s", thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + thd->killed= NOT_KILLED; + thd->mysys_var->abort= 0; + goto dispatch_end; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ #if defined(ENABLED_PROFILING) thd->profiling.start_new_query(); #endif @@ -1104,7 +1312,12 @@ bool dispatch_command(enum enum_server_command command, THD *thd, Count each statement from the client. */ statistic_increment(thd->status_var.questions, &LOCK_status); +#ifdef WITH_WSREP + if (!WSREP(thd)) + thd->set_time(); /* Reset the query start time. */ +#else thd->set_time(); /* Reset the query start time. 
*/ +#endif parser_state.reset(beginning_of_next_stmt, length); /* TODO: set thd->lex->sql_command to SQLCOM_END here */ mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); @@ -1422,6 +1635,156 @@ bool dispatch_command(enum enum_server_command command, THD *thd, my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0)); break; } +#ifdef WITH_WSREP + dispatch_end: + + if (WSREP(thd)) { + /* wsrep BF abort in query exec phase */ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + ha_rollback_trans(thd, 0); + thd->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. */ + thd->mdl_context.release_transactional_locks(); + thd->transaction.stmt.reset(); + WSREP_DEBUG("abort in exec query state, avoiding autocommit"); + goto wsrep_must_abort; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ + +#ifdef WITH_WSREP + wsrep_must_abort: + if (WSREP(thd)) { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + thd->wsrep_conflict_state= ABORTING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + WSREP_DEBUG("in dispatch_command, aborting %s", + (thd->query()) ? thd->query() : "void"); + trans_rollback(thd); + thd->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. 
*/ + thd->mdl_context.release_transactional_locks(); + + if (thd->get_binlog_table_maps()) { + thd->clear_binlog_table_maps(); + } + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state= ABORTED; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + /* checking if BF trx must be replayed */ + if (thd->wsrep_conflict_state== MUST_REPLAY) { + if (thd->wsrep_exec_mode!= REPL_RECV) { + if (thd->stmt_da->is_sent) { + WSREP_ERROR("replay issue, thd has reported status already"); + } + thd->stmt_da->reset_diagnostics_area(); + + thd->wsrep_conflict_state= REPLAYING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + mysql_reset_thd_for_next_command(thd, opt_userstat_running); + thd->killed= NOT_KILLED; + close_thread_tables(thd); + + thd_proc_info(thd, "wsrep replaying trx"); + WSREP_DEBUG("replay trx: %s %lld", + thd->query() ? thd->query() : "void", + (long long)thd->wsrep_trx_seqno); + struct wsrep_thd_shadow shadow; + wsrep_prepare_bf_thd(thd, &shadow); + int rcode = wsrep->replay_trx(wsrep, + &thd->wsrep_trx_handle, + (void *)thd); + + wsrep_return_from_bf_mode(thd, &shadow); + if (thd->wsrep_conflict_state!= REPLAYING) + WSREP_WARN("lost replaying mode: %d", thd->wsrep_conflict_state ); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + switch (rcode) { + case WSREP_OK: + thd->wsrep_conflict_state= NO_CONFLICT; + wsrep->post_commit(wsrep, &thd->wsrep_trx_handle); + WSREP_DEBUG("trx_replay successful for: %ld %llu", + thd->thread_id, (long long)thd->real_id); + break; + case WSREP_TRX_FAIL: + if (thd->stmt_da->is_sent) { + WSREP_ERROR("replay failed, thd has reported status"); + } + else + { + WSREP_DEBUG("replay failed, rolling back"); + my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + } + thd->wsrep_conflict_state= ABORTED; + wsrep->post_rollback(wsrep, &thd->wsrep_trx_handle); + break; + default: + WSREP_ERROR("trx_replay failed for: %d, query: %s", + rcode, thd->query() ? 
thd->query() : "void"); + /* we're now in inconsistent state, must abort */ + unireg_abort(1); + break; + } + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + WSREP_DEBUG("replaying decreased: %d, thd: %lu", + wsrep_replaying, thd->thread_id); + mysql_cond_broadcast(&COND_wsrep_replaying); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } + } + /* setting error code for BF aborted trxs */ + if (thd->wsrep_conflict_state == ABORTED) + { + mysql_reset_thd_for_next_command(thd, opt_userstat_running); + thd->killed= NOT_KILLED; + if (is_autocommit && + (thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit)) + { + WSREP_DEBUG("wsrep retrying AC query: %s", + (thd->query()) ? thd->query() : "void"); + thd->wsrep_conflict_state= RETRY_AUTOCOMMIT; + thd->wsrep_retry_counter++; // grow + wsrep_copy_query(thd); + } + else + { + WSREP_DEBUG("BF aborted, thd: %lu is_AC: %d, retry: %lu - %lu SQL: %s", + thd->thread_id, is_autocommit, thd->wsrep_retry_counter, + thd->variables.wsrep_retry_autocommit, thd->query()); + my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + thd->killed= NOT_KILLED; + thd->wsrep_conflict_state= NO_CONFLICT; + thd->wsrep_retry_counter= 0; // reset + } + } + else + { + set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok + } + if ((thd->wsrep_conflict_state != REPLAYING) && + (thd->wsrep_conflict_state != RETRY_AUTOCOMMIT)) { + + thd->update_server_status(); + thd->protocol->end_statement(); + query_cache_end_of_result(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + } else { /* if (WSREP(thd))... 
*/ + +#endif /* WITH_WSREP */ DBUG_ASSERT(thd->derived_tables == NULL && (thd->open_tables == NULL || (thd->locked_tables_mode == LTM_LOCK_TABLES))); @@ -1430,6 +1793,9 @@ bool dispatch_command(enum enum_server_command command, THD *thd, thd->update_server_status(); thd->protocol->end_statement(); query_cache_end_of_result(thd); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ if (!thd->is_error() && !thd->killed_errno()) mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_RESULT, 0, 0); @@ -1446,7 +1812,16 @@ bool dispatch_command(enum enum_server_command command, THD *thd, thd->reset_query(); thd->command=COM_SLEEP; dec_thread_running(); +#ifdef WITH_WSREP + if (WSREP(thd)) { + thd_proc_info(thd, "sleeping"); + } else { +#endif /* WITH_WSREP */ thd_proc_info(thd, 0); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ + thd->packet.shrink(thd->variables.net_buffer_length); // Reclaim some memory free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); @@ -2091,7 +2466,45 @@ mysql_execute_command(THD *thd) #ifdef HAVE_REPLICATION } /* endif unlikely slave */ #endif +#ifdef WITH_WSREP + if (WSREP(thd)) { + /* + change LOCK TABLE WRITE to transaction + */ + if (lex->sql_command== SQLCOM_LOCK_TABLES && wsrep_convert_LOCK_to_trx) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (table->lock_type >= TL_WRITE_ALLOW_WRITE) + { + lex->sql_command= SQLCOM_BEGIN; + thd->wsrep_converted_lock_session= true; + break; + } + } + } + if (lex->sql_command== SQLCOM_UNLOCK_TABLES && + thd->wsrep_converted_lock_session) + { + thd->wsrep_converted_lock_session= false; + lex->sql_command= SQLCOM_COMMIT; + lex->tx_release= TVL_NO; + } + /* + * bail out if DB snapshot has not been installed. 
We however, + * allow SET and SHOW queries + */ + if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready && + lex->sql_command != SQLCOM_SET_OPTION && + !is_show_query(lex->sql_command)) + { + my_error(ER_UNKNOWN_COM_ERROR, MYF(0), + "WSREP has not yet prepared node for application use"); + goto error; + } + } +#endif /* WITH_WSREP */ status_var_increment(thd->status_var.com_stat[lex->sql_command]); thd->progress.report_to_client= test(sql_command_flags[lex->sql_command] & CF_REPORT_PROGRESS); @@ -2134,6 +2547,9 @@ mysql_execute_command(THD *thd) #endif case SQLCOM_SHOW_STATUS_PROC: case SQLCOM_SHOW_STATUS_FUNC: +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_causal_wait(thd)) goto error; +#endif /* WITH_WSREP */ if ((res= check_table_access(thd, SELECT_ACL, all_tables, FALSE, UINT_MAX, FALSE))) goto error; @@ -2141,6 +2557,9 @@ mysql_execute_command(THD *thd) break; case SQLCOM_SHOW_STATUS: { +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_causal_wait(thd)) goto error; +#endif /* WITH_WSREP */ execute_show_status(thd, all_tables); break; } @@ -2152,17 +2571,27 @@ mysql_execute_command(THD *thd) case SQLCOM_SHOW_PLUGINS: case SQLCOM_SHOW_FIELDS: case SQLCOM_SHOW_KEYS: +#ifndef WITH_WSREP case SQLCOM_SHOW_VARIABLES: case SQLCOM_SHOW_CHARSETS: case SQLCOM_SHOW_COLLATIONS: case SQLCOM_SHOW_STORAGE_ENGINES: case SQLCOM_SHOW_PROFILE: +#endif /* WITH_WSREP */ case SQLCOM_SHOW_CLIENT_STATS: case SQLCOM_SHOW_USER_STATS: case SQLCOM_SHOW_TABLE_STATS: case SQLCOM_SHOW_INDEX_STATS: case SQLCOM_SELECT: - { +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_causal_wait(thd)) goto error; + case SQLCOM_SHOW_VARIABLES: + case SQLCOM_SHOW_CHARSETS: + case SQLCOM_SHOW_COLLATIONS: + case SQLCOM_SHOW_STORAGE_ENGINES: + case SQLCOM_SHOW_PROFILE: +#endif /* WITH_WSREP */ + { thd->status_var.last_query_cost= 0.0; /* @@ -2472,7 +2901,7 @@ case SQLCOM_PREPARE: */ if (thd->query_name_consts && mysql_bin_log.is_open() && - thd->variables.binlog_format == BINLOG_FORMAT_STMT && + 
WSREP_FORMAT(thd->variables.binlog_format) == BINLOG_FORMAT_STMT && !mysql_bin_log.is_query_in_union(thd, thd->query_id)) { List_iterator_fast<Item> it(select_lex->item_list); @@ -2576,6 +3005,11 @@ case SQLCOM_PREPARE: if (create_info.options & HA_LEX_CREATE_TMP_TABLE) thd->variables.option_bits|= OPTION_KEEP_LOG; /* regular create */ +#ifdef WITH_WSREP + if (!thd->is_current_stmt_binlog_format_row() || + !(create_info.options & HA_LEX_CREATE_TMP_TABLE)) + WSREP_TO_ISOLATION_BEGIN(create_table->db, create_table->table_name) +#endif /* WITH_WSREP */ if (create_info.options & HA_LEX_CREATE_TABLE_LIKE) { /* CREATE TABLE ... LIKE ... */ @@ -2617,6 +3051,7 @@ end_with_restore_list: DBUG_ASSERT(first_table == all_tables && first_table != 0); if (check_one_table_access(thd, INDEX_ACL, all_tables)) goto error; /* purecov: inspected */ + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name) /* Currently CREATE INDEX or DROP INDEX cause a full table rebuild and thus classify as slow administrative statements just like @@ -2674,6 +3109,7 @@ end_with_restore_list: case SQLCOM_RENAME_TABLE: { + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name) if (execute_rename_table(thd, first_table, all_tables)) goto error; break; @@ -2701,6 +3137,10 @@ end_with_restore_list: goto error; #else { +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_causal_wait(thd)) goto error; +#endif /* WITH_WSREP */ + /* Access check: SHOW CREATE TABLE require any privileges on the table level (ie @@ -2756,6 +3196,10 @@ end_with_restore_list: case SQLCOM_CHECKSUM: { DBUG_ASSERT(first_table == all_tables && first_table != 0); +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_causal_wait(thd)) goto error; +#endif /* WITH_WSREP */ + if (check_table_access(thd, SELECT_ACL, all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ @@ -2950,6 +3394,14 @@ end_with_restore_list: DBUG_ASSERT(first_table == all_tables && first_table != 0); if ((res= insert_precheck(thd, 
all_tables))) break; +#ifdef WITH_WSREP + if (lex->sql_command == SQLCOM_INSERT_SELECT && + thd->wsrep_consistency_check) + { + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name); + } + +#endif /* INSERT...SELECT...ON DUPLICATE KEY UPDATE/REPLACE SELECT/ INSERT...IGNORE...SELECT can be unsafe, unless ORDER BY PRIMARY KEY @@ -3113,6 +3565,17 @@ end_with_restore_list: /* So that DROP TEMPORARY TABLE gets to binlog at commit/rollback */ thd->variables.option_bits|= OPTION_KEEP_LOG; } +#ifdef WITH_WSREP + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, table)) + { + WSREP_TO_ISOLATION_BEGIN(table->db, table->table_name); + break; + } + } +#endif /* WITH_WSREP */ /* DDL and binlog write order are protected by metadata locks. */ res= mysql_rm_table(thd, first_table, lex->drop_if_exists, lex->drop_temporary); @@ -3156,7 +3619,6 @@ end_with_restore_list: if (!mysql_change_db(thd, &db_str, FALSE)) my_ok(thd); - break; } @@ -3299,6 +3761,7 @@ end_with_restore_list: #endif if (check_access(thd, CREATE_ACL, lex->name.str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL) res= mysql_create_db(thd,(lower_case_table_names == 2 ? 
alias : lex->name.str), &create_info, 0); break; @@ -3328,6 +3791,7 @@ end_with_restore_list: #endif if (check_access(thd, DROP_ACL, lex->name.str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL) res= mysql_rm_db(thd, lex->name.str, lex->drop_if_exists, 0); break; } @@ -3356,6 +3820,7 @@ end_with_restore_list: res= 1; break; } + WSREP_TO_ISOLATION_BEGIN(db->str, NULL) res= mysql_upgrade_db(thd, db); if (!res) my_ok(thd); @@ -3388,6 +3853,7 @@ end_with_restore_list: #endif if (check_access(thd, ALTER_ACL, db->str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(db->str, NULL) res= mysql_alter_db(thd, db->str, &create_info); break; } @@ -3420,6 +3886,7 @@ end_with_restore_list: if (res) break; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) switch (lex->sql_command) { case SQLCOM_CREATE_EVENT: { @@ -3454,6 +3921,7 @@ end_with_restore_list: lex->spname->m_name); break; case SQLCOM_DROP_EVENT: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) if (!(res= Events::drop_event(thd, lex->spname->m_db, lex->spname->m_name, lex->drop_if_exists))) @@ -3468,6 +3936,7 @@ end_with_restore_list: if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 0)) break; #ifdef HAVE_DLOPEN + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) if (!(res = mysql_create_function(thd, &lex->udf))) my_ok(thd); #else @@ -3482,6 +3951,7 @@ end_with_restore_list: if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 1) && check_global_access(thd,CREATE_USER_ACL)) break; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) /* Conditionally writes to binlog */ if (!(res= mysql_create_user(thd, lex->users_list))) my_ok(thd); @@ -3493,6 +3963,7 @@ end_with_restore_list: check_global_access(thd,CREATE_USER_ACL)) break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) if (!(res= mysql_drop_user(thd, lex->users_list))) my_ok(thd); break; @@ -3503,6 +3974,7 @@ end_with_restore_list: check_global_access(thd,CREATE_USER_ACL)) break; /* 
Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) if (!(res= mysql_rename_user(thd, lex->users_list))) my_ok(thd); break; @@ -3517,6 +3989,7 @@ end_with_restore_list: thd->binlog_invoker(); /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) if (!(res = mysql_revoke_all(thd, lex->users_list))) my_ok(thd); break; @@ -3583,6 +4056,7 @@ end_with_restore_list: lex->type == TYPE_ENUM_PROCEDURE, 0)) goto error; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) res= mysql_routine_grant(thd, all_tables, lex->type == TYPE_ENUM_PROCEDURE, lex->users_list, grants, @@ -3596,6 +4070,7 @@ end_with_restore_list: all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) res= mysql_table_grant(thd, all_tables, lex->users_list, lex->columns, lex->grant, lex->sql_command == SQLCOM_REVOKE); @@ -3611,6 +4086,7 @@ end_with_restore_list: } else { + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) /* Conditionally writes to binlog */ res = mysql_grant(thd, select_lex->db, lex->users_list, lex->grant, lex->sql_command == SQLCOM_REVOKE, @@ -3749,9 +4225,17 @@ end_with_restore_list: able to open it (with SQLCOM_HA_OPEN) in the first place. */ unit->set_limit(select_lex); +#ifdef WITH_WSREP + { char* tmp_info= NULL; + if (WSREP(thd)) tmp_info = (char *)thd_proc_info(thd, "mysql_ha_read()"); +#endif /* WITH_WSREP */ res= mysql_ha_read(thd, first_table, lex->ha_read_mode, lex->ident.str, lex->insert_list, lex->ha_rkey_mode, select_lex->where, unit->select_limit_cnt, unit->offset_limit_cnt); +#ifdef WITH_WSREP + if (WSREP(thd)) thd_proc_info(thd, tmp_info); + } +#endif /* WITH_WSREP */ break; case SQLCOM_BEGIN: @@ -3819,8 +4303,20 @@ end_with_restore_list: /* Disconnect the current client connection. 
*/ if (tx_release) thd->killed= KILL_CONNECTION; - my_ok(thd); - break; + #ifdef WITH_WSREP + if (WSREP(thd)) { + if (thd->wsrep_conflict_state == NO_CONFLICT || + thd->wsrep_conflict_state == REPLAYING) + { + my_ok(thd); + } + } else { +#endif /* WITH_WSREP */ + my_ok(thd); + #ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ + break; } case SQLCOM_RELEASE_SAVEPOINT: if (trans_release_savepoint(thd, lex->ident)) @@ -3888,6 +4384,7 @@ end_with_restore_list: if (sp_process_definer(thd)) goto create_sp_error; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) res= (sp_result= sp_create_routine(thd, lex->sphead->m_type, lex->sphead)); switch (sp_result) { case SP_OK: { @@ -4099,6 +4596,7 @@ create_sp_error: already puts on CREATE FUNCTION. */ /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) sp_result= sp_update_routine(thd, type, lex->spname, &lex->sp_chistics); switch (sp_result) { @@ -4170,6 +4668,7 @@ create_sp_error: if (check_routine_access(thd, ALTER_PROC_ACL, db, name, lex->sql_command == SQLCOM_DROP_PROCEDURE, 0)) goto error; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) /* Conditionally writes to binlog */ sp_result= sp_drop_routine(thd, type, lex->spname); @@ -4287,6 +4786,7 @@ create_sp_error: Note: SQLCOM_CREATE_VIEW also handles 'ALTER VIEW' commands as specified through the thd->lex->create_view_mode flag. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) res= mysql_create_view(thd, first_table, thd->lex->create_view_mode); break; } @@ -4295,12 +4795,14 @@ create_sp_error: if (check_table_access(thd, DROP_ACL, all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(NULL, NULL) res= mysql_drop_view(thd, first_table, thd->lex->drop_mode); break; } case SQLCOM_CREATE_TRIGGER: { /* Conditionally writes to binlog. 
*/ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 1); break; @@ -4308,6 +4810,7 @@ create_sp_error: case SQLCOM_DROP_TRIGGER: { /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 0); break; } @@ -4358,11 +4861,13 @@ create_sp_error: my_ok(thd); break; case SQLCOM_INSTALL_PLUGIN: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) if (! (res= mysql_install_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); break; case SQLCOM_UNINSTALL_PLUGIN: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL) if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); @@ -4519,6 +5024,10 @@ finish: /* Free tables */ thd_proc_info(thd, "closing tables"); close_thread_tables(thd); +#ifdef WITH_WSREP + WSREP_TO_ISOLATION_END + thd->wsrep_consistency_check= FALSE; +#endif /* WITH_WSREP */ thd_proc_info(thd, 0); #ifndef DBUG_OFF @@ -5429,6 +5938,21 @@ void THD::reset_for_next_command(bool calculate_userstat) thd->auto_inc_intervals_in_cur_stmt_for_binlog.empty(); thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0; +#ifdef WITH_WSREP + if (WSREP(thd)) { + if (wsrep_auto_increment_control) + { + if (thd->variables.auto_increment_offset != + global_system_variables.auto_increment_offset) + thd->variables.auto_increment_offset= + global_system_variables.auto_increment_offset; + if (thd->variables.auto_increment_increment != + global_system_variables.auto_increment_increment) + thd->variables.auto_increment_increment= + global_system_variables.auto_increment_increment; + } + } +#endif /* WITH_WSREP */ thd->query_start_used= 0; thd->query_start_sec_part_used= 0; thd->is_fatal_error= thd->time_zone_used= 0; @@ -7381,6 +7905,545 @@ LEX_USER *create_definer(THD *thd, LEX_STRING *user_name, LEX_STRING *host_name) return definer; } +#ifdef WITH_WSREP +static enum wsrep_status wsrep_apply_sql( + THD *thd, 
const char *sql, size_t sql_len, time_t timeval, uint32 randseed) +{ + int error; + enum wsrep_status ret_code= WSREP_OK; + + DBUG_ENTER("wsrep_bf_execute_cb"); + thd->wsrep_exec_mode= REPL_RECV; + thd->net.vio= 0; + thd->start_time= timeval; + thd->wsrep_rand= randseed; + + thd->variables.option_bits |= OPTION_NOT_AUTOCOMMIT; + + DBUG_PRINT("wsrep", ("SQL: %s", sql)); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXEC; + /* preserve replaying mode */ + if (thd->wsrep_conflict_state!= REPLAYING) + thd->wsrep_conflict_state= NO_CONFLICT; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + if ((error= dispatch_command(COM_QUERY, thd, (char*)sql, sql_len))) { + WSREP_WARN("BF SQL apply failed: %d, %lld", + thd->wsrep_conflict_state, (long long)thd->wsrep_trx_seqno); + DBUG_RETURN(WSREP_FATAL); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state!= NO_CONFLICT && + thd->wsrep_conflict_state!= REPLAYING) { + ret_code= WSREP_FATAL; + WSREP_DEBUG("BF thd ending, with: %d, %lld", + thd->wsrep_conflict_state, (long long)thd->wsrep_trx_seqno); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + assert(thd->wsrep_exec_mode== REPL_RECV); + DBUG_RETURN(ret_code); +} + +void wsrep_write_rbr_buf( + THD *thd, const void* rbr_buf, size_t buf_len) +{ + char filename[PATH_MAX]= {0}; + int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log", + wsrep_data_home_dir, thd->thread_id, + (long long)thd->wsrep_trx_seqno); + if (len >= PATH_MAX) + { + WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len); + return; + } + + FILE *of= fopen(filename, "wb"); + if (of) + { + fwrite (rbr_buf, buf_len, 1, of); + fclose(of); + } + else + { + WSREP_ERROR("Failed to open file '%s': %d (%s)", + filename, errno, strerror(errno)); + } +} + +static inline wsrep_status_t wsrep_apply_rbr( + THD *thd, const uchar *rbr_buf, size_t buf_len) +{ + char *buf= (char *)rbr_buf; + int rcode= 0; + int event= 1; + + DBUG_ENTER("wsrep_apply_rbr"); + + if 
(thd->killed == KILL_CONNECTION) + { + WSREP_INFO("applier has been aborted, skipping apply_rbr: %lld", + (long long) thd->wsrep_trx_seqno); + DBUG_RETURN(WSREP_FATAL); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXEC; + if (thd->wsrep_conflict_state!= REPLAYING) + thd->wsrep_conflict_state= NO_CONFLICT; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + if (!buf_len) WSREP_DEBUG("empty rbr buffer to apply: %lld", + (long long) thd->wsrep_trx_seqno); + + if ((rcode= trans_begin(thd))) + WSREP_WARN("begin for rbr apply failed: %lld, code: %d", + (long long) thd->wsrep_trx_seqno, rcode); + + while(buf_len) + { + int exec_res; + int error = 0; + Log_event* ev= wsrep_read_log_event(&buf, &buf_len, wsrep_format_desc); + + switch (ev->get_type_code()) { + case WRITE_ROWS_EVENT: + case UPDATE_ROWS_EVENT: + case DELETE_ROWS_EVENT: + DBUG_ASSERT(buf_len != 0 || + ((Rows_log_event *) ev)->get_flags(Rows_log_event::STMT_END_F)); + break; + default: + break; + } + + thd->server_id = ev->server_id; // use the original server id for logging + thd->set_time(); // time the query + wsrep_xid_init(&thd->transaction.xid_state.xid, + wsrep_cluster_uuid(), + thd->wsrep_trx_seqno); + thd->lex->current_select= 0; + if (!ev->when) + ev->when = time(NULL); + ev->thd = thd; + exec_res = ev->apply_event(thd->wsrep_rli); + DBUG_PRINT("info", ("exec_event result: %d", exec_res)); + + if (exec_res) + { + WSREP_WARN("RBR event %d %s apply warning: %d, %lld", + event, ev->get_type_str(), exec_res, (long long) thd->wsrep_trx_seqno); + rcode= exec_res; + /* stop processing for the first error */ + delete ev; + goto error; + } + event++; + + if (thd->wsrep_conflict_state!= NO_CONFLICT && + thd->wsrep_conflict_state!= REPLAYING) + WSREP_WARN("conflict state after RBR event applying: %d, %lld", + thd->wsrep_query_state, (long long)thd->wsrep_trx_seqno); + + if (thd->wsrep_conflict_state == MUST_ABORT) { + WSREP_WARN("RBR event apply failed, rolling back: %lld", + (long 
long) thd->wsrep_trx_seqno); + trans_rollback(thd); + thd->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. */ + thd->mdl_context.release_transactional_locks(); + thd->wsrep_conflict_state= NO_CONFLICT; + DBUG_RETURN(WSREP_FATAL); + } + + if (ev->get_type_code() != TABLE_MAP_EVENT && + ((Rows_log_event *) ev)->get_flags(Rows_log_event::STMT_END_F)) + { + // TODO: combine with commit on higher level common for the query ws + + thd->wsrep_rli->cleanup_context(thd, 0); + + if (error == 0) + { + thd->clear_error(); + } + else + WSREP_ERROR("Error in %s event: commit of row events failed: %lld", + ev->get_type_str(), (long long)thd->wsrep_trx_seqno); + } + delete ev; + } + + error: + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_IDLE; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + assert(thd->wsrep_exec_mode== REPL_RECV); + + if (thd->killed == KILL_CONNECTION) + WSREP_INFO("applier aborted: %lld", (long long)thd->wsrep_trx_seqno); + + if (rcode) DBUG_RETURN(WSREP_FATAL); + DBUG_RETURN(WSREP_OK); +} + +wsrep_status_t wsrep_apply_cb(void* const ctx, + const void* const buf, size_t const buf_len, + wsrep_seqno_t const global_seqno) +{ + THD* const thd((THD*)ctx); + + thd->wsrep_trx_seqno= global_seqno; + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "applying write set %lld: %p, %zu", + (long long)thd->wsrep_trx_seqno, buf, buf_len); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "applying write set"); +#endif /* WSREP_PROC_INFO */ + + wsrep_status_t const rcode(wsrep_apply_rbr(thd, (const uchar*)buf, buf_len)); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "applied write set %lld", (long long)thd->wsrep_trx_seqno); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "applied write set"); +#endif /* WSREP_PROC_INFO */ + + if (WSREP_OK != rcode) wsrep_write_rbr_buf(thd, buf, buf_len); + + return 
rcode; +} + +#if DELETE // this does not work in 5.5 +/* a common wrapper for end_trans() function - to put all necessary stuff */ +static inline wsrep_status_t +wsrep_end_trans (THD* const thd, enum enum_mysql_completiontype const end) +{ + if (0 == end_trans(thd, end)) + { + return WSREP_OK; + } + else + { + return WSREP_FATAL; + } +} +#endif + +wsrep_status_t wsrep_commit(THD* const thd, wsrep_seqno_t const global_seqno) +{ +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "committing %lld", (long long)thd->wsrep_trx_seqno); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "committing"); +#endif /* WSREP_PROC_INFO */ + + wsrep_status_t const rcode(wsrep_apply_sql(thd, "COMMIT", 6, 0, 0)); +// wsrep_status_t const rcode(wsrep_end_trans (thd, COMMIT)); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "committed %lld", (long long)thd->wsrep_trx_seqno); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "committed"); +#endif /* WSREP_PROC_INFO */ + + if (WSREP_OK == rcode) + { + // TODO: mark snapshot with global_seqno. 
+ } + + return rcode; +} + +wsrep_status_t wsrep_rollback(THD* const thd, wsrep_seqno_t const global_seqno) +{ +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "rolling back %lld", (long long)thd->wsrep_trx_seqno); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "rolling back"); +#endif /* WSREP_PROC_INFO */ + + wsrep_status_t const rcode(wsrep_apply_sql(thd, "ROLLBACK", 8, 0, 0)); +// wsrep_status_t const rcode(wsrep_end_trans (thd, ROLLBACK)); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "rolled back %lld", (long long)thd->wsrep_trx_seqno); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "rolled back"); +#endif /* WSREP_PROC_INFO */ + + return rcode; +} + +wsrep_status_t wsrep_commit_cb(void* const ctx, + wsrep_seqno_t const global_seqno, + bool const commit) +{ + THD* const thd((THD*)ctx); + + assert(global_seqno == thd->wsrep_trx_seqno); + + if (commit) + return wsrep_commit(thd, global_seqno); + else + return wsrep_rollback(thd, global_seqno); +} + +Relay_log_info* wsrep_relay_log_init(const char* log_fname) +{ + Relay_log_info* rli= new Relay_log_info(false); + + rli->no_storage= true; + if (!rli->relay_log.description_event_for_exec) + { + rli->relay_log.description_event_for_exec= + new Format_description_log_event(4); + } + + rli->sql_thd= current_thd; + return rli; +} + +void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow* shadow) +{ + shadow->options = thd->variables.option_bits; + shadow->wsrep_exec_mode = thd->wsrep_exec_mode; + shadow->vio = thd->net.vio; + + if (opt_log_slave_updates) + thd->variables.option_bits|= OPTION_BIN_LOG; + else + thd->variables.option_bits&= ~(OPTION_BIN_LOG); + + if (!thd->wsrep_rli) thd->wsrep_rli= wsrep_relay_log_init("wsrep_relay"); + + thd->wsrep_exec_mode= REPL_RECV; + thd->net.vio= 0; + thd->clear_error(); + + thd->variables.option_bits|= OPTION_NOT_AUTOCOMMIT; + + shadow->tx_isolation = 
thd->variables.tx_isolation; + thd->variables.tx_isolation = ISO_READ_COMMITTED; + thd->tx_isolation = ISO_READ_COMMITTED; +} + +void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow* shadow) +{ + thd->variables.option_bits = shadow->options; + thd->wsrep_exec_mode = shadow->wsrep_exec_mode; + thd->net.vio = shadow->vio; + thd->variables.tx_isolation = shadow->tx_isolation; +} + +void wsrep_replication_process(THD *thd) +{ + int rcode; + DBUG_ENTER("wsrep_replication_process"); + + struct wsrep_thd_shadow shadow; + wsrep_prepare_bf_thd(thd, &shadow); + + wsrep_format_desc= new Format_description_log_event(4); + + rcode = wsrep->recv(wsrep, (void *)thd); + DBUG_PRINT("wsrep",("wsrep_repl returned: %d", rcode)); + + WSREP_INFO("applier thread exiting (code:%d)", rcode); + + switch (rcode) { + case WSREP_OK: + case WSREP_NOT_IMPLEMENTED: + case WSREP_CONN_FAIL: + /* provider does not support slave operations / disconnected from group, + * just close applier thread */ + break; + case WSREP_NODE_FAIL: + /* data inconsistency => SST is needed */ + /* Note: we cannot just blindly restart replication here, + * SST might require server restart if storage engines must be + * initialized after SST */ + WSREP_ERROR("node consistency compromised, aborting"); + wsrep_kill_mysql(thd); + break; + case WSREP_WARNING: + case WSREP_TRX_FAIL: + case WSREP_TRX_MISSING: + /* these suggests a bug in provider code */ + WSREP_WARN("bad return from recv() call: %d", rcode); + /* fall through to node shutdown */ + case WSREP_FATAL: + /* Cluster connectivity is lost. + * + * If applier was killed on purpose (KILL_CONNECTION), we + * avoid mysql shutdown. 
This is because the killer will then handle + * shutdown processing (or replication restarting) + */ + if (thd->killed != KILL_CONNECTION) + { + wsrep_kill_mysql(thd); + } + break; + } + + if (thd->killed != KILL_CONNECTION) + { + mysql_mutex_lock(&LOCK_thread_count); + wsrep_close_applier(thd); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + } + + if (thd->killed != KILL_CONNECTION) + { + mysql_mutex_lock(&LOCK_thread_count); + wsrep_close_applier(thd); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + } + wsrep_return_from_bf_mode(thd, &shadow); + DBUG_VOID_RETURN; +} + +void wsrep_rollback_process(THD *thd) +{ + DBUG_ENTER("wsrep_rollback_process"); + + mysql_mutex_lock(&LOCK_wsrep_rollback); + wsrep_aborting_thd= NULL; + + while (thd->killed == NOT_KILLED) { + thd_proc_info(thd, "wsrep aborter idle"); + thd->mysys_var->current_mutex= &LOCK_wsrep_rollback; + thd->mysys_var->current_cond= &COND_wsrep_rollback; + + mysql_cond_wait(&COND_wsrep_rollback,&LOCK_wsrep_rollback); + + WSREP_DEBUG("WSREP rollback thread wakes for signal"); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd_proc_info(thd, "wsrep aborter active"); + thd->mysys_var->current_mutex= 0; + thd->mysys_var->current_cond= 0; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + /* check for false alarms */ + if (!wsrep_aborting_thd) + { + WSREP_DEBUG("WSREP rollback thread has empty abort queue"); + } + /* process all entries in the queue */ + while (wsrep_aborting_thd) { + THD *aborting; + wsrep_aborting_thd_t next = wsrep_aborting_thd->next; + aborting = wsrep_aborting_thd->aborting_thd; + my_free(wsrep_aborting_thd); + wsrep_aborting_thd= next; + /* + * must release mutex, appliers my want to add more + * aborting thds in our work queue, while we rollback + */ + mysql_mutex_unlock(&LOCK_wsrep_rollback); + + mysql_mutex_lock(&aborting->LOCK_wsrep_thd); + if (aborting->wsrep_conflict_state== ABORTED) + { + 
WSREP_DEBUG("WSREP, thd already aborted: %llu state: %d", + (long long)aborting->real_id, + aborting->wsrep_conflict_state); + + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_rollback); + continue; + } + aborting->wsrep_conflict_state= ABORTING; + + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + + aborting->store_globals(); + + trans_rollback(aborting); + aborting->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. */ + aborting->mdl_context.release_transactional_locks(); + + mysql_mutex_lock(&aborting->LOCK_wsrep_thd); + aborting->wsrep_conflict_state= ABORTED; + WSREP_DEBUG("WSREP rollbacker aborted thd: %llu", + (long long)aborting->real_id); + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_rollback); + } + } + + mysql_mutex_unlock(&LOCK_wsrep_rollback); + sql_print_information("WSREP: rollbacker thread exiting"); + + DBUG_PRINT("wsrep",("wsrep rollbacker thread exiting")); + DBUG_VOID_RETURN; +} +extern "C" +int wsrep_thd_is_brute_force(void *thd_ptr) +{ + if (thd_ptr) { + switch (((THD *)thd_ptr)->wsrep_exec_mode) { + case LOCAL_STATE: + { + if (((THD *)thd_ptr)->wsrep_conflict_state== REPLAYING) + { + return 1; + } + return 0; + } + case REPL_RECV: return 1; + case TOTAL_ORDER: return 2; + case LOCAL_COMMIT: return 3; + } + } + return 0; +} +extern "C" +int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) +{ + THD *victim_thd = (THD *) victim_thd_ptr; + THD *bf_thd = (THD *) bf_thd_ptr; + DBUG_ENTER("wsrep_abort_thd"); + + if (WSREP(bf_thd) && victim_thd) + { + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? 
+ (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + ha_wsrep_abort_transaction(bf_thd, victim_thd, signal); + } + + DBUG_RETURN(1); +} +extern "C" +int wsrep_thd_in_locking_session(void *thd_ptr) +{ + if (thd_ptr && ((THD *)thd_ptr)->in_lock_tables) { + return 1; + } + return 0; +} +#endif /** Retuns information about user or current user. diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 39f69b2656d..6a1a9221064 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -2954,11 +2954,17 @@ void plugin_thdvar_init(THD *thd) thd->variables.dynamic_variables_size= 0; thd->variables.dynamic_variables_ptr= 0; +#ifdef WITH_WSREP + if (!WSREP(thd) || !thd->wsrep_applier) { +#endif mysql_mutex_lock(&LOCK_plugin); thd->variables.table_plugin= my_intern_plugin_lock(NULL, global_system_variables.table_plugin); intern_plugin_unlock(NULL, old_table_plugin); mysql_mutex_unlock(&LOCK_plugin); +#ifdef WITH_WSREP + } +#endif DBUG_VOID_RETURN; } diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc index d2f118b62c9..cb33110d9eb 100644 --- a/sql/sql_reload.cc +++ b/sql/sql_reload.cc @@ -224,7 +224,18 @@ bool reload_acl_and_cache(THD *thd, unsigned long options, } if (options & REFRESH_CHECKPOINT) disable_checkpoints(thd); - } +#ifdef WITH_WSREP + /* + We need to do it second time after wsrep appliers were blocked in + make_global_read_lock_block_commit(thd) above since they could have + modified the tables too. + */ + if (WSREP(thd) && + close_cached_tables(thd, tables, (options & REFRESH_FAST) ? 
+ FALSE : TRUE, TRUE)) + result= 1; +#endif /* WITH_WSREP */ + } else { if (thd && thd->locked_tables_mode) diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index eb17ef0812c..ecb47324552 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1399,7 +1399,14 @@ int stop_slave(THD* thd, Master_info* mi, bool net_report ) ER(ER_SLAVE_WAS_NOT_RUNNING)); } unlock_slave_threads(mi); +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "exit stop_slave()"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ if (slave_errno) { @@ -1832,7 +1839,14 @@ bool change_master(THD* thd, Master_info* mi) err: unlock_slave_threads(mi); +#ifdef WITH_WSREP + if (WSREP(thd)) + thd_proc_info(thd, "exit change_master()"); + else + thd_proc_info(thd, 0); +#else /* WITH_WSREP */ thd_proc_info(thd, 0); +#endif /* WITH_WSREP */ if (ret == FALSE) my_ok(thd); DBUG_RETURN(ret); diff --git a/sql/sql_show.cc b/sql/sql_show.cc index d64c7a6df52..da48c7cdc63 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -59,6 +59,9 @@ #include "datadict.h" // dd_frm_type() #include "keycaches.h" +#if !defined(MYSQL_MAX_VARIABLE_VALUE_LEN) +#define MYSQL_MAX_VARIABLE_VALUE_LEN 1024 +#endif // !defined(MYSQL_MAX_VARIABLE_VALUE_LEN) #define STR_OR_NIL(S) ((S) ? 
(S) : "<nil>") #ifdef WITH_PARTITION_STORAGE_ENGINE @@ -8088,7 +8091,8 @@ ST_FIELD_INFO variables_fields_info[]= { {"VARIABLE_NAME", 64, MYSQL_TYPE_STRING, 0, 0, "Variable_name", SKIP_OPEN_TABLE}, - {"VARIABLE_VALUE", 1024, MYSQL_TYPE_STRING, 0, 1, "Value", SKIP_OPEN_TABLE}, + {"VARIABLE_VALUE", MYSQL_MAX_VARIABLE_VALUE_LEN, MYSQL_TYPE_STRING, 0, 1, + "Value", SKIP_OPEN_TABLE}, {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE} }; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 01832036701..1fdd81de089 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -6158,12 +6158,18 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, error= 0; break; } - if (error == HA_ERR_WRONG_COMMAND) +#ifdef WITH_WSREP + bool do_log_write(true); +#endif /* WITH_WSREP */ + if (error == HA_ERR_WRONG_COMMAND) { error= 0; push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA), table->alias.c_ptr()); +#ifdef WITH_WSREP + WSREP_DEBUG("ignoring DDL failure: %d %s", error, thd->query()); +#endif /* WITH_WSREP */ } if (!error && (new_name != table_name || new_db != db)) @@ -6215,6 +6221,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA), table->alias.c_ptr()); +#ifdef WITH_WSREP + WSREP_DEBUG("ignoring DDL failure: %d %s", error, thd->query()); +#endif /* WITH_WSREP */ } if (!error) diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index 1ac1d7bbb5e..149c2431324 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -2452,3 +2452,55 @@ bool load_table_name_for_trigger(THD *thd, DBUG_RETURN(FALSE); } +#ifdef WITH_WSREP +int wsrep_create_trigger_query(THD *thd, uchar** buf, uint* buf_len) +{ + LEX *lex= thd->lex; + String stmt_query; + + LEX_STRING definer_user; + LEX_STRING definer_host; + + if (!lex->definer) + { + if (!thd->slave_thread) + { + if (!(lex->definer= create_default_definer(thd))) + return 1; + } + } + + if 
(lex->definer) + { + /* SUID trigger. */ + + definer_user= lex->definer->user; + definer_host= lex->definer->host; + } + else + { + /* non-SUID trigger. */ + + definer_user.str= 0; + definer_user.length= 0; + + definer_host.str= 0; + definer_host.length= 0; + } + + stmt_query.append(STRING_WITH_LEN("CREATE ")); + + append_definer(thd, &stmt_query, &definer_user, &definer_host); + + LEX_STRING stmt_definition; + stmt_definition.str= (char*) thd->lex->stmt_definition_begin; + stmt_definition.length= thd->lex->stmt_definition_end + - thd->lex->stmt_definition_begin; + trim_whitespace(thd->charset(), & stmt_definition); + + stmt_query.append(stmt_definition.str, stmt_definition.length); + + return wsrep_to_buf_helper(thd, stmt_query.c_ptr(), stmt_query.length(), + buf, buf_len); +} +#endif /* WITH_WSREP */ diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index 67ed608f114..a66b2377fcb 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -24,6 +24,9 @@ #include "sql_acl.h" // DROP_ACL #include "sql_parse.h" // check_one_table_access() #include "sql_truncate.h" +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" +#endif /* WITH_WSREP */ /** @@ -515,9 +518,14 @@ bool Truncate_statement::execute(THD *thd) if (check_one_table_access(thd, DROP_ACL, first_table)) DBUG_RETURN(res); +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_to_isolation_begin(thd, + first_table->db, + first_table->table_name)) + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ if (! 
(res= truncate_table(thd, first_table))) my_ok(thd); - DBUG_RETURN(res); } diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 5fa30c91417..ddb95f09ce2 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -902,7 +902,11 @@ int mysql_update(THD *thd, */ if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (error < 0) @@ -2045,7 +2049,11 @@ void multi_update::abort_result_set() The query has to binlog because there's a modified non-transactional table either from the query's list or via a stored routine: bug#13270,23333 */ +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { /* THD::killed status might not have been set ON at time of an error @@ -2304,7 +2312,11 @@ bool multi_update::send_eof() if (local_error == 0 || thd->transaction.stmt.modified_non_trans_table) { +#ifdef WITH_WSREP + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) +#else if (mysql_bin_log.is_open()) +#endif { int errcode= 0; if (local_error == 0) diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 72e9525db72..dd11f51b212 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -3437,6 +3437,206 @@ static Sys_var_tz Sys_time_zone( "time_zone", "time_zone", SESSION_VAR(time_zone), NO_CMD_LINE, DEFAULT(&default_tz), NO_MUTEX_GUARD, IN_BINLOG); +#ifdef WITH_WSREP +#include "wsrep_mysqld.h" + +static Sys_var_charptr Sys_wsrep_provider( + "wsrep_provider", "Path to replication provider library", + GLOBAL_VAR(wsrep_provider), CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER), + IN_FS_CHARSET, DEFAULT(wsrep_provider), + // IN_FS_CHARSET, DEFAULT(wsrep_provider_default), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_check), ON_UPDATE(wsrep_provider_update)); + +static Sys_var_charptr Sys_wsrep_provider_options( + "wsrep_provider_options", "provider 
specific options", + GLOBAL_VAR(wsrep_provider_options), + CMD_LINE(REQUIRED_ARG, OPT_WSREP_PROVIDER_OPTIONS), + IN_FS_CHARSET, DEFAULT(wsrep_provider_options), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_options_check), + ON_UPDATE(wsrep_provider_options_update)); + +static Sys_var_charptr Sys_wsrep_data_home_dir( + "wsrep_data_home_dir", "home directory for wsrep provider", + READ_ONLY GLOBAL_VAR(wsrep_data_home_dir), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_charptr Sys_wsrep_cluster_name( + "wsrep_cluster_name", "Name for the cluster", + GLOBAL_VAR(wsrep_cluster_name), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(wsrep_cluster_name), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_name_check), + ON_UPDATE(wsrep_cluster_name_update)); + +static Sys_var_charptr Sys_wsrep_cluster_address ( + "wsrep_cluster_address", "Address to initially connect to cluster", + GLOBAL_VAR(wsrep_cluster_address), + CMD_LINE(REQUIRED_ARG, OPT_WSREP_CLUSTER_ADDRESS), + IN_FS_CHARSET, DEFAULT(wsrep_cluster_address), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_address_check), + ON_UPDATE(wsrep_cluster_address_update)); + +static Sys_var_charptr Sys_wsrep_node_name ( + "wsrep_node_name", "Node name", + GLOBAL_VAR(wsrep_node_name), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(glob_hostname), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_charptr Sys_wsrep_node_address ( + "wsrep_node_address", "Node address", + GLOBAL_VAR(wsrep_node_address), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(wsrep_node_address), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_node_address_check), + ON_UPDATE(wsrep_node_address_update)); + +static Sys_var_charptr Sys_wsrep_node_incoming_address( + "wsrep_node_incoming_address", "Client connection address", + GLOBAL_VAR(wsrep_node_incoming_address),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(wsrep_node_incoming_address), + 
NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_node_name_check), + ON_UPDATE(wsrep_node_name_update)); + +static Sys_var_ulong Sys_wsrep_slave_threads( + "wsrep_slave_threads", "Number of slave appliers to launch", + GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1)); + +static Sys_var_charptr Sys_wsrep_dbug_option( + "wsrep_dbug_option", "DBUG options to provider library", + GLOBAL_VAR(wsrep_dbug_option),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_mybool Sys_wsrep_debug( + "wsrep_debug", "To enable debug level logging", + GLOBAL_VAR(wsrep_debug), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_convert_LOCK_to_trx( + "wsrep_convert_LOCK_to_trx", "To convert locking sessions " + "into transactions", + GLOBAL_VAR(wsrep_convert_LOCK_to_trx), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_wsrep_retry_autocommit( + "wsrep_retry_autocommit", "Max number of times to retry " + "a failed autocommit statement", + SESSION_VAR(wsrep_retry_autocommit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 10000), DEFAULT(1), BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_wsrep_auto_increment_control( + "wsrep_auto_increment_control", "To automatically control the " + "assignment of autoincrement variables", + GLOBAL_VAR(wsrep_auto_increment_control), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_drupal_282555_workaround( + "wsrep_drupal_282555_workaround", "To use a workaround for" + "bad autoincrement value", + GLOBAL_VAR(wsrep_drupal_282555_workaround), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_charptr sys_wsrep_sst_method( + "wsrep_sst_method", "Snapshot transfer method", + GLOBAL_VAR(wsrep_sst_method),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(wsrep_sst_method), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_method_check), + ON_UPDATE(wsrep_sst_method_update)); + +static 
Sys_var_charptr Sys_wsrep_sst_receive_address( + "wsrep_sst_receive_address", "Address where node is waiting for " + "SST contact", + GLOBAL_VAR(wsrep_sst_receive_address),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(wsrep_sst_receive_address), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_receive_address_check), + ON_UPDATE(wsrep_sst_receive_address_update)); + +static Sys_var_charptr Sys_wsrep_sst_auth( + "wsrep_sst_auth", "Authentication for SST connection", + GLOBAL_VAR(wsrep_sst_auth), CMD_LINE(REQUIRED_ARG, OPT_WSREP_SST_AUTH), + IN_FS_CHARSET, DEFAULT(wsrep_sst_auth), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_auth_check), + ON_UPDATE(wsrep_sst_auth_update)); + +static Sys_var_charptr Sys_wsrep_sst_donor( + "wsrep_sst_donor", "preferred donor node for the SST", + GLOBAL_VAR(wsrep_sst_donor),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_donor_check), + ON_UPDATE(wsrep_sst_donor_update)); + +static Sys_var_mybool Sys_wsrep_on ( + "wsrep_on", "To enable wsrep replication ", + SESSION_VAR(wsrep_on), + CMD_LINE(OPT_ARG), DEFAULT(TRUE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_on_update)); + +static Sys_var_charptr Sys_wsrep_start_position ( + "wsrep_start_position", "global transaction position to start from ", + GLOBAL_VAR(wsrep_start_position), + CMD_LINE(REQUIRED_ARG, OPT_WSREP_START_POSITION), + IN_FS_CHARSET, DEFAULT(wsrep_start_position), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_start_position_check), + ON_UPDATE(wsrep_start_position_update)); + +static Sys_var_ulonglong Sys_wsrep_max_ws_size ( + "wsrep_max_ws_size", "Max write set size (bytes)", + GLOBAL_VAR(wsrep_max_ws_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1024, 4294967296ULL), DEFAULT(1073741824ULL), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_wsrep_max_ws_rows ( + "wsrep_max_ws_rows", "Max number of rows in write set", + GLOBAL_VAR(wsrep_max_ws_rows), CMD_LINE(REQUIRED_ARG), + 
VALID_RANGE(1, 1048576), DEFAULT(131072), BLOCK_SIZE(1)); + +static Sys_var_charptr Sys_wsrep_notify_cmd( + "wsrep_notify_cmd", "", + GLOBAL_VAR(wsrep_notify_cmd),CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG); + +static Sys_var_mybool Sys_wsrep_certify_nonPK( + "wsrep_certify_nonPK", "Certify tables with no primary key", + GLOBAL_VAR(wsrep_certify_nonPK), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_causal_reads( + "wsrep_causal_reads", "Enable \"strictly synchronous\" semantics for read operations", + SESSION_VAR(wsrep_causal_reads), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + // ON_UPDATE(wsrep_causal_reads_update)); + +static const char *wsrep_OSU_method_names[]= { "TOI", "RSU", NullS }; +static Sys_var_enum Sys_wsrep_OSU_method( + "wsrep_OSU_method", "Method for Online Schema Upgrade", + GLOBAL_VAR(wsrep_OSU_method_options), CMD_LINE(OPT_ARG), + wsrep_OSU_method_names, DEFAULT(WSREP_OSU_TOI), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(0)); + +static Sys_var_enum Sys_wsrep_forced_binlog_format( + "wsrep_forced_binlog_format", "binlog format to take effect over user's choice", + GLOBAL_VAR(wsrep_forced_binlog_format), + CMD_LINE(REQUIRED_ARG, OPT_BINLOG_FORMAT), + wsrep_binlog_format_names, DEFAULT(BINLOG_FORMAT_UNSPEC), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(0)); + +static Sys_var_mybool Sys_wsrep_recover_datadir( + "wsrep_recover", "Recover database state after crash and exit", + READ_ONLY GLOBAL_VAR(wsrep_recovery), + CMD_LINE(OPT_ARG, OPT_WSREP_RECOVER), DEFAULT(FALSE)); + + +#endif /* WITH_WSREP */ static Sys_var_ulong Sys_sp_cache_size( "stored_program_cache", diff --git a/sql/transaction.cc b/sql/transaction.cc index 94a32200274..2b23b5e19f1 100644 --- a/sql/transaction.cc +++ b/sql/transaction.cc @@ -96,6 +96,9 @@ static bool xa_trans_force_rollback(THD *thd) by ha_rollback()/THD::transaction::cleanup(). 
*/ thd->transaction.xid_state.rm_error= 0; +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ if (ha_rollback_trans(thd, true)) { my_error(ER_XAER_RMERR, MYF(0)); @@ -134,6 +137,9 @@ bool trans_begin(THD *thd, uint flags) (thd->variables.option_bits & OPTION_TABLE_LOCK)) { thd->variables.option_bits&= ~OPTION_TABLE_LOCK; +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~SERVER_STATUS_IN_TRANS; res= test(ha_commit_trans(thd, TRUE)); } @@ -150,6 +156,12 @@ bool trans_begin(THD *thd, uint flags) */ thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP + thd->wsrep_PA_safe= true; + if (thd->wsrep_client_thread && wsrep_causal_wait(thd)) + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ + thd->variables.option_bits|= OPTION_BEGIN; thd->server_status|= SERVER_STATUS_IN_TRANS; @@ -177,6 +189,9 @@ bool trans_commit(THD *thd) if (trans_check(thd)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~SERVER_STATUS_IN_TRANS; res= ha_commit_trans(thd, TRUE); if (res) @@ -220,6 +235,9 @@ bool trans_commit_implicit(THD *thd) /* Safety if one did "drop table" on locked tables */ if (!thd->locked_tables_mode) thd->variables.option_bits&= ~OPTION_TABLE_LOCK; +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~SERVER_STATUS_IN_TRANS; res= test(ha_commit_trans(thd, TRUE)); } @@ -251,11 +269,16 @@ bool trans_commit_implicit(THD *thd) bool trans_rollback(THD *thd) { int res; - DBUG_ENTER("trans_rollback"); - - if (trans_check(thd)) + DBUG_ENTER("trans_rollback"); +#ifdef WITH_WSREP + thd->wsrep_PA_safe= true; +#endif /* WITH_WSREP */ + if (trans_check(thd)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ thd->server_status&= ~SERVER_STATUS_IN_TRANS; res= ha_rollback_trans(thd, TRUE); RUN_HOOK(transaction, after_rollback, (thd, FALSE)); @@ 
-296,6 +319,9 @@ bool trans_commit_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { +#ifdef WITH_WSREP + wsrep_register_hton(thd, FALSE); +#endif /* WITH_WSREP */ res= ha_commit_trans(thd, FALSE); if (! thd->in_active_multi_stmt_transaction()) thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; @@ -338,9 +364,19 @@ bool trans_rollback_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { +#ifdef WITH_WSREP + wsrep_register_hton(thd, FALSE); +#endif /* WITH_WSREP */ ha_rollback_trans(thd, FALSE); if (thd->transaction_rollback_request && !thd->in_sub_stmt) +#ifdef WITH_WSREP + { + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ ha_rollback_trans(thd, TRUE); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ if (! thd->in_active_multi_stmt_transaction()) thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; } @@ -681,6 +717,9 @@ bool trans_xa_commit(THD *thd) } else if (xa_state == XA_IDLE && thd->lex->xa_opt == XA_ONE_PHASE) { +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ int r= ha_commit_trans(thd, TRUE); if ((res= test(r))) my_error(r == 1 ? 
ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); @@ -702,6 +741,9 @@ bool trans_xa_commit(THD *thd) if (thd->mdl_context.acquire_lock(&mdl_request, thd->variables.lock_wait_timeout)) { +#ifdef WITH_WSREP + wsrep_register_hton(thd, TRUE); +#endif /* WITH_WSREP */ ha_rollback_trans(thd, TRUE); my_error(ER_XAER_RMERR, MYF(0)); } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 9fb7dd79f3d..705eeaf5f9c 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -89,6 +89,12 @@ extern "C" { #include "ha_prototypes.h" #include "ut0mem.h" #include "ibuf0ibuf.h" +#ifdef WITH_WSREP +#include "../storage/innobase/include/ut0byte.h" +#ifndef EXTRA_DEBUG + //#include "../storage/innobase/include/ut0byte.ic" +#endif /* EXTRA_DEBUG */ +#endif /* WITH_WSREP */ } #include "ha_innodb.h" @@ -98,6 +104,32 @@ extern "C" { # define MYSQL_PLUGIN_IMPORT /* nothing */ # endif /* MYSQL_PLUGIN_IMPORT */ +#ifdef WITH_WSREP +#include <wsrep_mysqld.h> +#include <my_md5.h> +extern my_bool wsrep_certify_nonPK; +class binlog_trx_data; +extern handlerton *binlog_hton; + +extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log; +extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_wsrep_rollback; +extern MYSQL_PLUGIN_IMPORT mysql_cond_t COND_wsrep_rollback; +extern MYSQL_PLUGIN_IMPORT wsrep_aborting_thd_t wsrep_aborting_thd; + +static inline wsrep_trx_handle_t* +wsrep_trx_handle(THD* thd, const trx_t* trx) { + return wsrep_trx_handle_for_id(wsrep_thd_trx_handle(thd), + (wsrep_trx_id_t)trx->id); +} + +extern bool wsrep_prepare_key_for_innodb(const uchar *cache_key, + size_t cache_key_len, + const uchar* row_id, + size_t row_id_len, + wsrep_key_part_t* key, + size_t* key_len); + +#endif /* WITH_WSREP */ /** to protect innobase_open_files */ static mysql_mutex_t innobase_share_mutex; /** to force correct commit order in binlog */ @@ -851,6 +883,15 @@ thd_to_trx( { return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr)); } +#ifdef 
WITH_WSREP +ulonglong +thd_to_trx_id( +/*=======*/ + THD* thd) /*!< in: MySQL thread */ +{ + return(thd_to_trx(thd)->id); +} +#endif /********************************************************************//** Call this function when mysqld passes control to the client. That is to @@ -881,6 +922,13 @@ innobase_release_temporary_latches( return(0); } +#ifdef WITH_WSREP +static int +wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd, + my_bool signal); +static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid); +static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid); +#endif /********************************************************************//** Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth time calls srv_active_wake_master_thread. This function should be used @@ -1327,6 +1375,9 @@ int innobase_mysql_tmpfile(void) /*========================*/ { +#ifdef WITH_INNODB_DISALLOW_WRITES + os_event_wait(srv_allow_writes_event); +#endif /* WITH_INNODB_DISALLOW_WRITES */ int fd2 = -1; File fd = mysql_tmpfile("ib"); if (fd >= 0) { @@ -2241,6 +2292,11 @@ innobase_init( innobase_hton->flags=HTON_NO_FLAGS; innobase_hton->release_temporary_latches=innobase_release_temporary_latches; innobase_hton->alter_table_flags = innobase_alter_table_flags; +#ifdef WITH_WSREP + innobase_hton->wsrep_abort_transaction=wsrep_abort_transaction; + innobase_hton->wsrep_set_checkpoint=innobase_wsrep_set_checkpoint; + innobase_hton->wsrep_get_checkpoint=innobase_wsrep_get_checkpoint; +#endif /* WITH_WSREP */ ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); @@ -2670,6 +2726,27 @@ innobase_commit_low( trx_commit_for_mysql(trx); } +#ifdef WITH_WSREP + THD* thd = (THD*)trx->mysql_thd; + const char* tmp = 0; + if (wsrep_on((void*)thd)) { +#ifdef WSREP_PROC_INFO + char info[64]; + info[sizeof(info) - 1] = '\0'; + snprintf(info, sizeof(info) - 1, + "innobase_commit_low():trx_commit_for_mysql(%lld)", + (long long) 
wsrep_thd_trx_seqno(thd)); + tmp = thd_proc_info(thd, info); + +#else + tmp = thd_proc_info(thd, "innobase_commit_low()"); +#endif /* WSREP_PROC_INFO */ + } +#endif /* WITH_WSREP */ + trx_commit_for_mysql(trx); +#ifdef WITH_WSREP + if (wsrep_on((void*)thd)) { thd_proc_info(thd, tmp); } +#endif /* WITH_WSREP */ } /*****************************************************************//** @@ -3286,7 +3363,11 @@ ha_innobase::max_supported_key_length() const therefore set to slightly less than 1 / 4 of page size which is 16 kB; but currently MySQL does not work with keys whose size is > MAX_KEY_LENGTH */ +#ifdef WITH_WSREP + return(3500); +#else return(3500); +#endif } /****************************************************************//** @@ -4272,7 +4353,96 @@ innobase_mysql_cmp( return(0); } +#ifdef WITH_WSREP +extern "C" UNIV_INTERN +void +wsrep_innobase_mysql_sort( +/*===============*/ + /* out: str contains sort string */ + int mysql_type, /* in: MySQL type */ + uint charset_number, /* in: number of the charset */ + unsigned char* str, /* in: data field */ + unsigned int str_length) /* in: data field length, + not UNIV_SQL_NULL */ +{ + CHARSET_INFO* charset; + enum_field_types mysql_tp; + + DBUG_ASSERT(str_length != UNIV_SQL_NULL); + + mysql_tp = (enum_field_types) mysql_type; + + switch (mysql_tp) { + + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_VARCHAR: + { + uchar tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN]; + uint tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN; + + /* Use the charset number to pick the right charset struct for + the comparison. Since the MySQL function get_charset may be + slow before Bar removes the mutex operation there, we first + look at 2 common charsets directly. 
*/ + + if (charset_number == default_charset_info->number) { + charset = default_charset_info; + } else if (charset_number == my_charset_latin1.number) { + charset = &my_charset_latin1; + } else { + charset = get_charset(charset_number, MYF(MY_WME)); + + if (charset == NULL) { + sql_print_error("InnoDB needs charset %lu for doing " + "a comparison, but MySQL cannot " + "find that charset.", + (ulong) charset_number); + ut_a(0); + } + } + + ut_a(str_length <= tmp_length); + memcpy(tmp_str, str, str_length); + + /* Only the first str_length bytes of tmp_str were initialised by the memcpy() above, so that is the source length handed to strnxfrm(); passing the full scratch-buffer size would let the collation read indeterminate stack bytes. The sort key is written back over str, limited to str_length bytes. */ + tmp_length = charset->coll->strnxfrm(charset, str, str_length, + tmp_str, str_length); + DBUG_ASSERT(tmp_length == str_length); + + break; + } + case MYSQL_TYPE_DECIMAL : + case MYSQL_TYPE_TINY : + case MYSQL_TYPE_SHORT : + case MYSQL_TYPE_LONG : + case MYSQL_TYPE_FLOAT : + case MYSQL_TYPE_DOUBLE : + case MYSQL_TYPE_NULL : + case MYSQL_TYPE_TIMESTAMP : + case MYSQL_TYPE_LONGLONG : + case MYSQL_TYPE_INT24 : + case MYSQL_TYPE_DATE : + case MYSQL_TYPE_TIME : + case MYSQL_TYPE_DATETIME : + case MYSQL_TYPE_YEAR : + case MYSQL_TYPE_NEWDATE : + case MYSQL_TYPE_NEWDECIMAL : + case MYSQL_TYPE_ENUM : + case MYSQL_TYPE_SET : + case MYSQL_TYPE_GEOMETRY : + break; + default: + break; + } + + return; +} +#endif // WITH_WSREP /**************************************************************//** Converts a MySQL type to an InnoDB type. Note that this function returns the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 @@ -4391,6 +4561,256 @@ innobase_read_from_2_little_endian( /*******************************************************************//** Stores a key value for a row to a buffer. 
@return key value length as stored in buff */ +#ifdef WITH_WSREP +UNIV_INTERN +uint +wsrep_store_key_val_for_row( +/*===============================*/ + TABLE* table, + uint keynr, /*!< in: key number */ + char* buff, /*!< in/out: buffer for the key value (in MySQL + format) */ + uint buff_len,/*!< in: buffer length */ + const uchar* record, + ibool* key_is_null)/*!< out: full key was null */ +{ + KEY* key_info = table->key_info + keynr; + KEY_PART_INFO* key_part = key_info->key_part; + KEY_PART_INFO* end = key_part + key_info->key_parts; + char* buff_start = buff; + enum_field_types mysql_type; + Field* field; + + DBUG_ENTER("store_key_val_for_row"); + + bzero(buff, buff_len); + *key_is_null = TRUE; + + for (; key_part != end; key_part++) { + uchar sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'}; + ibool part_is_null = FALSE; + + if (key_part->null_bit) { + if (record[key_part->null_offset] & + key_part->null_bit) { + *buff = 1; + part_is_null = TRUE; + } else { + *buff = 0; + } + buff++; + } + if (!part_is_null) *key_is_null = FALSE; + + field = key_part->field; + mysql_type = field->type(); + + if (mysql_type == MYSQL_TYPE_VARCHAR) { + /* >= 5.0.3 true VARCHAR */ + ulint lenlen; + ulint len; + const byte* data; + ulint key_len; + ulint true_len; + CHARSET_INFO* cs; + int error=0; + + key_len = key_part->length; + + if (part_is_null) { + buff += key_len + 2; + + continue; + } + cs = field->charset(); + + lenlen = (ulint) + (((Field_varstring*)field)->length_bytes); + + data = row_mysql_read_true_varchar(&len, + (byte*) (record + + (ulint)get_field_offset(table, field)), + lenlen); + + true_len = len; + + /* For multi byte character sets we need to calculate + the true length of the key */ + + if (len > 0 && cs->mbmaxlen > 1) { + true_len = (ulint) cs->cset->well_formed_len(cs, + (const char *) data, + (const char *) data + len, + (uint) (key_len / + cs->mbmaxlen), + &error); + } + + /* In a column prefix index, we may need to truncate + the stored value: */ + + 
if (true_len > key_len) { + true_len = key_len; + } + + memcpy(sorted, data, true_len); + wsrep_innobase_mysql_sort( + mysql_type, cs->number, sorted, true_len); /* NOTE(review): unlike the BLOB branch below, the sorted VARCHAR bytes are never copied into buff and no 2-byte length prefix is written here, so this key part appears to stay all zero from the bzero() at function entry. Confirm whether that is intended before changing it; any change alters the key bytes this function produces. */ + + /* Note that we always reserve the maximum possible + length of the true VARCHAR in the key value, though + only len first bytes after the 2 length bytes contain + actual data. The rest of the space was reset to zero + in the bzero() call above. */ + + buff += key_len; + + } else if (mysql_type == MYSQL_TYPE_TINY_BLOB + || mysql_type == MYSQL_TYPE_MEDIUM_BLOB + || mysql_type == MYSQL_TYPE_BLOB + || mysql_type == MYSQL_TYPE_LONG_BLOB + /* MYSQL_TYPE_GEOMETRY data is treated + as BLOB data in innodb. */ + || mysql_type == MYSQL_TYPE_GEOMETRY) { + + CHARSET_INFO* cs; + ulint key_len; + ulint true_len; + int error=0; + ulint blob_len; + const byte* blob_data; + + ut_a(key_part->key_part_flag & HA_PART_KEY_SEG); + + key_len = key_part->length; + + if (part_is_null) { + buff += key_len + 2; + + continue; + } + + cs = field->charset(); + + blob_data = row_mysql_read_blob_ref(&blob_len, + (byte*) (record + + (ulint)get_field_offset(table, field)), + (ulint) field->pack_length()); + + true_len = blob_len; + + ut_a(get_field_offset(table, field) + == key_part->offset); + + /* For multi byte character sets we need to calculate + the true length of the key */ + + if (blob_len > 0 && cs->mbmaxlen > 1) { + true_len = (ulint) cs->cset->well_formed_len(cs, + (const char *) blob_data, + (const char *) blob_data + + blob_len, + (uint) (key_len / + cs->mbmaxlen), + &error); + } + + /* All indexes on BLOB and TEXT are column prefix + indexes, and we may need to truncate the data to be + stored in the key value: */ + + if (true_len > key_len) { + true_len = key_len; + } + + memcpy(sorted, blob_data, true_len); + wsrep_innobase_mysql_sort( + mysql_type, cs->number, sorted, true_len); + + memcpy(buff, sorted, true_len); + + /* Note that we always reserve the maximum possible + length of the BLOB prefix in the key value. 
*/ + + buff += key_len; + } else { + /* Here we handle all other data types except the + true VARCHAR, BLOB and TEXT. Note that the column + value we store may be also in a column prefix + index. */ + + CHARSET_INFO* cs; + ulint true_len; + ulint key_len; + const uchar* src_start; + int error=0; + enum_field_types real_type; + + key_len = key_part->length; + + if (part_is_null) { + buff += key_len; + + continue; + } + + src_start = record + key_part->offset; + real_type = field->real_type(); + true_len = key_len; + + /* Character set for the field is defined only + to fields whose type is string and real field + type is not enum or set. For these fields check + if character set is multi byte. */ + + if (real_type != MYSQL_TYPE_ENUM + && real_type != MYSQL_TYPE_SET + && ( mysql_type == MYSQL_TYPE_VAR_STRING + || mysql_type == MYSQL_TYPE_STRING)) { + + cs = field->charset(); + + /* For multi byte character sets we need to + calculate the true length of the key */ + + if (key_len > 0 && cs->mbmaxlen > 1) { + + true_len = (ulint) + cs->cset->well_formed_len(cs, + (const char *)src_start, + (const char *)src_start + + key_len, + (uint) (key_len / + cs->mbmaxlen), + &error); + } + memcpy(sorted, src_start, true_len); + wsrep_innobase_mysql_sort( + mysql_type, cs->number, sorted, true_len); + memcpy(buff, sorted, true_len); + } else { + memcpy(buff, src_start, true_len); + } + buff += true_len; + + /* Pad the unused space with spaces. 
*/ + + if (true_len < key_len) { + ulint pad_len = key_len - true_len; + ut_a(!(pad_len % cs->mbminlen)); + + cs->cset->fill(cs, buff, pad_len, + 0x20 /* space */); + buff += pad_len; + } + } + } + + ut_a(buff <= buff_start + buff_len); + + DBUG_RETURN((uint)(buff - buff_start)); +} +#endif /* WITH_WSREP */ UNIV_INTERN uint ha_innobase::store_key_val_for_row( @@ -4994,6 +5414,9 @@ ha_innobase::write_row( ulint error = 0; int error_result= 0; ibool auto_inc_used= FALSE; +#ifdef WITH_WSREP + ibool auto_inc_inserted= FALSE; /* if NULL was inserted */ +#endif ulint sql_command; trx_t* trx = thd_to_trx(user_thd); @@ -5024,8 +5447,14 @@ ha_innobase::write_row( if ((sql_command == SQLCOM_ALTER_TABLE || sql_command == SQLCOM_OPTIMIZE || sql_command == SQLCOM_CREATE_INDEX +#ifdef WITH_WSREP + || (wsrep_on(user_thd) && sql_command == SQLCOM_LOAD) +#endif /* WITH_WSREP */ || sql_command == SQLCOM_DROP_INDEX) && num_write_row >= 10000) { +#ifdef WITH_WSREP + WSREP_DEBUG("forced commit: %s", wsrep_thd_query(user_thd)); +#endif /* WITH_WSREP */ /* ALTER TABLE is COMMITted at every 10000 copied rows. The IX table lock for the original table has to be re-issued. As this method will be called on a temporary table where the @@ -5097,7 +5526,9 @@ no_commit: /* Reset the error code before calling innobase_get_auto_increment(). */ prebuilt->autoinc_error = DB_SUCCESS; - +#ifdef WITH_WSREP + auto_inc_inserted= (table->next_number_field->val_int() == 0); +#endif if ((error = update_auto_increment())) { /* We don't want to mask autoinc overflow errors. */ @@ -5177,6 +5608,30 @@ no_commit: case SQLCOM_REPLACE_SELECT: goto set_max_autoinc; +#ifdef WITH_WSREP + /* workaround for LP bug #355000, retrying the insert */ + case SQLCOM_INSERT: + if (wsrep_on(current_thd) && + auto_inc_inserted && + wsrep_drupal_282555_workaround && + !thd_test_options(current_thd, + OPTION_NOT_AUTOCOMMIT | + OPTION_BEGIN)) { + WSREP_DEBUG( + "retrying insert: %s", + (*wsrep_thd_query(current_thd)) ? 
+ wsrep_thd_query(current_thd) : + (char *)"void"); + error= DB_SUCCESS; + wsrep_thd_set_conflict_state( + current_thd, MUST_ABORT); + innodb_srv_conc_exit_innodb(prebuilt->trx); + /* jump straight to func exit over + * later wsrep hooks */ + goto func_exit; + } + break; +#endif default: break; } @@ -5224,6 +5679,20 @@ report_error: error_result = convert_error_code_to_mysql((int) error, prebuilt->table->flags, user_thd); +#ifdef WITH_WSREP + if (!error_result && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && + wsrep_on(user_thd) && !wsrep_consistency_check(user_thd) && + (sql_command != SQLCOM_LOAD || + thd_binlog_format(user_thd) == BINLOG_FORMAT_ROW)) { + + if (wsrep_append_keys(user_thd, false, record, NULL)) { + DBUG_PRINT("wsrep", ("row key failed")); + error_result = HA_ERR_INTERNAL_ERROR; + goto wsrep_error; + } + } +wsrep_error: +#endif func_exit: innobase_active_small(); @@ -5505,6 +5974,20 @@ ha_innobase::update_row( innobase_active_small(); +#ifdef WITH_WSREP + if (!error && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && + wsrep_on(user_thd)) { + + DBUG_PRINT("wsrep", ("update row key")); + + if (wsrep_append_keys(user_thd, false, old_row, new_row)) { + DBUG_PRINT("wsrep", ("row key failed")); + error = HA_ERR_INTERNAL_ERROR; + goto wsrep_error; + } + } +wsrep_error: +#endif DBUG_RETURN(error); } @@ -5548,6 +6031,18 @@ ha_innobase::delete_row( innobase_active_small(); +#ifdef WITH_WSREP + if (!error && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && + wsrep_on(user_thd)) { + + if (wsrep_append_keys(user_thd, false, record, NULL)) { + DBUG_PRINT("wsrep", ("delete fail")); + error = HA_ERR_INTERNAL_ERROR; + goto wsrep_error; + } + } +wsrep_error: +#endif DBUG_RETURN(error); } @@ -6333,7 +6828,240 @@ ha_innobase::rnd_pos( DBUG_RETURN(error); } +#ifdef WITH_WSREP +extern "C" { +ulint +wsrep_append_foreign_key( +/*===========================*/ + trx_t* trx, /*!< in: trx */ + dict_foreign_t* foreign, /*!< in: foreign key constraint */ + const rec_t* 
clust_rec, /*!<in: clustered index record */ + dict_index_t* clust_index, /*!<in: clustered index */ + ibool shared) /*!<in: is shared access */ +{ + THD* thd = (THD*)trx->mysql_thd; + ulint rcode = DB_SUCCESS; + char cache_key[512] = {'\0'}; + + if (!wsrep_on(trx->mysql_thd) || + wsrep_thd_exec_mode(thd) != LOCAL_STATE) + return DB_SUCCESS; + + byte key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1]; + ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH; + + if (!dict_index_is_clust(clust_index)) { + WSREP_ERROR("clustered index not passed for FK append"); + return DB_ERROR; + } + + key[0] = '\0'; + rcode = wsrep_rec_get_primary_key( + &key[1], &len, clust_rec, clust_index); + if (rcode != DB_SUCCESS) { + WSREP_ERROR("FK key set failed: %lu", rcode); + return rcode; + } +#ifdef WSREP_DEBUG_PRINT + ulint i; + fprintf(stderr, "FK parent key, len: %lu ", len+1); + for (i=0; i<len+1; i++) { + fprintf(stderr, " (%X), ", key[i]); + } + fprintf(stderr, "\n"); +#endif + /* Copy at most sizeof(cache_key) - 1 bytes: the array is zero-initialised above, so its final byte always terminates the string even when the table name is truncated. */ + strncpy(cache_key, foreign->foreign_table->name, sizeof(cache_key) - 1); + /* Length of the copied table name plus its terminator, measured on the local copy before the separator is overwritten below, so it can never exceed the buffer. */ + size_t cache_key_len = strlen(cache_key) + 1; + char *p = strchr(cache_key, '/'); + if (p) { + *p = '\0'; + } else { + WSREP_WARN("unexpected foreign key table %s", + foreign->foreign_table->name); + } + + wsrep_key_part_t wkey_part[3]; + wsrep_key_t wkey = {wkey_part, 3}; + if (!wsrep_prepare_key_for_innodb( + (const uchar*)cache_key, + cache_key_len, + (const uchar*)key, len+1, + wkey_part, + &wkey.key_parts_len)) { + WSREP_WARN("key prepare failed for cascaded FK: %s", + (wsrep_thd_query(thd)) ? + wsrep_thd_query(thd) : "void"); + return DB_ERROR; + } + rcode = (int)wsrep->append_key( + wsrep, + wsrep_trx_handle(thd, trx), + &wkey, + 1, + shared); + if (rcode) { + DBUG_PRINT("wsrep", ("row key failed: %lu", rcode)); + WSREP_ERROR("Appending cascaded fk row key failed: %s, %lu", + (wsrep_thd_query(thd)) ? 
+ wsrep_thd_query(thd) : "void", rcode); + return DB_ERROR; + } + + return DB_SUCCESS; +} +} + +static int +wsrep_append_key( +/*==================*/ + THD *thd, + trx_t *trx, + TABLE_SHARE *table_share, + TABLE *table, + const char* key, + uint16_t key_len, + bool shared +) +{ + DBUG_ENTER("wsrep_append_key"); +#ifdef WSREP_DEBUG_PRINT + fprintf(stderr, "%s conn %ld, trx %llu, keylen %d, table %s ", + (shared) ? "Shared" : "Exclusive", + wsrep_thd_thread_id(thd), trx->id, key_len, + table_share->table_name.str); + for (int i=0; i<key_len; i++) { + fprintf(stderr, "%hhX, ", key[i]); + } + fprintf(stderr, "\n"); +#endif + wsrep_key_part_t wkey_part[3]; + wsrep_key_t wkey = {wkey_part, 3}; + if (!wsrep_prepare_key_for_innodb( + (const uchar*)table_share->table_cache_key.str, + table_share->table_cache_key.length, + (const uchar*)key, key_len, + wkey_part, + &wkey.key_parts_len)) { + WSREP_WARN("key prepare failed for: %s", + (wsrep_thd_query(thd)) ? + wsrep_thd_query(thd) : "void"); + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + } + + int rcode = (int)wsrep->append_key( + wsrep, + wsrep_trx_handle(thd, trx), + &wkey, + 1, + shared); + if (rcode) { + DBUG_PRINT("wsrep", ("row key failed: %d", rcode)); + WSREP_WARN("Appending row key failed: %s, %d", + (wsrep_thd_query(thd)) ? 
+ wsrep_thd_query(thd) : "void", rcode); + DBUG_RETURN(rcode); + } + DBUG_RETURN(0); +} +int +ha_innobase::wsrep_append_keys( +/*==================*/ + THD *thd, + bool shared, + const uchar* record0, /* in: row in MySQL format */ + const uchar* record1) /* in: row in MySQL format */ +{ + DBUG_ENTER("wsrep_append_keys"); + trx_t *trx = thd_to_trx(thd); + + /* if no PK, calculate hash of full row, to be the key value */ + if (prebuilt->clust_index_was_generated && wsrep_certify_nonPK) { + uchar digest[16]; + int rcode; + + MY_MD5_HASH(digest, (uchar *)record0, table->s->reclength); + if ((rcode = wsrep_append_key(thd, trx, table_share, table, + (const char*) digest, 16, + shared))) { + DBUG_RETURN(rcode); + } + if (record1) { + MY_MD5_HASH(digest, (uchar *)record1, table->s->reclength); + if ((rcode = wsrep_append_key(thd, trx, table_share, + table, + (const char*) digest, + 16, shared))) { + DBUG_RETURN(rcode); + } + } + } else if (wsrep_protocol_version == 0) { + uint len; + char keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'}; + char *key = &keyval[0]; + KEY *key_info = table->key_info; + ibool is_null; + + len = wsrep_store_key_val_for_row( + table, 0, key, key_info->key_length, record0, &is_null); + if (!is_null) { + int rcode = wsrep_append_key( + thd, trx, table_share, table, keyval, + len, shared); + if (rcode) DBUG_RETURN(rcode); + } + else + { + WSREP_DEBUG("NULL key skipped (proto 0): %s", + wsrep_thd_query(thd)); + } + } else { + ut_a(table->s->keys <= 256); + uint i; + for (i=0; i<table->s->keys; ++i) { + uint len; + char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'}; + char keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'}; + char *key0 = &keyval0[1]; + char *key1 = &keyval1[1]; + KEY *key_info = table->key_info + i; + ibool is_null; + + keyval0[0] = (char)i; + keyval1[0] = (char)i; + + if (key_info->flags & HA_NOSAME) { + len = wsrep_store_key_val_for_row( + table, i, key0, key_info->key_length, + record0, &is_null); + if (!is_null) { + int 
rcode = wsrep_append_key( + thd, trx, table_share, table, + keyval0, len+1, shared); + if (rcode) DBUG_RETURN(rcode); + } + else + { + WSREP_DEBUG("NULL key skipped: %s", + wsrep_thd_query(thd)); + } + if (record1) { + len = wsrep_store_key_val_for_row( + table, i, key1, key_info->key_length, + record1, &is_null); + if (!is_null && memcmp(key0, key1, len)) { + int rcode = wsrep_append_key( + thd, trx, table_share, + table, + keyval1, len+1, shared); + if (rcode) DBUG_RETURN(rcode); + } + } + } + } + } + DBUG_RETURN(0); +} +#endif /*********************************************************************//** Stores a reference to the current row to 'ref' field of the handle. Note that in the case where we have generated the clustered index for the @@ -9192,11 +9920,18 @@ ha_innobase::external_lock( /* used by test case */ DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = 1;); if (!skip) { +#ifdef WITH_WSREP + if (!wsrep_on(thd) || wsrep_thd_exec_mode(thd) == LOCAL_STATE) + { +#endif /* WITH_WSREP */ my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0), " InnoDB is limited to row-logging when " "transaction isolation level is " "READ COMMITTED or READ UNCOMMITTED."); DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ } } @@ -11229,6 +11964,256 @@ static SHOW_VAR innodb_status_variables_export[]= { static struct st_mysql_storage_engine innobase_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; +#ifdef WITH_WSREP +void +wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno) +{ + WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be " + "caused by:\n\t" + "1) unsupported configuration options combination, please check documentation.\n\t" + "2) a bug in the code.\n\t" + "3) a database corruption.\n Node consistency compromized, " + "need to abort. 
Restart the node to resync with cluster.", + (long long)bf_seqno, (long long)victim_seqno); + abort(); +} +int +wsrep_innobase_kill_one_trx(trx_t *bf_trx, trx_t *victim_trx, ibool signal) +{ + DBUG_ENTER("wsrep_innobase_kill_one_trx"); + THD *thd = (THD *) victim_trx->mysql_thd; + THD *bf_thd = (bf_trx) ? (THD *)bf_trx->mysql_thd : NULL; + int64_t bf_seqno = (bf_thd) ? wsrep_thd_trx_seqno(bf_thd) : 0; + + if (!thd) { + DBUG_PRINT("wsrep", ("no thd for conflicting lock")); + WSREP_WARN("no THD for trx: %llu", victim_trx->id); + DBUG_RETURN(1); + } + + WSREP_DEBUG("BF kill (%lu, seqno: %lld), victim: (%lu) trx: %llu", + signal, (long long)bf_seqno, + wsrep_thd_thread_id(thd), + victim_trx->id); + + WSREP_DEBUG("Aborting query: %s", + (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void"); + + wsrep_thd_LOCK(thd); + + if (wsrep_thd_query_state(thd) == QUERY_EXITING) { + WSREP_DEBUG("kill trx EXITING for %llu", victim_trx->id); + wsrep_thd_UNLOCK(thd); + DBUG_RETURN(0); + } + if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) { + WSREP_DEBUG("withdraw for BF trx: %llu, state: %d", + victim_trx->id, + wsrep_thd_conflict_state(thd)); + } + + switch (wsrep_thd_conflict_state(thd)) { + case NO_CONFLICT: + wsrep_thd_set_conflict_state(thd, MUST_ABORT); + break; + case MUST_ABORT: + WSREP_DEBUG("victim %llu in MUST ABORT state", + victim_trx->id); + wsrep_thd_UNLOCK(thd); + wsrep_thd_awake(thd, signal); + DBUG_RETURN(0); + break; + case ABORTED: + case ABORTING: // fall through + default: + WSREP_DEBUG("victim %llu in state %d", + victim_trx->id, wsrep_thd_conflict_state(thd)); + wsrep_thd_UNLOCK(thd); + DBUG_RETURN(0); + break; + } + + switch (wsrep_thd_query_state(thd)) { + case QUERY_COMMITTING: + enum wsrep_status rcode; + + WSREP_DEBUG("kill trx QUERY_COMMITTING for %llu", + victim_trx->id); + + if (wsrep_thd_exec_mode(thd) == REPL_RECV) { + wsrep_abort_slave_trx(bf_seqno, + wsrep_thd_trx_seqno(thd)); + } else { + rcode = wsrep->abort_pre_commit( + wsrep, bf_seqno, + 
(wsrep_trx_id_t)victim_trx->id + ); + + switch (rcode) { + case WSREP_WARNING: + WSREP_DEBUG("cancel commit warning: %llu", + victim_trx->id); + wsrep_thd_UNLOCK(thd); + DBUG_RETURN(1); + break; + case WSREP_OK: + break; + default: + WSREP_ERROR( + "cancel commit bad exit: %d %llu", + rcode, + victim_trx->id); + /* unable to interrupt, must abort */ + /* note: kill_mysql() will block, if we cannot. + * kill the lock holder first. + */ + abort(); + break; + } + } + break; + case QUERY_EXEC: + /* it is possible that victim trx is itself waiting for some + * other lock. We need to cancel this waiting + */ + WSREP_DEBUG("kill trx QUERY_EXEC for %llu", victim_trx->id); + + victim_trx->was_chosen_as_deadlock_victim= TRUE; + if (victim_trx->wait_lock) { + WSREP_DEBUG("victim has wait flag: %ld", + wsrep_thd_thread_id(thd)); + lock_t* wait_lock = victim_trx->wait_lock; + if (wait_lock) { + WSREP_DEBUG("canceling wait lock"); + victim_trx->was_chosen_as_deadlock_victim= TRUE; + lock_cancel_waiting_and_release(wait_lock); + } + + wsrep_thd_awake(thd, signal); + } else { + /* abort currently executing query */ + DBUG_PRINT("wsrep",("sending KILL_QUERY to: %ld", + wsrep_thd_thread_id(thd))); + WSREP_DEBUG("kill query for: %ld", + wsrep_thd_thread_id(thd)); + wsrep_thd_awake(thd, signal); + + /* for BF thd, we need to prevent him from committing */ + if (wsrep_thd_exec_mode(thd) == REPL_RECV) { + wsrep_abort_slave_trx(bf_seqno, + wsrep_thd_trx_seqno(thd)); + } + } + break; + case QUERY_IDLE: + { + bool skip_abort= false; + wsrep_aborting_thd_t abortees; + + WSREP_DEBUG("kill IDLE for %llu", victim_trx->id); + + if (wsrep_thd_exec_mode(thd) == REPL_RECV) { + WSREP_DEBUG("kill BF IDLE, seqno: %lld", + (long long)wsrep_thd_trx_seqno(thd)); + wsrep_thd_UNLOCK(thd); + wsrep_abort_slave_trx(bf_seqno, + wsrep_thd_trx_seqno(thd)); + DBUG_RETURN(0); + } + /* This will lock thd from proceeding after net_read() */ + wsrep_thd_set_conflict_state(thd, ABORTING); + + 
mysql_mutex_lock(&LOCK_wsrep_rollback); + + abortees = wsrep_aborting_thd; + while (abortees && !skip_abort) { + /* check if we have a kill message for this already */ + if (abortees->aborting_thd == thd) { + skip_abort = true; + WSREP_WARN("duplicate thd aborter %lu", + wsrep_thd_thread_id(thd)); + } + abortees = abortees->next; + } + if (!skip_abort) { + wsrep_aborting_thd_t aborting = (wsrep_aborting_thd_t) + my_malloc(sizeof(struct wsrep_aborting_thd), + MYF(0)); + aborting->aborting_thd = thd; + aborting->next = wsrep_aborting_thd; + wsrep_aborting_thd = aborting; + DBUG_PRINT("wsrep",("enqueuing trx abort for %lu", + wsrep_thd_thread_id(thd))); + WSREP_DEBUG("enqueuing trx abort for (%lu)", + wsrep_thd_thread_id(thd)); + } + + DBUG_PRINT("wsrep",("signalling wsrep rollbacker")); + WSREP_DEBUG("signaling aborter"); + mysql_cond_signal(&COND_wsrep_rollback); + mysql_mutex_unlock(&LOCK_wsrep_rollback); + + break; + } + default: + WSREP_WARN("bad wsrep query state: %d", + wsrep_thd_query_state(thd)); + break; + } + wsrep_thd_UNLOCK(thd); + + DBUG_RETURN(0); +} +static int +wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd, + my_bool signal) +{ + DBUG_ENTER("wsrep_innobase_abort_thd"); + trx_t* victim_trx = thd_to_trx(victim_thd); + trx_t* bf_trx = (bf_thd) ? 
thd_to_trx(bf_thd) : NULL; + WSREP_DEBUG("abort transaction: BF: %s victim: %s", + wsrep_thd_query(bf_thd), + wsrep_thd_query(victim_thd)); + + if (victim_trx) + { + mutex_enter(&kernel_mutex); + int rcode = wsrep_innobase_kill_one_trx(bf_trx, victim_trx, + signal); + mutex_exit(&kernel_mutex); + DBUG_RETURN(rcode); + } else { + WSREP_DEBUG("victim does not have transaction"); + wsrep_thd_LOCK(victim_thd); + wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT); + wsrep_thd_UNLOCK(victim_thd); + wsrep_thd_awake(victim_thd, signal); + } + DBUG_RETURN(-1); +} + +static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid) +{ + DBUG_ASSERT(hton == innodb_hton_ptr); + if (wsrep_is_wsrep_xid(xid)) { + mtr_t mtr; + mtr_start(&mtr); + trx_sys_update_wsrep_checkpoint(xid, &mtr); + mtr_commit(&mtr); + return 0; + } else { + return 1; + } +} + +static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid) +{ + DBUG_ASSERT(hton == innodb_hton_ptr); + trx_sys_read_wsrep_checkpoint(xid); + return 0; +} + +#endif /* WITH_WSREP */ /* plugin options */ static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, @@ -11560,6 +12545,40 @@ static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, NULL, NULL, 0, 0, 1, 0); #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ +#ifdef WITH_INNODB_DISALLOW_WRITES +/******************************************************* + * innobase_disallow_writes variable definition * + *******************************************************/ + +/* Must always init to FALSE. */ +static my_bool innobase_disallow_writes = FALSE; + +/************************************************************************** +An "update" method for innobase_disallow_writes variable. 
*/ +static +void +innobase_disallow_writes_update( +/*============================*/ + THD* thd, /* in: thread handle */ + st_mysql_sys_var* var, /* in: pointer to system + variable */ + void* var_ptr, /* out: pointer to dynamic + variable */ + const void* save) /* in: temporary storage */ +{ + *(my_bool*)var_ptr = *(my_bool*)save; + ut_a(srv_allow_writes_event); + if (*(my_bool*)var_ptr) + os_event_reset(srv_allow_writes_event); + else + os_event_set(srv_allow_writes_event); +} + +static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes, + PLUGIN_VAR_NOCMDOPT, + "Tell InnoDB to stop any writes to disk", + NULL, innobase_disallow_writes_update, FALSE); +#endif /* WITH_INNODB_DISALLOW_WRITES */ static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead, PLUGIN_VAR_NOCMDARG, "Whether to use read ahead for random access within an extent.", @@ -11641,6 +12660,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG MYSQL_SYSVAR(change_buffering_debug), #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ +#ifdef WITH_INNODB_DISALLOW_WRITES + MYSQL_SYSVAR(disallow_writes), +#endif /* WITH_INNODB_DISALLOW_WRITES */ MYSQL_SYSVAR(random_read_ahead), MYSQL_SYSVAR(read_ahead_threshold), MYSQL_SYSVAR(io_capacity), diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 7cce0c4a16c..8b3b63bd9fb 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -112,6 +112,10 @@ class ha_innobase: public handler dict_index_t* innobase_get_index(uint keynr); int info_low(uint flag, bool called_from_analyze); +#ifdef WITH_WSREP + int wsrep_append_keys(THD *thd, bool shared, + const uchar* record0, const uchar* record1); +#endif /* Init values for the class: */ public: ha_innobase(handlerton *hton, TABLE_SHARE *table_arg); @@ -290,6 +294,37 @@ bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd); */ extern void mysql_bin_log_commit_pos(THD 
*thd, ulonglong *out_pos, const char **out_file); +#ifdef WITH_WSREP +#include <wsrep_mysqld.h> +//extern "C" int wsrep_trx_order_before(void *thd1, void *thd2); + +extern "C" bool wsrep_thd_is_wsrep_on(THD *thd); + +extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd); +extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd); +extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd); +extern "C" wsrep_trx_handle_t* wsrep_thd_trx_handle(THD *thd); + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode); +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state); +extern "C" void wsrep_thd_set_conflict_state( + THD *thd, enum wsrep_conflict_state state); + +extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id); + +extern "C"void wsrep_thd_LOCK(THD *thd); +extern "C"void wsrep_thd_UNLOCK(THD *thd); +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd); +extern "C" time_t wsrep_thd_query_start(THD *thd); +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd); +extern "C" int64_t wsrep_thd_trx_seqno(THD *thd); +extern "C" query_id_t wsrep_thd_query_id(THD *thd); +extern "C" char * wsrep_thd_query(THD *thd); +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); +extern "C" void wsrep_thd_awake(THD *thd, my_bool signal); +#endif typedef struct trx_struct trx_t; /********************************************************************//** @file handler/ha_innodb.h @@ -330,3 +365,6 @@ innobase_index_name_is_reserved( ulint num_of_keys); /*!< in: Number of indexes to be created. 
*/ +#ifdef WITH_WSREP +extern "C" int wsrep_trx_is_aborting(void *thd_ptr); +#endif diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 7d89979fd1f..f05bcb844d5 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -36,6 +36,10 @@ extern "C" { #include "handler0alter.h" } +#ifdef WITH_WSREP +//#include "wsrep_api.h" +#include <sql_acl.h> // PROCESS_ACL +#endif #include "ha_innodb.h" /*************************************************************//** diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 9ded0dba39b..3389c7d72b3 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -343,6 +343,9 @@ barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN /** Defines the maximum fixed length column size */ #define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN +#ifdef WITH_WSREP +#define WSREP_MAX_SUPPORTED_KEY_LENGTH 3500 +#endif /* WITH_WSREP */ /** Data structure for a field in an index */ struct dict_field_struct{ diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index edf7a1a28c1..4ac365d5c40 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -285,6 +285,18 @@ thd_set_lock_wait_time( void* thd, /*!< in: thread handle (THD*) */ ulint value); /*!< in: time waited for the lock */ +#ifdef WITH_WSREP +UNIV_INTERN +int +wsrep_innobase_kill_one_trx(trx_t *bf_trx, trx_t *victim_trx, ibool signal); +int wsrep_thd_is_brute_force(void *thd_ptr); +int wsrep_trx_order_before(void *thd1, void *thd2); +void wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, + unsigned char* str, unsigned int str_length); +int +wsrep_on(void *thd_ptr); +int wsrep_is_wsrep_xid(const void*); +#endif /* WITH_WSREP */ 
/**********************************************************************//** Get the current setting of the lower_case_table_names global parameter from mysqld.cc. We do a dirty read because for one there is no synchronization diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 29fdc3bbe97..e5a61e599bc 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -797,6 +797,7 @@ lock_rec_get_page_no( remains set when the waiting lock is granted, or if the lock is inherited to a neighboring record */ +#define WSREP_BF 4096 #if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK # error #endif diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h index 10b74d18c13..ab390f4fb3a 100644 --- a/storage/innobase/include/rem0rec.h +++ b/storage/innobase/include/rem0rec.h @@ -832,6 +832,13 @@ are given in one byte (resp. two byte) format. */ two upmost bits in a two byte offset for special purposes */ #define REC_MAX_DATA_SIZE (16 * 1024) +#ifdef WITH_WSREP +int wsrep_rec_get_primary_key( + byte *buf, /* out: extracted key */ + ulint *buf_len, /* in/out: length of buf */ + const rec_t* rec, /* in: physical record */ + dict_index_t* index); /* in: record descriptor */ +#endif /* WITH_WSREP */ #ifndef UNIV_NONINL #include "rem0rec.ic" #endif diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index dfe7397d189..edfea04bed3 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -141,6 +141,10 @@ extern ulint srv_log_buffer_size; extern ulong srv_flush_log_at_trx_commit; extern char srv_adaptive_flushing; +#ifdef WITH_INNODB_DISALLOW_WRITES +/* When this event is reset we do not allow any file writes to take place. 
*/ +extern os_event_t srv_allow_writes_event; +#endif /* WITH_INNODB_DISALLOW_WRITES */ /* If this flag is TRUE, then we will load the indexes' (and tables') metadata even if they are marked as "corrupted". Mostly it is for DBA to process corrupted index and table */ diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 3913792d594..7a27342b44d 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -41,6 +41,9 @@ Created 3/26/1996 Heikki Tuuri #include "ut0bh.h" #include "read0types.h" #include "page0types.h" +#ifdef WITH_WSREP +#include "trx0xa.h" +#endif /* WITH_WSREP */ /** In a MySQL replication slave, in crash recovery we store the master log file name and position here. */ @@ -316,6 +319,17 @@ UNIV_INTERN void trx_sys_print_mysql_binlog_offset(void); /*===================================*/ +#ifdef WITH_WSREP +/** Update WSREP checkpoint XID in sys header. */ +void +trx_sys_update_wsrep_checkpoint( + const XID* xid, /*!< in: WSREP XID */ + mtr_t* mtr); /*!< in: mtr */ +void +/** Read WSREP checkpoint XID from sys header. */ +trx_sys_read_wsrep_checkpoint( + XID* xid); /*!< out: WSREP XID */ +#endif /* WITH_WSREP */ /*****************************************************************//** Prints to stderr the MySQL master log offset info in the trx system header if the magic number shows it valid. */ @@ -521,6 +535,22 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ within that file */ #define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */ +#ifdef WITH_WSREP +/* We hijack TRX_SYS_MYSQL_MASTER_LOG_INFO, it seems to be completely unused + otherwise (see comments for MySQL bug #34058). 
*/ +/** */ +#define TRX_SYS_WSREP_XID_INFO TRX_SYS_MYSQL_MASTER_LOG_INFO +#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0 +#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265 + +/* XID field: formatID, gtrid_len, bqual_len, xid_data */ +#define TRX_SYS_WSREP_XID_LEN (4 + 4 + 4 + XIDDATASIZE) +#define TRX_SYS_WSREP_XID_FORMAT 4 +#define TRX_SYS_WSREP_XID_GTRID_LEN 8 +#define TRX_SYS_WSREP_XID_BQUAL_LEN 12 +#define TRX_SYS_WSREP_XID_DATA 16 +#endif /* WITH_WSREP*/ + /** Doublewrite buffer */ /* @{ */ /** The offset of the doublewrite buffer header on the trx system header page */ diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c index 263516ab5fd..35cbb228e05 100644 --- a/storage/innobase/lock/lock0lock.c +++ b/storage/innobase/lock/lock0lock.c @@ -40,6 +40,9 @@ Created 5/7/1996 Heikki Tuuri #include "trx0sys.h" #include "btr0btr.h" +#ifdef WITH_WSREP +extern my_bool wsrep_debug; +#endif /* Restricts the length of search we will do in the waits-for graph of transactions */ #define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000 @@ -921,6 +924,11 @@ lock_rec_has_to_wait( if (trx != lock2->trx && !lock_mode_compatible(LOCK_MODE_MASK & type_mode, lock_get_mode(lock2))) { +#ifdef WITH_WSREP + if ((type_mode & WSREP_BF) && (lock2->type_mode & WSREP_BF)) { + return FALSE; + } +#endif /* WITH_WSREP */ /* We have somewhat complex rules when gap type record locks cause waits */ @@ -1450,6 +1458,11 @@ lock_rec_has_expl( return(NULL); } +#ifdef WITH_WSREP +static +void +lock_rec_discard(lock_t* in_lock); +#endif #ifdef UNIV_DEBUG /*********************************************************************//** Checks if some other transaction has a lock request in the queue. 
@@ -1499,6 +1512,27 @@ lock_rec_other_has_expl_req( } #endif /* UNIV_DEBUG */ +#ifdef WITH_WSREP +static void +wsrep_kill_victim(trx_t *trx, lock_t *lock) { + int bf_this = wsrep_thd_is_brute_force(trx->mysql_thd); + int bf_other = + wsrep_thd_is_brute_force(lock->trx->mysql_thd); + if ((bf_this && !bf_other) || + (bf_this && bf_other && wsrep_trx_order_before( + trx->mysql_thd, lock->trx->mysql_thd))) { + + if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) { + if (wsrep_debug) + fprintf(stderr, "WSREP: BF victim waiting\n"); + /* cannot release lock, until our lock + is in the queue*/ + } else if (lock->trx != trx) { + wsrep_innobase_kill_one_trx(trx, lock->trx, TRUE); + } + } +} +#endif /*********************************************************************//** Checks if some other transaction has a conflicting explicit lock request in the queue, so that we have to wait. @@ -1528,6 +1562,9 @@ lock_rec_other_has_conflicting( do { if (lock_rec_has_to_wait(trx, mode, lock, TRUE)) { +#ifdef WITH_WSREP + wsrep_kill_victim(trx, lock); +#endif return(lock); } @@ -1538,6 +1575,9 @@ lock_rec_other_has_conflicting( do { if (lock_rec_has_to_wait(trx, mode, lock, FALSE)) { +#ifdef WITH_WSREP + wsrep_kill_victim(trx, lock); +#endif return(lock); } @@ -1669,6 +1709,9 @@ static lock_t* lock_rec_create( /*============*/ +#ifdef WITH_WSREP + lock_t* c_lock, /* conflicting lock */ +#endif ulint type_mode,/*!< in: lock mode and wait flag, type is ignored and replaced by LOCK_REC */ @@ -1714,6 +1757,11 @@ lock_rec_create( lock->trx = trx; lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC; +#ifdef WITH_WSREP + if (wsrep_thd_is_brute_force(trx->mysql_thd)) { + lock->type_mode |= WSREP_BF; + } +#endif /* WITH_WSREP */ lock->index = index; lock->un_member.rec_lock.space = space; @@ -1728,8 +1776,56 @@ lock_rec_create( /* Set the bit corresponding to rec */ lock_rec_set_nth_bit(lock, heap_no); +#ifdef WITH_WSREP + if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) { + lock_t 
*hash = c_lock->hash; + lock_t *prev = NULL; + + while (hash && + wsrep_thd_is_brute_force(hash->trx->mysql_thd) && + wsrep_trx_order_before(hash->trx->mysql_thd, trx->mysql_thd)){ + prev = hash; + hash = hash->hash; + } + lock->hash = hash; + if (prev) { + prev->hash = lock; + } else { + c_lock->hash = lock; + } + /* + * delayed conflict resolution '...kill_one_trx' was not called, + * if victim was waiting for some other lock + */ + if (c_lock && c_lock->trx->que_state == TRX_QUE_LOCK_WAIT) { + c_lock->trx->was_chosen_as_deadlock_victim = TRUE; + trx->que_state = TRX_QUE_LOCK_WAIT; + lock_set_lock_and_trx_wait(lock, trx); + + lock_cancel_waiting_and_release(c_lock->trx->wait_lock); + + /* trx might not wait for c_lock, but some other lock */ + if (wsrep_debug && c_lock->trx->wait_lock != c_lock) { + fprintf(stderr, "WSREP: c_lock != wait lock\n"); + } + if (c_lock->trx->wait_lock == c_lock) { + lock_reset_lock_and_trx_wait(lock); + } + + if (wsrep_debug) + fprintf(stderr, "WSREP: c_lock canceled %llu\n", + (ulonglong) c_lock->trx->id); + /* have to bail out here to avoid lock_set_lock... 
*/ + return(lock); + } + } else { + HASH_INSERT(lock_t, hash, lock_sys->rec_hash, + lock_rec_fold(space, page_no), lock); + } +#else HASH_INSERT(lock_t, hash, lock_sys->rec_hash, lock_rec_fold(space, page_no), lock); +#endif if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { lock_set_lock_and_trx_wait(lock, trx); @@ -1749,6 +1845,9 @@ static enum db_err lock_rec_enqueue_waiting( /*=====================*/ +#ifdef WITH_WSREP + lock_t* c_lock, /* conflicting lock */ +#endif ulint type_mode,/*!< in: lock mode this transaction is requesting: LOCK_S or LOCK_X, possibly @@ -1800,8 +1899,16 @@ lock_rec_enqueue_waiting( } /* Enqueue the lock request that will wait to be granted */ +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && trx->was_chosen_as_deadlock_victim) { + return(DB_DEADLOCK); + } + lock = lock_rec_create(c_lock, type_mode | LOCK_WAIT, + block, heap_no, index, trx); +#else lock = lock_rec_create(type_mode | LOCK_WAIT, block, heap_no, index, trx); +#endif /* Check if a deadlock occurs: if yes, remove the lock request and return an error code */ @@ -1879,7 +1986,19 @@ lock_rec_add_to_queue( lock_t* other_lock = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT, block, heap_no, trx); +#ifdef WITH_WSREP + /* this can potentionally assert with wsrep */ + if (wsrep_on(trx->mysql_thd)) { + if (wsrep_debug && other_lock) { + fprintf(stderr, + "WSREP: InnoDB assert ignored\n"); + } + } else { + ut_a(!other_lock); + } +#else ut_a(!other_lock); +#endif /* WITH_WSREP */ } #endif /* UNIV_DEBUG */ @@ -1932,7 +2051,11 @@ lock_rec_add_to_queue( } somebody_waits: +#ifdef WITH_WSREP + return(lock_rec_create(NULL, type_mode, block, heap_no, index, trx)); +#else return(lock_rec_create(type_mode, block, heap_no, index, trx)); +#endif } /** Record locking request status */ @@ -1982,6 +2105,10 @@ lock_rec_lock_fast( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 +#ifdef WITH_WSREP + || mode - (LOCK_MODE_MASK & 
mode) == WSREP_BF + || mode - (LOCK_MODE_MASK & mode) - LOCK_REC_NOT_GAP == WSREP_BF +#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); lock = lock_rec_get_first_on_page(block); @@ -1990,7 +2117,11 @@ lock_rec_lock_fast( if (lock == NULL) { if (!impl) { +#ifdef WITH_WSREP + lock_rec_create(NULL, mode, block, heap_no, index, trx); +#else lock_rec_create(mode, block, heap_no, index, trx); +#endif } return(LOCK_REC_SUCCESS_CREATED); @@ -2046,6 +2177,9 @@ lock_rec_lock_slow( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; +#ifdef WITH_WSREP + lock_t *c_lock; +#endif ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S @@ -2056,6 +2190,10 @@ lock_rec_lock_slow( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 +#ifdef WITH_WSREP + || mode - (LOCK_MODE_MASK & mode) == WSREP_BF + || mode - (LOCK_MODE_MASK & mode) - LOCK_REC_NOT_GAP == WSREP_BF +#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); trx = thr_get_trx(thr); @@ -2064,14 +2202,24 @@ lock_rec_lock_slow( /* The trx already has a strong enough lock on rec: do nothing */ +#ifdef WITH_WSREP + } else if ((c_lock = lock_rec_other_has_conflicting( + mode, block, heap_no, trx))) { +#else } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) { +#endif /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. 
*/ +#ifdef WITH_WSREP + return(lock_rec_enqueue_waiting(c_lock,mode, block, heap_no, + index, thr)); +#else return(lock_rec_enqueue_waiting(mode, block, heap_no, index, thr)); +#endif } else if (!impl) { /* Set the requested lock on the record */ @@ -2117,8 +2265,16 @@ lock_rec_lock( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP +#ifdef WITH_WSREP + || mode - (LOCK_MODE_MASK & mode) == WSREP_BF + || mode - (LOCK_MODE_MASK & mode) - LOCK_REC_NOT_GAP == WSREP_BF +#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == 0); - +#ifdef WITH_WSREP + if (wsrep_thd_is_brute_force(thr_get_trx(thr)->mysql_thd)) { + mode |= WSREP_BF; + } +#endif /* We try a simplified and faster subroutine for the most common cases */ switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { @@ -3487,6 +3643,34 @@ lock_deadlock_recursive( stderr); } #endif /* UNIV_DEBUG */ +#ifdef WITH_WSREP + if (wsrep_debug) + fputs("WSREP: Deadlock detected\n", ef); + if (wsrep_thd_is_brute_force(start->mysql_thd) && + wsrep_thd_is_brute_force( + wait_lock->trx->mysql_thd) && + (start != wait_lock->trx)) { + + if (wsrep_trx_order_before( + start->mysql_thd, + wait_lock->trx->mysql_thd)) { + + wait_lock->trx->was_chosen_as_deadlock_victim = TRUE; + lock_cancel_waiting_and_release(wait_lock); + return(LOCK_VICTIM_IS_OTHER); + } else { + return(LOCK_VICTIM_IS_START); + } + } +#endif + if (too_far) { + + fputs("TOO DEEP OR LONG SEARCH" + " IN THE LOCK TABLE" + " WAITS-FOR GRAPH\n", ef); + + return(LOCK_VICTIM_IS_START); + } if (trx_weight_ge(wait_lock->trx, start)) { /* Our recursion starting point @@ -3494,8 +3678,21 @@ lock_deadlock_recursive( choose 'start' as the victim and roll back it */ +#ifdef WITH_WSREP + if (!wsrep_thd_is_brute_force( + start->mysql_thd)) { + return(LOCK_VICTIM_IS_START); + } +#else return(LOCK_VICTIM_IS_START); +#endif } +#ifdef WITH_WSREP + if (wsrep_thd_is_brute_force( + 
wait_lock->trx->mysql_thd)) { + return(LOCK_VICTIM_IS_START); + } +#endif lock_deadlock_found = TRUE; @@ -3580,6 +3777,9 @@ UNIV_INLINE lock_t* lock_table_create( /*==============*/ +#ifdef WITH_WSREP + lock_t* c_lock, /* conflicting lock */ +#endif dict_table_t* table, /*!< in: database table in dictionary cache */ ulint type_mode,/*!< in: lock mode possibly ORed with LOCK_WAIT */ @@ -3615,7 +3815,25 @@ lock_table_create( lock->un_member.tab_lock.table = table; +#ifdef WITH_WSREP + if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) { + UT_LIST_INSERT_AFTER( + un_member.tab_lock.locks, table->locks, c_lock, lock); + } else { + UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); + } + + if (c_lock && c_lock->trx->que_state == TRX_QUE_LOCK_WAIT) { + if (wsrep_debug) + fprintf(stderr, "WSREP: table c_lock in wait: %llu\n", + (ulonglong) lock->trx->id); + c_lock->trx->was_chosen_as_deadlock_victim = TRUE; + lock_cancel_waiting_and_release(c_lock); + } + +#else UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); +#endif if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { @@ -3761,6 +3979,9 @@ static ulint lock_table_enqueue_waiting( /*=======================*/ +#ifdef WITH_WSREP + lock_t* c_lock, /* conflicting lock */ +#endif ulint mode, /*!< in: lock mode this transaction is requesting */ dict_table_t* table, /*!< in: table */ @@ -3802,7 +4023,14 @@ lock_table_enqueue_waiting( /* Enqueue the lock request that will wait to be granted */ +#ifdef WITH_WSREP + if (trx->was_chosen_as_deadlock_victim) { + return(DB_DEADLOCK); + } + lock = lock_table_create(c_lock, table, mode | LOCK_WAIT, trx); +#else lock = lock_table_create(table, mode | LOCK_WAIT, trx); +#endif /* Check if a deadlock occurs: if yes, remove the lock request and return an error code */ @@ -3861,7 +4089,32 @@ lock_table_other_has_incompatible( && (!lock_mode_compatible(lock_get_mode(lock), mode)) && (wait || !(lock_get_wait(lock)))) { +#ifdef WITH_WSREP + int bf_this = 
wsrep_thd_is_brute_force(trx->mysql_thd); + int bf_other = wsrep_thd_is_brute_force( + lock->trx->mysql_thd); + if ((bf_this && !bf_other) || + (bf_this && bf_other && + wsrep_trx_order_before( + trx->mysql_thd, lock->trx->mysql_thd) + ) + ) { + if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) { + if (wsrep_debug) fprintf(stderr, + "WSREP: BF victim waiting"); + return(lock); + } else { + if (bf_this && bf_other) + wsrep_innobase_kill_one_trx( + (trx_t *)trx, lock->trx, TRUE); + return(lock); + } + } else { + return(lock); + } +#else return(lock); +#endif } lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); @@ -3884,6 +4137,9 @@ lock_table( enum lock_mode mode, /*!< in: lock mode */ que_thr_t* thr) /*!< in: query thread */ { +#ifdef WITH_WSREP + lock_t *c_lock; +#endif trx_t* trx; ulint err; @@ -3912,19 +4168,32 @@ lock_table( /* We have to check if the new lock is compatible with any locks other transactions have in the table lock queue. */ +#ifdef WITH_WSREP + if ((c_lock = (lock_t *)lock_table_other_has_incompatible( + trx, LOCK_WAIT, table, mode))) { +#else if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) { +#endif /* Another trx has a request on the table in an incompatible mode: this trx may have to wait */ +#ifdef WITH_WSREP + err = lock_table_enqueue_waiting(c_lock, mode | flags, table, thr); +#else err = lock_table_enqueue_waiting(mode | flags, table, thr); +#endif lock_mutex_exit_kernel(); return(err); } +#ifdef WITH_WSREP + lock_table_create(c_lock, table, mode | flags, trx); +#else lock_table_create(table, mode | flags, trx); +#endif ut_a(!flags || mode == LOCK_S || mode == LOCK_X); @@ -4840,6 +5109,7 @@ lock_rec_queue_validate( if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) { +#ifndef WITH_WSREP enum lock_mode mode; if (lock_get_mode(lock) == LOCK_S) { @@ -4849,6 +5119,7 @@ lock_rec_queue_validate( } ut_a(!lock_rec_other_has_expl_req( mode, 0, 0, block, heap_no, lock->trx)); +#endif /* WITH_WSREP */ } else if 
(lock_get_wait(lock) && !lock_rec_get_gap(lock)) { @@ -5089,6 +5360,9 @@ lock_rec_insert_check_and_lock( lock_t* lock; ulint err; ulint next_rec_heap_no; +#ifdef WITH_WSREP + lock_t *c_lock; +#endif ut_ad(block->frame == page_align(rec)); @@ -5141,15 +5415,28 @@ lock_rec_insert_check_and_lock( had to wait for their insert. Both had waiting gap type lock requests on the successor, which produced an unnecessary deadlock. */ +#ifdef WITH_WSREP + if ((c_lock = lock_rec_other_has_conflicting( + LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION | WSREP_BF, + block, next_rec_heap_no, trx))) { +#else if (lock_rec_other_has_conflicting( LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, block, next_rec_heap_no, trx)) { +#endif /* Note that we may get DB_SUCCESS also here! */ +#ifdef WITH_WSREP + err = lock_rec_enqueue_waiting(c_lock, LOCK_X | LOCK_GAP + | LOCK_INSERT_INTENTION, + block, next_rec_heap_no, + index, thr); +#else err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, block, next_rec_heap_no, index, thr); +#endif } else { err = DB_SUCCESS; } diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c index 502cb44a0fa..a9069708e98 100644 --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -89,6 +89,12 @@ UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES]; /* In simulated aio, merge at most this many consecutive i/os */ #define OS_AIO_MERGE_N_CONSECUTIVE 64 +#ifdef WITH_INNODB_DISALLOW_WRITES +#define WAIT_ALLOW_WRITES() os_event_wait(srv_allow_writes_event) +#else +#define WAIT_ALLOW_WRITES() do { } while (0) +#endif /* WITH_INNODB_DISALLOW_WRITES */ + /********************************************************************** InnoDB AIO Implementation: @@ -724,7 +730,9 @@ os_file_create_tmpfile(void) /*========================*/ { FILE* file = NULL; - int fd = innobase_mysql_tmpfile(); + int fd; + WAIT_ALLOW_WRITES(); + fd = innobase_mysql_tmpfile(); if (fd >= 0) { file = fdopen(fd, "w+b"); @@ -1043,6 
+1051,7 @@ os_file_create_directory( return (TRUE); #else int rcode; + WAIT_ALLOW_WRITES(); rcode = mkdir(pathname, 0770); @@ -1144,6 +1153,8 @@ try_again: os_file_t file; int create_flag; ibool retry; + if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) + WAIT_ALLOW_WRITES(); try_again: ut_a(name); @@ -1276,6 +1287,8 @@ os_file_create_simple_no_error_handling_func( int create_flag; ut_a(name); + if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) + WAIT_ALLOW_WRITES(); if (create_mode == OS_FILE_OPEN) { if (access_type == OS_FILE_READ_ONLY) { @@ -1509,6 +1522,8 @@ try_again: int create_flag; ibool retry; const char* mode_str = NULL; + if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) + WAIT_ALLOW_WRITES(); try_again: ut_a(name); @@ -1665,6 +1680,7 @@ loop: goto loop; #else int ret; + WAIT_ALLOW_WRITES(); ret = unlink(name); @@ -1728,6 +1744,7 @@ loop: goto loop; #else int ret; + WAIT_ALLOW_WRITES(); ret = unlink(name); @@ -1768,6 +1785,7 @@ os_file_rename_func( return(FALSE); #else int ret; + WAIT_ALLOW_WRITES(); ret = rename(oldpath, newpath); @@ -2031,6 +2049,7 @@ os_file_set_eof( HANDLE h = (HANDLE) _get_osfhandle(fileno(file)); return(SetEndOfFile(h)); #else /* __WIN__ */ + WAIT_ALLOW_WRITES(); return(!ftruncate(fileno(file), ftell(file))); #endif /* __WIN__ */ } @@ -2125,6 +2144,7 @@ os_file_flush_func( return(FALSE); #else int ret; + WAIT_ALLOW_WRITES(); #if defined(HAVE_DARWIN_THREADS) # ifndef F_FULLFSYNC @@ -2817,6 +2837,7 @@ retry: return(FALSE); #else ssize_t ret; + WAIT_ALLOW_WRITES(); ret = os_file_pwrite(file, buf, n, offset, offset_high); diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c index 30fc28561fa..9ba1c646d9d 100644 --- a/storage/innobase/rem/rem0rec.c +++ b/storage/innobase/rem/rem0rec.c @@ -31,6 +31,9 @@ Created 5/30/1994 Heikki Tuuri #include "mtr0mtr.h" #include "mtr0log.h" +#ifdef WITH_WSREP +#include <ha_prototypes.h> +#endif /* WITH_WSREP */ /* PHYSICAL 
RECORD (OLD STYLE) =========================== @@ -1772,3 +1775,72 @@ rec_print( } } #endif /* !UNIV_HOTBACKUP */ +#ifdef WITH_WSREP +int +wsrep_rec_get_primary_key( + byte *buf, /* out: extracted key */ + ulint *buf_len, /* in/out: length of buf */ + const rec_t* rec, /* in: physical record */ + dict_index_t* index) /* in: record descriptor */ +{ + const byte* data; + ulint len; + ulint key_len = 0; + ulint i; + uint key_parts; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + + ut_ad(index); + key_parts = dict_index_get_n_unique_in_tree(index); + *offsets_ = (sizeof offsets_) / sizeof *offsets_; + + rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + + ut_ad(rec_offs_validate(rec, NULL, offsets_)); + + ut_ad(rec); + + for (i = 0; i < key_parts; i++) { + dict_field_t* field = dict_index_get_nth_field(index, i); + const dict_col_t* col = dict_field_get_col(field); + + data = rec_get_nth_field(rec, offsets_, i, &len); + if (key_len + len > ((col->prtype & DATA_NOT_NULL) ? 
+ *buf_len : *buf_len - 1)) { + fprintf (stderr, + "WSREP: FK key len exceeded %lu %lu %lu\n", + key_len, len, *buf_len); + goto err_out; + } + + if (len == UNIV_SQL_NULL) { + ut_a(!(col->prtype & DATA_NOT_NULL)); + *buf++ = 1; + key_len++; + } else { + if (!(col->prtype & DATA_NOT_NULL)) { + *buf++ = 0; + key_len++; + } + memcpy(buf, data, len); + wsrep_innobase_mysql_sort( + (int)(col->prtype & DATA_MYSQL_TYPE_MASK), + (uint)dtype_get_charset_coll(col->prtype), + buf, len); + key_len += len; + buf += len; + } + } + + rec_validate(rec, offsets_); + *buf_len = key_len; + return DB_SUCCESS; + + err_out: + return DB_ERROR; +} +#endif // WITH_WSREP diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c index 67846ab6f69..19da93f55eb 100644 --- a/storage/innobase/row/row0ins.c +++ b/storage/innobase/row/row0ins.c @@ -752,6 +752,13 @@ row_ins_invalidate_query_cache( innobase_invalidate_query_cache(thr_get_trx(thr), buf, len); mem_free(buf); } +#ifdef WITH_WSREP +ulint wsrep_append_foreign_key(trx_t *trx, + dict_foreign_t* foreign, + const rec_t* clust_rec, + dict_index_t* clust_index, + ibool shared); +#endif /* WITH_WSREP */ /*********************************************************************//** Perform referential actions or checks when a parent row is deleted or updated @@ -1068,6 +1075,16 @@ row_ins_foreign_check_on_constraint( err = row_update_cascade_for_mysql(thr, cascade, foreign->foreign_table); +#ifdef WITH_WSREP + if (err == DB_SUCCESS) { + err = wsrep_append_foreign_key( + thr_get_trx(thr), + foreign, + clust_rec, + clust_index, + FALSE); + } +#endif /* WITH_WSREP */ if (foreign->foreign_table->n_foreign_key_checks_running == 0) { fprintf(stderr, "InnoDB: error: table %s has the counter 0" @@ -1397,7 +1414,14 @@ run_again: if (check_ref) { err = DB_SUCCESS; - +#ifdef WITH_WSREP + err = wsrep_append_foreign_key( + thr_get_trx(thr), + foreign, + rec, + check_index, + TRUE); +#endif /* WITH_WSREP */ goto end_scan; } else if 
(foreign->type != 0) { /* There is an ON UPDATE or ON DELETE @@ -1649,6 +1673,9 @@ row_ins_scan_sec_index_for_duplicate( dtuple_t* entry, /*!< in: index entry */ que_thr_t* thr) /*!< in: query thread */ { +#ifdef WITH_WSREP + trx_t* trx = thr_get_trx(thr); +#endif ulint n_unique; ulint i; int cmp; @@ -1702,7 +1729,14 @@ row_ins_scan_sec_index_for_duplicate( offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); +#ifdef WITH_WSREP + /* slave applier must not get duplicate error */ + if (allow_duplicates || + (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd))) { +#else if (allow_duplicates) { +#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for @@ -1820,7 +1854,13 @@ row_ins_duplicate_error_in_clust( sure that in roll-forward we get the same duplicate errors as in original execution */ +#ifdef WITH_WSREP + if (trx->duplicates || + (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd))) { +#else if (trx->duplicates) { +#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for @@ -1864,7 +1904,13 @@ row_ins_duplicate_error_in_clust( offsets = rec_get_offsets(rec, cursor->index, offsets, ULINT_UNDEFINED, &heap); +#ifdef WITH_WSREP + if (trx->duplicates || + (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd))) { +#else if (trx->duplicates) { +#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index bda086f4778..04370e8fe13 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -86,6 +86,10 @@ Created 10/8/1995 Heikki Tuuri #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#ifdef WITH_WSREP +extern int wsrep_debug; +extern int wsrep_trx_is_aborting(void *thd_ptr); +#endif /* The following counter is incremented whenever there is some user activity in the server */ UNIV_INTERN 
ulint srv_activity_count = 0; @@ -198,6 +202,10 @@ srv_printf_innodb_monitor() will request mutex acquisition with mutex_enter(), which will wait until it gets the mutex. */ #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) +#ifdef WITH_INNODB_DISALLOW_WRITES +UNIV_INTERN os_event_t srv_allow_writes_event; +#endif /* WITH_INNODB_DISALLOW_WRITES */ + /** The sort order table of the MySQL latin1_swedish_ci character set collation */ UNIV_INTERN const byte* srv_latin1_ordering; @@ -368,6 +376,9 @@ struct srv_conc_slot_struct{ free to proceed; but reserved may still be TRUE at that point */ +#ifdef WITH_WSREP + void *thd; /*!< to see priority */ +#endif UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node */ }; @@ -1075,8 +1086,20 @@ srv_init(void) conc_slot->reserved = FALSE; conc_slot->event = os_event_create(NULL); ut_a(conc_slot->event); +#ifdef WITH_WSREP + conc_slot->thd = NULL; +#endif /* WITH_WSREP */ } +#ifdef WITH_INNODB_DISALLOW_WRITES + /* Writes have to be enabled on init or else we hang. Thus, we + always set the event here regardless of innobase_disallow_writes. + That flag will always be 0 at this point because it isn't settable + via my.cnf or command line arg. 
*/ + srv_allow_writes_event = os_event_create(NULL); + os_event_set(srv_allow_writes_event); +#endif /* WITH_INNODB_DISALLOW_WRITES */ + /* Initialize some INFORMATION SCHEMA internal structures */ trx_i_s_cache_init(trx_i_s_cache); } @@ -1162,6 +1185,18 @@ srv_conc_enter_innodb( return; } +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd)) { + srv_conc_force_enter_innodb(trx); + return; + } + if (wsrep_on(trx->mysql_thd) && + wsrep_trx_is_aborting(trx->mysql_thd)) { + srv_conc_force_enter_innodb(trx); + return; + } +#endif os_fast_mutex_lock(&srv_conc_mutex); retry: if (trx->declared_to_be_inside_innodb) { @@ -1254,6 +1289,9 @@ retry: /* Add to the queue */ slot->reserved = TRUE; slot->wait_ended = FALSE; +#ifdef WITH_WSREP + slot->thd = trx->mysql_thd; +#endif UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot); @@ -1286,6 +1324,9 @@ retry: incremented the thread counter on behalf of this thread */ slot->reserved = FALSE; +#ifdef WITH_WSREP + slot->thd = NULL; +#endif UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot); @@ -1356,6 +1397,9 @@ srv_conc_force_exit_innodb( trx->n_tickets_to_enter_innodb = 0; if (srv_conc_n_threads < (lint)srv_thread_concurrency) { +#ifdef WITH_WSREP + srv_conc_slot_t* wsrep_slot; +#endif /* Look for a slot where a thread is waiting and no other thread has yet released the thread */ @@ -1365,6 +1409,19 @@ srv_conc_force_exit_innodb( slot = UT_LIST_GET_NEXT(srv_conc_queue, slot); } +#ifdef WITH_WSREP + /* look for aborting trx, they must be released asap */ + wsrep_slot= slot; + while (wsrep_slot && (wsrep_slot->wait_ended == TRUE || + !wsrep_trx_is_aborting(wsrep_slot->thd))) { + wsrep_slot = UT_LIST_GET_NEXT(srv_conc_queue, wsrep_slot); + } + if (wsrep_slot) { + slot = wsrep_slot; + if (wsrep_debug) + fprintf(stderr, "WSREP: releasing aborting thd\n"); + } +#endif if (slot != NULL) { slot->wait_ended = TRUE; @@ -1737,7 +1794,20 @@ srv_suspend_mysql_thread( if (lock_wait_timeout < 
100000000 && wait_time > (double) lock_wait_timeout) { +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd)) { + fprintf(stderr, + "WSREP: BF long lock wait ended after %.f sec\n", + wait_time); + srv_print_innodb_monitor = FALSE; + srv_print_innodb_lock_monitor = FALSE; + } else { +#endif trx->error_state = DB_LOCK_WAIT_TIMEOUT; +#ifdef WITH_WSREP + } +#endif } if (trx_is_interrupted(trx)) { @@ -2256,6 +2326,27 @@ exit_func: OS_THREAD_DUMMY_RETURN; } +#ifdef WITH_WSREP +/*********************************************************************//** +check if lock timeout was for priority thread, +as a side effect trigger lock monitor +@return false for regular lock timeout */ +static ibool +wsrep_is_BF_lock_timeout( +/*====================*/ + srv_slot_t* slot) /* in: lock slot to check for lock priority */ +{ + if (wsrep_on(thr_get_trx(slot->thr)->mysql_thd) && + wsrep_thd_is_brute_force((thr_get_trx(slot->thr))->mysql_thd)) { + fprintf(stderr, "WSREP: BF lock wait long\n"); + srv_print_innodb_monitor = TRUE; + srv_print_innodb_lock_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + return TRUE; + } + return FALSE; + } +#endif /* WITH_WSREP */ /*********************************************************************//** A thread which wakes up threads whose lock wait may have lasted too long. 
@return a dummy parameter */ @@ -2324,8 +2415,14 @@ loop: granted: in that case do nothing */ if (trx->wait_lock) { +#ifdef WITH_WSREP + if (!wsrep_is_BF_lock_timeout(slot)) { +#endif lock_cancel_waiting_and_release( trx->wait_lock); +#ifdef WITH_WSREP + } +#endif } } } @@ -2442,7 +2539,20 @@ loop: if (sync_array_print_long_waits(&waiter, &sema) && sema == old_sema && os_thread_eq(waiter, old_waiter)) { +#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) + if (srv_allow_writes_event->is_set) { +#endif /* WITH_WSREP */ fatal_cnt++; +#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) + } else { + fprintf(stderr, + "WSREP: avoiding InnoDB self crash due to long " + "semaphore wait of > %lu seconds\n" + "Server is processing SST donor operation, " + "fatal_cnt now: %lu", + (ulong) srv_fatal_semaphore_wait_threshold, fatal_cnt); + } +#endif /* WITH_WSREP */ if (fatal_cnt > 10) { fprintf(stderr, diff --git a/storage/innobase/trx/trx0roll.c b/storage/innobase/trx/trx0roll.c index b55471959ce..3db63469cbe 100644 --- a/storage/innobase/trx/trx0roll.c +++ b/storage/innobase/trx/trx0roll.c @@ -42,6 +42,9 @@ Created 3/26/1996 Heikki Tuuri #include "row0mysql.h" #include "lock0lock.h" #include "pars0pars.h" +#ifdef WITH_WSREP +#include "ha_prototypes.h" +#endif /* WITH_WSREP */ /** This many pages must be undone before a truncate is tried within rollback */ @@ -147,6 +150,12 @@ trx_rollback_for_mysql( trx->op_info = ""; +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif return(err); } @@ -174,6 +183,12 @@ trx_rollback_last_sql_stat_for_mysql( trx->op_info = ""; +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif return(err); } @@ -1123,6 +1138,12 @@ trx_rollback( srv_que_task_enqueue_low(thr); /* srv_que_task_enqueue_low(thr2); */ } +#ifdef WITH_WSREP + if 
(wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif } /****************************************************************//** @@ -1281,6 +1302,12 @@ trx_finish_rollback_off_kernel( sig = next_sig; } +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif } /*********************************************************************//** diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c index c0283448d10..5d8c967e323 100644 --- a/storage/innobase/trx/trx0sys.c +++ b/storage/innobase/trx/trx0sys.c @@ -44,6 +44,10 @@ Created 3/26/1996 Heikki Tuuri #include "os0file.h" #include "read0read.h" +#ifdef WITH_WSREP +#include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */ +#endif /* */ + /** The file format tag structure with id and name. */ struct file_format_struct { ulint id; /*!< id of the file format */ @@ -791,6 +795,89 @@ trx_sys_print_mysql_binlog_offset(void) mtr_commit(&mtr); } +#ifdef WITH_WSREP + +void +trx_sys_update_wsrep_checkpoint( + const XID* xid, /*!< in: transaction XID */ + mtr_t* mtr) /*!< in: mtr */ +{ + trx_sysf_t* sys_header; + + ut_ad(xid && mtr); + ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid(xid)); + + sys_header = trx_sysf_get(mtr); + if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_MAGIC_N_FLD) + != TRX_SYS_WSREP_XID_MAGIC_N) { + mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_MAGIC_N_FLD, + TRX_SYS_WSREP_XID_MAGIC_N, + MLOG_4BYTES, mtr); + } + + mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_FORMAT, + (int)xid->formatID, + MLOG_4BYTES, mtr); + mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_GTRID_LEN, + (int)xid->gtrid_length, + MLOG_4BYTES, mtr); + mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_BQUAL_LEN, + 
(int)xid->bqual_length, + MLOG_4BYTES, mtr); + mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_DATA, + (const unsigned char*) xid->data, + XIDDATASIZE, mtr); + +} + +void +trx_sys_read_wsrep_checkpoint(XID* xid) +/*===================================*/ +{ + trx_sysf_t* sys_header; + mtr_t mtr; + ulint magic; + + ut_ad(xid); + + mtr_start(&mtr); + + sys_header = trx_sysf_get(&mtr); + + if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_MAGIC_N_FLD)) + != TRX_SYS_WSREP_XID_MAGIC_N) { + memset(xid, 0, sizeof(*xid)); + xid->formatID = -1; + trx_sys_update_wsrep_checkpoint(xid, &mtr); + mtr_commit(&mtr); + return; + } + + xid->formatID = (int)mach_read_from_4( + sys_header + + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT); + xid->gtrid_length = (int)mach_read_from_4( + sys_header + + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN); + xid->bqual_length = (int)mach_read_from_4( + sys_header + + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN); + ut_memcpy(xid->data, + sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA, + XIDDATASIZE); + + mtr_commit(&mtr); +} + +#endif /* WITH_WSREP */ + /*****************************************************************//** Prints to stderr the MySQL master log offset info in the trx system header if the magic number shows it valid. */ diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c index ab7677b5b35..82f54b8295a 100644 --- a/storage/innobase/trx/trx0trx.c +++ b/storage/innobase/trx/trx0trx.c @@ -714,6 +714,11 @@ trx_start_low( trx->id = trx_sys_get_new_trx_id(); +#ifdef WITH_WSREP + memset(&trx->xid, 0, sizeof(trx->xid)); + trx->xid.formatID = -1; +#endif /* WITH_WSREP */ + /* The initial value for trx->no: IB_ULONGLONG_MAX is used in read_view_open_now: */ @@ -867,6 +872,14 @@ trx_write_serialisation_history( mutex_exit(&rseg->mutex); +#ifdef WITH_WSREP + /* Update latest MySQL wsrep XID in trx sys header. 
*/ + if (wsrep_is_wsrep_xid(&trx->xid)) + { + trx_sys_update_wsrep_checkpoint(&trx->xid, &mtr); + } +#endif /* WITH_WSREP */ + /* Update the latest MySQL binlog name and offset info in trx sys header if MySQL binlogging is on or the database server is a MySQL replication slave */ @@ -1064,6 +1077,12 @@ trx_commit_off_kernel( ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 7a51ecd8d2d..ae16bb24adf 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -97,6 +97,12 @@ extern "C" { #include "ha_prototypes.h" #include "ut0mem.h" #include "ibuf0ibuf.h" +#ifdef WITH_WSREP +#include "../storage/innobase/include/ut0byte.h" +#ifndef EXTRA_DEBUG + //#include "../storage/innobase/include/ut0byte.ic" +#endif /* EXTRA_DEBUG */ +#endif /* WITH_WSREP */ } #include "ha_innodb.h" @@ -114,6 +120,32 @@ extern ib_int64_t trx_sys_mysql_relay_log_pos; # define MYSQL_PLUGIN_IMPORT /* nothing */ # endif /* MYSQL_PLUGIN_IMPORT */ +#ifdef WITH_WSREP +#include <wsrep_mysqld.h> +#include <my_md5.h> +extern my_bool wsrep_certify_nonPK; +class binlog_trx_data; +extern handlerton *binlog_hton; + +extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log; +extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_wsrep_rollback; +extern MYSQL_PLUGIN_IMPORT mysql_cond_t COND_wsrep_rollback; +extern MYSQL_PLUGIN_IMPORT wsrep_aborting_thd_t wsrep_aborting_thd; + +static inline wsrep_trx_handle_t* +wsrep_trx_handle(THD* thd, const trx_t* trx) { + return wsrep_trx_handle_for_id(wsrep_thd_trx_handle(thd), + (wsrep_trx_id_t)trx->id); +} + +extern bool wsrep_prepare_key_for_innodb(const uchar *cache_key, + size_t cache_key_len, + const uchar* 
row_id, + size_t row_id_len, + wsrep_key_part_t* key, + size_t* key_len); + +#endif /* WITH_WSREP */ /** to protect innobase_open_files */ static mysql_mutex_t innobase_share_mutex; static ulong commit_threads = 0; @@ -1018,6 +1050,15 @@ thd_to_trx( { return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr)); } +#ifdef WITH_WSREP +ulonglong +thd_to_trx_id( +/*=======*/ + THD* thd) /*!< in: MySQL thread */ +{ + return(thd_to_trx(thd)->id); +} +#endif /********************************************************************//** Call this function when mysqld passes control to the client. That is to @@ -1048,6 +1089,13 @@ innobase_release_temporary_latches( return(0); } +#ifdef WITH_WSREP +static int +wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd, + my_bool signal); +static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid); +static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid); +#endif /********************************************************************//** Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth time calls srv_active_wake_master_thread. 
This function should be used @@ -1497,6 +1545,9 @@ int innobase_mysql_tmpfile(void) /*========================*/ { +#ifdef WITH_INNODB_DISALLOW_WRITES + os_event_wait(srv_allow_writes_event); +#endif /* WITH_INNODB_DISALLOW_WRITES */ int fd2 = -1; File fd = mysql_tmpfile("ib"); if (fd >= 0) { @@ -2459,6 +2510,11 @@ innobase_init( innobase_hton->flags=HTON_NO_FLAGS; innobase_hton->release_temporary_latches=innobase_release_temporary_latches; innobase_hton->alter_table_flags = innobase_alter_table_flags; +#ifdef WITH_WSREP + innobase_hton->wsrep_abort_transaction=wsrep_abort_transaction; + innobase_hton->wsrep_set_checkpoint=innobase_wsrep_set_checkpoint; + innobase_hton->wsrep_get_checkpoint=innobase_wsrep_get_checkpoint; +#endif /* WITH_WSREP */ ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); @@ -3146,6 +3202,27 @@ innobase_commit_low( trx_commit_for_mysql(trx); } +#ifdef WITH_WSREP + THD* thd = (THD*)trx->mysql_thd; + const char* tmp = 0; + if (wsrep_on((void*)thd)) { +#ifdef WSREP_PROC_INFO + char info[64]; + info[sizeof(info) - 1] = '\0'; + snprintf(info, sizeof(info) - 1, + "innobase_commit_low():trx_commit_for_mysql(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + tmp = thd_proc_info(thd, info); + +#else + tmp = thd_proc_info(thd, "innobase_commit_low()"); +#endif /* WSREP_PROC_INFO */ + } +#endif /* WITH_WSREP */ + trx_commit_for_mysql(trx); +#ifdef WITH_WSREP + if (wsrep_on((void*)thd)) { thd_proc_info(thd, tmp); } +#endif /* WITH_WSREP */ } /*****************************************************************//** @@ -3765,7 +3842,11 @@ ha_innobase::max_supported_key_length() const therefore set to slightly less than 1 / 4 of page size which is 16 kB; but currently MySQL does not work with keys whose size is > MAX_KEY_LENGTH */ +#ifdef WITH_WSREP + return(3500); +#else return(3500); +#endif } /****************************************************************//** @@ -4694,7 +4775,96 @@ innobase_mysql_cmp( return(0); } +#ifdef WITH_WSREP +extern "C" 
UNIV_INTERN +void +wsrep_innobase_mysql_sort( +/*===============*/ + /* out: str contains sort string */ + int mysql_type, /* in: MySQL type */ + uint charset_number, /* in: number of the charset */ + unsigned char* str, /* in: data field */ + unsigned int str_length) /* in: data field length, + not UNIV_SQL_NULL */ +{ + CHARSET_INFO* charset; + enum_field_types mysql_tp; + + DBUG_ASSERT(str_length != UNIV_SQL_NULL); + mysql_tp = (enum_field_types) mysql_type; + + switch (mysql_tp) { + + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_VARCHAR: + { + uchar tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN]; + uint tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN; + + /* Use the charset number to pick the right charset struct for + the comparison. Since the MySQL function get_charset may be + slow before Bar removes the mutex operation there, we first + look at 2 common charsets directly. 
*/ + + if (charset_number == default_charset_info->number) { + charset = default_charset_info; + } else if (charset_number == my_charset_latin1.number) { + charset = &my_charset_latin1; + } else { + charset = get_charset(charset_number, MYF(MY_WME)); + + if (charset == NULL) { + sql_print_error("InnoDB needs charset %lu for doing " + "a comparison, but MySQL cannot " + "find that charset.", + (ulong) charset_number); + ut_a(0); + } + } + + ut_a(str_length <= tmp_length); + memcpy(tmp_str, str, str_length); + + tmp_length = charset->coll->strnxfrm(charset, str, str_length, + tmp_str, tmp_length); + DBUG_ASSERT(tmp_length == str_length); + + break; + } + case MYSQL_TYPE_DECIMAL : + case MYSQL_TYPE_TINY : + case MYSQL_TYPE_SHORT : + case MYSQL_TYPE_LONG : + case MYSQL_TYPE_FLOAT : + case MYSQL_TYPE_DOUBLE : + case MYSQL_TYPE_NULL : + case MYSQL_TYPE_TIMESTAMP : + case MYSQL_TYPE_LONGLONG : + case MYSQL_TYPE_INT24 : + case MYSQL_TYPE_DATE : + case MYSQL_TYPE_TIME : + case MYSQL_TYPE_DATETIME : + case MYSQL_TYPE_YEAR : + case MYSQL_TYPE_NEWDATE : + case MYSQL_TYPE_NEWDECIMAL : + case MYSQL_TYPE_ENUM : + case MYSQL_TYPE_SET : + case MYSQL_TYPE_GEOMETRY : + break; + default: + break; + } + + return; +} +#endif // WITH_WSREP /**************************************************************//** Converts a MySQL type to an InnoDB type. Note that this function returns the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 @@ -4848,6 +5018,256 @@ innobase_read_from_2_little_endian( /*******************************************************************//** Stores a key value for a row to a buffer. 
@return key value length as stored in buff */ +#ifdef WITH_WSREP +UNIV_INTERN +uint +wsrep_store_key_val_for_row( +/*===============================*/ + TABLE* table, + uint keynr, /*!< in: key number */ + char* buff, /*!< in/out: buffer for the key value (in MySQL + format) */ + uint buff_len,/*!< in: buffer length */ + const uchar* record, + ibool* key_is_null)/*!< out: full key was null */ +{ + KEY* key_info = table->key_info + keynr; + KEY_PART_INFO* key_part = key_info->key_part; + KEY_PART_INFO* end = key_part + key_info->key_parts; + char* buff_start = buff; + enum_field_types mysql_type; + Field* field; + + DBUG_ENTER("store_key_val_for_row"); + + bzero(buff, buff_len); + *key_is_null = TRUE; + + for (; key_part != end; key_part++) { + uchar sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'}; + ibool part_is_null = FALSE; + + if (key_part->null_bit) { + if (record[key_part->null_offset] & + key_part->null_bit) { + *buff = 1; + part_is_null = TRUE; + } else { + *buff = 0; + } + buff++; + } + if (!part_is_null) *key_is_null = FALSE; + + field = key_part->field; + mysql_type = field->type(); + + if (mysql_type == MYSQL_TYPE_VARCHAR) { + /* >= 5.0.3 true VARCHAR */ + ulint lenlen; + ulint len; + const byte* data; + ulint key_len; + ulint true_len; + CHARSET_INFO* cs; + int error=0; + + key_len = key_part->length; + + if (part_is_null) { + buff += key_len + 2; + + continue; + } + cs = field->charset(); + + lenlen = (ulint) + (((Field_varstring*)field)->length_bytes); + + data = row_mysql_read_true_varchar(&len, + (byte*) (record + + (ulint)get_field_offset(table, field)), + lenlen); + + true_len = len; + + /* For multi byte character sets we need to calculate + the true length of the key */ + + if (len > 0 && cs->mbmaxlen > 1) { + true_len = (ulint) cs->cset->well_formed_len(cs, + (const char *) data, + (const char *) data + len, + (uint) (key_len / + cs->mbmaxlen), + &error); + } + + /* In a column prefix index, we may need to truncate + the stored value: */ + + 
if (true_len > key_len) { + true_len = key_len; + } + + memcpy(sorted, data, true_len); + wsrep_innobase_mysql_sort( + mysql_type, cs->number, sorted, true_len); + + /* Note that we always reserve the maximum possible + length of the true VARCHAR in the key value, though + only len first bytes after the 2 length bytes contain + actual data. The rest of the space was reset to zero + in the bzero() call above. */ + + buff += key_len; + + } else if (mysql_type == MYSQL_TYPE_TINY_BLOB + || mysql_type == MYSQL_TYPE_MEDIUM_BLOB + || mysql_type == MYSQL_TYPE_BLOB + || mysql_type == MYSQL_TYPE_LONG_BLOB + /* MYSQL_TYPE_GEOMETRY data is treated + as BLOB data in innodb. */ + || mysql_type == MYSQL_TYPE_GEOMETRY) { + + CHARSET_INFO* cs; + ulint key_len; + ulint true_len; + int error=0; + ulint blob_len; + const byte* blob_data; + + ut_a(key_part->key_part_flag & HA_PART_KEY_SEG); + + key_len = key_part->length; + + if (part_is_null) { + buff += key_len + 2; + + continue; + } + + cs = field->charset(); + + blob_data = row_mysql_read_blob_ref(&blob_len, + (byte*) (record + + (ulint)get_field_offset(table, field)), + (ulint) field->pack_length()); + + true_len = blob_len; + + ut_a(get_field_offset(table, field) + == key_part->offset); + + /* For multi byte character sets we need to calculate + the true length of the key */ + + if (blob_len > 0 && cs->mbmaxlen > 1) { + true_len = (ulint) cs->cset->well_formed_len(cs, + (const char *) blob_data, + (const char *) blob_data + + blob_len, + (uint) (key_len / + cs->mbmaxlen), + &error); + } + + /* All indexes on BLOB and TEXT are column prefix + indexes, and we may need to truncate the data to be + stored in the key value: */ + + if (true_len > key_len) { + true_len = key_len; + } + + memcpy(sorted, blob_data, true_len); + wsrep_innobase_mysql_sort( + mysql_type, cs->number, sorted, true_len); + + memcpy(buff, sorted, true_len); + + /* Note that we always reserve the maximum possible + length of the BLOB prefix in the key value. 
*/ + + buff += key_len; + } else { + /* Here we handle all other data types except the + true VARCHAR, BLOB and TEXT. Note that the column + value we store may be also in a column prefix + index. */ + + CHARSET_INFO* cs; + ulint true_len; + ulint key_len; + const uchar* src_start; + int error=0; + enum_field_types real_type; + + key_len = key_part->length; + + if (part_is_null) { + buff += key_len; + + continue; + } + + src_start = record + key_part->offset; + real_type = field->real_type(); + true_len = key_len; + + /* Character set for the field is defined only + to fields whose type is string and real field + type is not enum or set. For these fields check + if character set is multi byte. */ + + if (real_type != MYSQL_TYPE_ENUM + && real_type != MYSQL_TYPE_SET + && ( mysql_type == MYSQL_TYPE_VAR_STRING + || mysql_type == MYSQL_TYPE_STRING)) { + + cs = field->charset(); + + /* For multi byte character sets we need to + calculate the true length of the key */ + + if (key_len > 0 && cs->mbmaxlen > 1) { + + true_len = (ulint) + cs->cset->well_formed_len(cs, + (const char *)src_start, + (const char *)src_start + + key_len, + (uint) (key_len / + cs->mbmaxlen), + &error); + } + memcpy(sorted, src_start, true_len); + wsrep_innobase_mysql_sort( + mysql_type, cs->number, sorted, true_len); + memcpy(buff, sorted, true_len); + } else { + memcpy(buff, src_start, true_len); + } + buff += true_len; + + /* Pad the unused space with spaces. 
*/ + + if (true_len < key_len) { + ulint pad_len = key_len - true_len; + ut_a(!(pad_len % cs->mbminlen)); + + cs->cset->fill(cs, buff, pad_len, + 0x20 /* space */); + buff += pad_len; + } + } + } + + ut_a(buff <= buff_start + buff_len); + + DBUG_RETURN((uint)(buff - buff_start)); +} +#endif /* WITH_WSREP */ UNIV_INTERN uint ha_innobase::store_key_val_for_row( @@ -5658,6 +6078,9 @@ ha_innobase::write_row( ulint error = 0; int error_result= 0; ibool auto_inc_used= FALSE; +#ifdef WITH_WSREP + ibool auto_inc_inserted= FALSE; /* if NULL was inserted */ +#endif ulint sql_command; trx_t* trx = thd_to_trx(user_thd); @@ -5692,8 +6115,14 @@ ha_innobase::write_row( if ((sql_command == SQLCOM_ALTER_TABLE || sql_command == SQLCOM_OPTIMIZE || sql_command == SQLCOM_CREATE_INDEX +#ifdef WITH_WSREP + || (wsrep_on(user_thd) && sql_command == SQLCOM_LOAD) +#endif /* WITH_WSREP */ || sql_command == SQLCOM_DROP_INDEX) && num_write_row >= 10000) { +#ifdef WITH_WSREP + WSREP_DEBUG("forced commit: %s", wsrep_thd_query(user_thd)); +#endif /* WITH_WSREP */ /* ALTER TABLE is COMMITted at every 10000 copied rows. The IX table lock for the original table has to be re-issued. As this method will be called on a temporary table where the @@ -5765,7 +6194,9 @@ no_commit: /* Reset the error code before calling innobase_get_auto_increment(). */ prebuilt->autoinc_error = DB_SUCCESS; - +#ifdef WITH_WSREP + auto_inc_inserted= (table->next_number_field->val_int() == 0); +#endif if ((error = update_auto_increment())) { /* We don't want to mask autoinc overflow errors. */ @@ -5849,6 +6280,30 @@ no_commit: case SQLCOM_REPLACE_SELECT: goto set_max_autoinc; +#ifdef WITH_WSREP + /* workaround for LP bug #355000, retrying the insert */ + case SQLCOM_INSERT: + if (wsrep_on(current_thd) && + auto_inc_inserted && + wsrep_drupal_282555_workaround && + !thd_test_options(current_thd, + OPTION_NOT_AUTOCOMMIT | + OPTION_BEGIN)) { + WSREP_DEBUG( + "retrying insert: %s", + (*wsrep_thd_query(current_thd)) ? 
+ wsrep_thd_query(current_thd) : + (char *)"void"); + error= DB_SUCCESS; + wsrep_thd_set_conflict_state( + current_thd, MUST_ABORT); + innodb_srv_conc_exit_innodb(prebuilt->trx); + /* jump straight to func exit over + * later wsrep hooks */ + goto func_exit; + } + break; +#endif default: break; } @@ -5896,6 +6351,20 @@ report_error: error_result = convert_error_code_to_mysql((int) error, prebuilt->table->flags, user_thd); +#ifdef WITH_WSREP + if (!error_result && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && + wsrep_on(user_thd) && !wsrep_consistency_check(user_thd) && + (sql_command != SQLCOM_LOAD || + thd_binlog_format(user_thd) == BINLOG_FORMAT_ROW)) { + + if (wsrep_append_keys(user_thd, false, record, NULL)) { + DBUG_PRINT("wsrep", ("row key failed")); + error_result = HA_ERR_INTERNAL_ERROR; + goto wsrep_error; + } + } +wsrep_error: +#endif func_exit: innobase_active_small(); @@ -6181,6 +6650,20 @@ ha_innobase::update_row( DBUG_RETURN(HA_ERR_CRASHED); } +#ifdef WITH_WSREP + if (!error && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && + wsrep_on(user_thd)) { + + DBUG_PRINT("wsrep", ("update row key")); + + if (wsrep_append_keys(user_thd, false, old_row, new_row)) { + DBUG_PRINT("wsrep", ("row key failed")); + error = HA_ERR_INTERNAL_ERROR; + goto wsrep_error; + } + } +wsrep_error: +#endif DBUG_RETURN(error); } @@ -6236,6 +6719,18 @@ ha_innobase::delete_row( DBUG_RETURN(HA_ERR_CRASHED); } +#ifdef WITH_WSREP + if (!error && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && + wsrep_on(user_thd)) { + + if (wsrep_append_keys(user_thd, false, record, NULL)) { + DBUG_PRINT("wsrep", ("delete fail")); + error = HA_ERR_INTERNAL_ERROR; + goto wsrep_error; + } + } +wsrep_error: +#endif DBUG_RETURN(error); } @@ -7054,7 +7549,240 @@ ha_innobase::rnd_pos( DBUG_RETURN(error); } +#ifdef WITH_WSREP +extern "C" { +ulint +wsrep_append_foreign_key( +/*===========================*/ + trx_t* trx, /*!< in: trx */ + dict_foreign_t* foreign, /*!< in: foreign key constraint */ + const 
rec_t* clust_rec, /*!<in: clustered index record */ + dict_index_t* clust_index, /*!<in: clustered index */ + ibool shared) /*!<in: is shared access */ +{ + THD* thd = (THD*)trx->mysql_thd; + ulint rcode = DB_SUCCESS; + char cache_key[512] = {'\0'}; + + if (!wsrep_on(trx->mysql_thd) || + wsrep_thd_exec_mode(thd) != LOCAL_STATE) + return DB_SUCCESS; + + byte key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1]; + ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH; + + if (!dict_index_is_clust(clust_index)) { + WSREP_ERROR("clustered index not passed for FK append"); + return DB_ERROR; + } + + key[0] = '\0'; + rcode = wsrep_rec_get_primary_key( + &key[1], &len, clust_rec, clust_index); + if (rcode != DB_SUCCESS) { + WSREP_ERROR("FK key set failed: %lu", rcode); + return rcode; + } +#ifdef WSREP_DEBUG_PRINT + ulint i; + fprintf(stderr, "FK parent key, len: %lu ", len+1); + for (i=0; i<len+1; i++) { + fprintf(stderr, " (%X), ", key[i]); + } + fprintf(stderr, "\n"); +#endif + strncpy(cache_key, foreign->foreign_table->name, 512); + char *p = strchr(cache_key, '/'); + if (p) { + *p = '\0'; + } else { + WSREP_WARN("unexpected foreign key table %s", + foreign->foreign_table->name); + } + + wsrep_key_part_t wkey_part[3]; + wsrep_key_t wkey = {wkey_part, 3}; + if (!wsrep_prepare_key_for_innodb( + (const uchar*)cache_key, + strlen(foreign->foreign_table->name) + 1, + (const uchar*)key, len+1, + wkey_part, + &wkey.key_parts_len)) { + WSREP_WARN("key prepare failed for cascaded FK: %s", + (wsrep_thd_query(thd)) ? + wsrep_thd_query(thd) : "void"); + return DB_ERROR; + } + rcode = (int)wsrep->append_key( + wsrep, + wsrep_trx_handle(thd, trx), + &wkey, + 1, + shared); + if (rcode) { + DBUG_PRINT("wsrep", ("row key failed: %lu", rcode)); + WSREP_ERROR("Appending cascaded fk row key failed: %s, %lu", + (wsrep_thd_query(thd)) ? 
+ wsrep_thd_query(thd) : "void", rcode); + return DB_ERROR; + } + + return DB_SUCCESS; +} +} + +static int +wsrep_append_key( +/*==================*/ + THD *thd, + trx_t *trx, + TABLE_SHARE *table_share, + TABLE *table, + const char* key, + uint16_t key_len, + bool shared +) +{ + DBUG_ENTER("wsrep_append_key"); +#ifdef WSREP_DEBUG_PRINT + fprintf(stderr, "%s conn %ld, trx %llu, keylen %d, table %s ", + (shared) ? "Shared" : "Exclusive", + wsrep_thd_thread_id(thd), trx->id, key_len, + table_share->table_name.str); + for (int i=0; i<key_len; i++) { + fprintf(stderr, "%hhX, ", key[i]); + } + fprintf(stderr, "\n"); +#endif + wsrep_key_part_t wkey_part[3]; + wsrep_key_t wkey = {wkey_part, 3}; + if (!wsrep_prepare_key_for_innodb( + (const uchar*)table_share->table_cache_key.str, + table_share->table_cache_key.length, + (const uchar*)key, key_len, + wkey_part, + &wkey.key_parts_len)) { + WSREP_WARN("key prepare failed for: %s", + (wsrep_thd_query(thd)) ? + wsrep_thd_query(thd) : "void"); + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + } + + int rcode = (int)wsrep->append_key( + wsrep, + wsrep_trx_handle(thd, trx), + &wkey, + 1, + shared); + if (rcode) { + DBUG_PRINT("wsrep", ("row key failed: %d", rcode)); + WSREP_WARN("Appending row key failed: %s, %d", + (wsrep_thd_query(thd)) ? 
+ wsrep_thd_query(thd) : "void", rcode); + DBUG_RETURN(rcode); + } + DBUG_RETURN(0); +} +int +ha_innobase::wsrep_append_keys( +/*==================*/ + THD *thd, + bool shared, + const uchar* record0, /* in: row in MySQL format */ + const uchar* record1) /* in: row in MySQL format */ +{ + DBUG_ENTER("wsrep_append_keys"); + trx_t *trx = thd_to_trx(thd); + + /* if no PK, calculate hash of full row, to be the key value */ + if (prebuilt->clust_index_was_generated && wsrep_certify_nonPK) { + uchar digest[16]; + int rcode; + + MY_MD5_HASH(digest, (uchar *)record0, table->s->reclength); + if ((rcode = wsrep_append_key(thd, trx, table_share, table, + (const char*) digest, 16, + shared))) { + DBUG_RETURN(rcode); + } + if (record1) { + MY_MD5_HASH(digest, (uchar *)record1, table->s->reclength); + if ((rcode = wsrep_append_key(thd, trx, table_share, + table, + (const char*) digest, + 16, shared))) { + DBUG_RETURN(rcode); + } + } + } else if (wsrep_protocol_version == 0) { + uint len; + char keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'}; + char *key = &keyval[0]; + KEY *key_info = table->key_info; + ibool is_null; + + len = wsrep_store_key_val_for_row( + table, 0, key, key_info->key_length, record0, &is_null); + if (!is_null) { + int rcode = wsrep_append_key( + thd, trx, table_share, table, keyval, + len, shared); + if (rcode) DBUG_RETURN(rcode); + } + else + { + WSREP_DEBUG("NULL key skipped (proto 0): %s", + wsrep_thd_query(thd)); + } + } else { + ut_a(table->s->keys <= 256); + uint i; + for (i=0; i<table->s->keys; ++i) { + uint len; + char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'}; + char keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'}; + char *key0 = &keyval0[1]; + char *key1 = &keyval1[1]; + KEY *key_info = table->key_info + i; + ibool is_null; + + keyval0[0] = (char)i; + keyval1[0] = (char)i; + + if (key_info->flags & HA_NOSAME) { + len = wsrep_store_key_val_for_row( + table, i, key0, key_info->key_length, + record0, &is_null); + if (!is_null) { + int 
rcode = wsrep_append_key( + thd, trx, table_share, table, + keyval0, len+1, shared); + if (rcode) DBUG_RETURN(rcode); + } + else + { + WSREP_DEBUG("NULL key skipped: %s", + wsrep_thd_query(thd)); + } + if (record1) { + len = wsrep_store_key_val_for_row( + table, i, key1, key_info->key_length, + record1, &is_null); + if (!is_null && memcmp(key0, key1, len)) { + int rcode = wsrep_append_key( + thd, trx, table_share, + table, + keyval1, len+1, shared); + if (rcode) DBUG_RETURN(rcode); + } + } + } + } + } + DBUG_RETURN(0); +} +#endif /*********************************************************************//** Stores a reference to the current row to 'ref' field of the handle. Note that in the case where we have generated the clustered index for the @@ -10012,11 +10740,18 @@ ha_innobase::external_lock( /* used by test case */ DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = 1;); if (!skip) { +#ifdef WITH_WSREP + if (!wsrep_on(thd) || wsrep_thd_exec_mode(thd) == LOCAL_STATE) + { +#endif /* WITH_WSREP */ my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0), " InnoDB is limited to row-logging when " "transaction isolation level is " "READ COMMITTED or READ UNCOMMITTED."); DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ } } @@ -12139,6 +12874,256 @@ static SHOW_VAR innodb_status_variables_export[]= { static struct st_mysql_storage_engine innobase_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; +#ifdef WITH_WSREP +void +wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno) +{ + WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be " + "caused by:\n\t" + "1) unsupported configuration options combination, please check documentation.\n\t" + "2) a bug in the code.\n\t" + "3) a database corruption.\n Node consistency compromized, " + "need to abort. 
Restart the node to resync with cluster.", + (long long)bf_seqno, (long long)victim_seqno); + abort(); +} +int +wsrep_innobase_kill_one_trx(trx_t *bf_trx, trx_t *victim_trx, ibool signal) +{ + DBUG_ENTER("wsrep_innobase_kill_one_trx"); + THD *thd = (THD *) victim_trx->mysql_thd; + THD *bf_thd = (bf_trx) ? (THD *)bf_trx->mysql_thd : NULL; + int64_t bf_seqno = (bf_thd) ? wsrep_thd_trx_seqno(bf_thd) : 0; + + if (!thd) { + DBUG_PRINT("wsrep", ("no thd for conflicting lock")); + WSREP_WARN("no THD for trx: %llu", victim_trx->id); + DBUG_RETURN(1); + } + + WSREP_DEBUG("BF kill (%lu, seqno: %lld), victim: (%lu) trx: %llu", + signal, (long long)bf_seqno, + wsrep_thd_thread_id(thd), + victim_trx->id); + + WSREP_DEBUG("Aborting query: %s", + (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void"); + + wsrep_thd_LOCK(thd); + + if (wsrep_thd_query_state(thd) == QUERY_EXITING) { + WSREP_DEBUG("kill trx EXITING for %llu", victim_trx->id); + wsrep_thd_UNLOCK(thd); + DBUG_RETURN(0); + } + if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) { + WSREP_DEBUG("withdraw for BF trx: %llu, state: %d", + victim_trx->id, + wsrep_thd_conflict_state(thd)); + } + + switch (wsrep_thd_conflict_state(thd)) { + case NO_CONFLICT: + wsrep_thd_set_conflict_state(thd, MUST_ABORT); + break; + case MUST_ABORT: + WSREP_DEBUG("victim %llu in MUST ABORT state", + victim_trx->id); + wsrep_thd_UNLOCK(thd); + wsrep_thd_awake(thd, signal); + DBUG_RETURN(0); + break; + case ABORTED: + case ABORTING: // fall through + default: + WSREP_DEBUG("victim %llu in state %d", + victim_trx->id, wsrep_thd_conflict_state(thd)); + wsrep_thd_UNLOCK(thd); + DBUG_RETURN(0); + break; + } + + switch (wsrep_thd_query_state(thd)) { + case QUERY_COMMITTING: + enum wsrep_status rcode; + + WSREP_DEBUG("kill trx QUERY_COMMITTING for %llu", + victim_trx->id); + + if (wsrep_thd_exec_mode(thd) == REPL_RECV) { + wsrep_abort_slave_trx(bf_seqno, + wsrep_thd_trx_seqno(thd)); + } else { + rcode = wsrep->abort_pre_commit( + wsrep, bf_seqno, + 
(wsrep_trx_id_t)victim_trx->id + ); + + switch (rcode) { + case WSREP_WARNING: + WSREP_DEBUG("cancel commit warning: %llu", + victim_trx->id); + wsrep_thd_UNLOCK(thd); + DBUG_RETURN(1); + break; + case WSREP_OK: + break; + default: + WSREP_ERROR( + "cancel commit bad exit: %d %llu", + rcode, + victim_trx->id); + /* unable to interrupt, must abort */ + /* note: kill_mysql() will block, if we cannot. + * kill the lock holder first. + */ + abort(); + break; + } + } + break; + case QUERY_EXEC: + /* it is possible that victim trx is itself waiting for some + * other lock. We need to cancel this waiting + */ + WSREP_DEBUG("kill trx QUERY_EXEC for %llu", victim_trx->id); + + victim_trx->was_chosen_as_deadlock_victim= TRUE; + if (victim_trx->wait_lock) { + WSREP_DEBUG("victim has wait flag: %ld", + wsrep_thd_thread_id(thd)); + lock_t* wait_lock = victim_trx->wait_lock; + if (wait_lock) { + WSREP_DEBUG("canceling wait lock"); + victim_trx->was_chosen_as_deadlock_victim= TRUE; + lock_cancel_waiting_and_release(wait_lock); + } + + wsrep_thd_awake(thd, signal); + } else { + /* abort currently executing query */ + DBUG_PRINT("wsrep",("sending KILL_QUERY to: %ld", + wsrep_thd_thread_id(thd))); + WSREP_DEBUG("kill query for: %ld", + wsrep_thd_thread_id(thd)); + wsrep_thd_awake(thd, signal); + + /* for BF thd, we need to prevent him from committing */ + if (wsrep_thd_exec_mode(thd) == REPL_RECV) { + wsrep_abort_slave_trx(bf_seqno, + wsrep_thd_trx_seqno(thd)); + } + } + break; + case QUERY_IDLE: + { + bool skip_abort= false; + wsrep_aborting_thd_t abortees; + + WSREP_DEBUG("kill IDLE for %llu", victim_trx->id); + + if (wsrep_thd_exec_mode(thd) == REPL_RECV) { + WSREP_DEBUG("kill BF IDLE, seqno: %lld", + (long long)wsrep_thd_trx_seqno(thd)); + wsrep_thd_UNLOCK(thd); + wsrep_abort_slave_trx(bf_seqno, + wsrep_thd_trx_seqno(thd)); + DBUG_RETURN(0); + } + /* This will lock thd from proceeding after net_read() */ + wsrep_thd_set_conflict_state(thd, ABORTING); + + 
mysql_mutex_lock(&LOCK_wsrep_rollback); + + abortees = wsrep_aborting_thd; + while (abortees && !skip_abort) { + /* check if we have a kill message for this already */ + if (abortees->aborting_thd == thd) { + skip_abort = true; + WSREP_WARN("duplicate thd aborter %lu", + wsrep_thd_thread_id(thd)); + } + abortees = abortees->next; + } + if (!skip_abort) { + wsrep_aborting_thd_t aborting = (wsrep_aborting_thd_t) + my_malloc(sizeof(struct wsrep_aborting_thd), + MYF(0)); + aborting->aborting_thd = thd; + aborting->next = wsrep_aborting_thd; + wsrep_aborting_thd = aborting; + DBUG_PRINT("wsrep",("enqueuing trx abort for %lu", + wsrep_thd_thread_id(thd))); + WSREP_DEBUG("enqueuing trx abort for (%lu)", + wsrep_thd_thread_id(thd)); + } + + DBUG_PRINT("wsrep",("signalling wsrep rollbacker")); + WSREP_DEBUG("signaling aborter"); + mysql_cond_signal(&COND_wsrep_rollback); + mysql_mutex_unlock(&LOCK_wsrep_rollback); + + break; + } + default: + WSREP_WARN("bad wsrep query state: %d", + wsrep_thd_query_state(thd)); + break; + } + wsrep_thd_UNLOCK(thd); + + DBUG_RETURN(0); +} +static int +wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd, + my_bool signal) +{ + DBUG_ENTER("wsrep_innobase_abort_thd"); + trx_t* victim_trx = thd_to_trx(victim_thd); + trx_t* bf_trx = (bf_thd) ? 
thd_to_trx(bf_thd) : NULL; + WSREP_DEBUG("abort transaction: BF: %s victim: %s", + wsrep_thd_query(bf_thd), + wsrep_thd_query(victim_thd)); + + if (victim_trx) + { + mutex_enter(&kernel_mutex); + int rcode = wsrep_innobase_kill_one_trx(bf_trx, victim_trx, + signal); + mutex_exit(&kernel_mutex); + DBUG_RETURN(rcode); + } else { + WSREP_DEBUG("victim does not have transaction"); + wsrep_thd_LOCK(victim_thd); + wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT); + wsrep_thd_UNLOCK(victim_thd); + wsrep_thd_awake(victim_thd, signal); + } + DBUG_RETURN(-1); +} + +static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid) +{ + DBUG_ASSERT(hton == innodb_hton_ptr); + if (wsrep_is_wsrep_xid(xid)) { + mtr_t mtr; + mtr_start(&mtr); + trx_sys_update_wsrep_checkpoint(xid, &mtr); + mtr_commit(&mtr); + return 0; + } else { + return 1; + } +} + +static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid) +{ + DBUG_ASSERT(hton == innodb_hton_ptr); + trx_sys_read_wsrep_checkpoint(xid); + return 0; +} + +#endif /* WITH_WSREP */ /* plugin options */ static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, @@ -12570,6 +13555,40 @@ static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, NULL, NULL, 0, 0, 1, 0); #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ +#ifdef WITH_INNODB_DISALLOW_WRITES +/******************************************************* + * innobase_disallow_writes variable definition * + *******************************************************/ + +/* Must always init to FALSE. */ +static my_bool innobase_disallow_writes = FALSE; + +/************************************************************************** +An "update" method for innobase_disallow_writes variable. 
*/ +static +void +innobase_disallow_writes_update( +/*============================*/ + THD* thd, /* in: thread handle */ + st_mysql_sys_var* var, /* in: pointer to system + variable */ + void* var_ptr, /* out: pointer to dynamic + variable */ + const void* save) /* in: temporary storage */ +{ + *(my_bool*)var_ptr = *(my_bool*)save; + ut_a(srv_allow_writes_event); + if (*(my_bool*)var_ptr) + os_event_reset(srv_allow_writes_event); + else + os_event_set(srv_allow_writes_event); +} + +static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes, + PLUGIN_VAR_NOCMDOPT, + "Tell InnoDB to stop any writes to disk", + NULL, innobase_disallow_writes_update, FALSE); +#endif /* WITH_INNODB_DISALLOW_WRITES */ static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead, PLUGIN_VAR_NOCMDARG, "Whether to use read ahead for random access within an extent.", @@ -12848,6 +13867,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG MYSQL_SYSVAR(change_buffering_debug), #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ +#ifdef WITH_INNODB_DISALLOW_WRITES + MYSQL_SYSVAR(disallow_writes), +#endif /* WITH_INNODB_DISALLOW_WRITES */ MYSQL_SYSVAR(random_read_ahead), MYSQL_SYSVAR(read_ahead_threshold), MYSQL_SYSVAR(io_capacity), diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 7e11e72b7c1..5d0b4f75a3f 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -112,6 +112,10 @@ class ha_innobase: public handler dict_index_t* innobase_get_index(uint keynr); int info_low(uint flag, bool called_from_analyze); +#ifdef WITH_WSREP + int wsrep_append_keys(THD *thd, bool shared, + const uchar* record0, const uchar* record1); +#endif /* Init values for the class: */ public: ha_innobase(handlerton *hton, TABLE_SHARE *table_arg); @@ -368,6 +372,37 @@ bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd); */ extern void mysql_bin_log_commit_pos(THD *thd, 
ulonglong *out_pos, const char **out_file); +#ifdef WITH_WSREP +#include <wsrep_mysqld.h> +//extern "C" int wsrep_trx_order_before(void *thd1, void *thd2); + +extern "C" bool wsrep_thd_is_wsrep_on(THD *thd); + +extern "C" enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd); +extern "C" enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd); +extern "C" enum wsrep_query_state wsrep_thd_query_state(THD *thd); +extern "C" wsrep_trx_handle_t* wsrep_thd_trx_handle(THD *thd); + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode); +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state); +extern "C" void wsrep_thd_set_conflict_state( + THD *thd, enum wsrep_conflict_state state); + +extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id); + +extern "C"void wsrep_thd_LOCK(THD *thd); +extern "C"void wsrep_thd_UNLOCK(THD *thd); +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd); +extern "C" time_t wsrep_thd_query_start(THD *thd); +extern "C" my_thread_id wsrep_thd_thread_id(THD *thd); +extern "C" int64_t wsrep_thd_trx_seqno(THD *thd); +extern "C" query_id_t wsrep_thd_query_id(THD *thd); +extern "C" char * wsrep_thd_query(THD *thd); +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); +extern "C" void wsrep_thd_awake(THD *thd, my_bool signal); +#endif typedef struct trx_struct trx_t; /********************************************************************//** @file handler/ha_innodb.h @@ -408,3 +443,6 @@ innobase_index_name_is_reserved( ulint num_of_keys); /*!< in: Number of indexes to be created. 
*/ +#ifdef WITH_WSREP +extern "C" int wsrep_trx_is_aborting(void *thd_ptr); +#endif diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 2fd05901393..ef802c0d373 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -37,6 +37,10 @@ extern "C" { #include "handler0alter.h" } +#ifdef WITH_WSREP +//#include "wsrep_api.h" +#include <sql_acl.h> // PROCESS_ACL +#endif #include "ha_innodb.h" /*************************************************************//** diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index 4701fcd87f9..7f7904f6a25 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -343,6 +343,9 @@ barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN /** Defines the maximum fixed length column size */ #define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN +#ifdef WITH_WSREP +#define WSREP_MAX_SUPPORTED_KEY_LENGTH 3500 +#endif /* WITH_WSREP */ /** Data structure for a field in an index */ struct dict_field_struct{ diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index 2e200ba7f43..d5874a51cae 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -300,6 +300,19 @@ thd_flush_log_at_trx_commit( /*================================*/ void* thd); +#ifdef WITH_WSREP +UNIV_INTERN +int +wsrep_innobase_kill_one_trx(trx_t *bf_trx, trx_t *victim_trx, ibool signal); +int wsrep_thd_is_brute_force(void *thd_ptr); +int wsrep_trx_order_before(void *thd1, void *thd2); +void wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, + unsigned char* str, unsigned int str_length); +//UNIV_INTERN +int +wsrep_on(void *thd_ptr); +int wsrep_is_wsrep_xid(const void*); +#endif /* WITH_WSREP */ /**********************************************************************//** Get the current setting of the 
lower_case_table_names global parameter from mysqld.cc. We do a dirty read because for one there is no synchronization diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h index ea636f985b4..f7c3d82e495 100644 --- a/storage/xtradb/include/lock0lock.h +++ b/storage/xtradb/include/lock0lock.h @@ -798,6 +798,7 @@ lock_rec_get_page_no( remains set when the waiting lock is granted, or if the lock is inherited to a neighboring record */ +#define WSREP_BF 4096 #if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK # error #endif diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h index 10b74d18c13..ab390f4fb3a 100644 --- a/storage/xtradb/include/rem0rec.h +++ b/storage/xtradb/include/rem0rec.h @@ -832,6 +832,13 @@ are given in one byte (resp. two byte) format. */ two upmost bits in a two byte offset for special purposes */ #define REC_MAX_DATA_SIZE (16 * 1024) +#ifdef WITH_WSREP +int wsrep_rec_get_primary_key( + byte *buf, /* out: extracted key */ + ulint *buf_len, /* in/out: length of buf */ + const rec_t* rec, /* in: physical record */ + dict_index_t* index); /* in: record descriptor */ +#endif /* WITH_WSREP */ #ifndef UNIV_NONINL #include "rem0rec.ic" #endif diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index b660f7ea104..eeb5806de40 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -152,6 +152,10 @@ extern ulint srv_log_buffer_size; extern char srv_use_global_flush_log_at_trx_commit; extern char srv_adaptive_flushing; +#ifdef WITH_INNODB_DISALLOW_WRITES +/* When this event is reset we do not allow any file writes to take place. */ +extern os_event_t srv_allow_writes_event; +#endif /* WITH_INNODB_DISALLOW_WRITES */ /* If this flag is TRUE, then we will load the indexes' (and tables') metadata even if they are marked as "corrupted". 
Mostly it is for DBA to process corrupted index and table */ diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h index 495ce0e1184..0f12fc496b7 100644 --- a/storage/xtradb/include/trx0sys.h +++ b/storage/xtradb/include/trx0sys.h @@ -41,6 +41,9 @@ Created 3/26/1996 Heikki Tuuri #include "ut0bh.h" #include "read0types.h" #include "page0types.h" +#ifdef WITH_WSREP +#include "trx0xa.h" +#endif /* WITH_WSREP */ /** In a MySQL replication slave, in crash recovery we store the master log file name and position here. */ @@ -337,6 +340,17 @@ UNIV_INTERN void trx_sys_print_mysql_binlog_offset(void); /*===================================*/ +#ifdef WITH_WSREP +/** Update WSREP checkpoint XID in sys header. */ +void +trx_sys_update_wsrep_checkpoint( + const XID* xid, /*!< in: WSREP XID */ + mtr_t* mtr); /*!< in: mtr */ +void +/** Read WSREP checkpoint XID from sys header. */ +trx_sys_read_wsrep_checkpoint( + XID* xid); /*!< out: WSREP XID */ +#endif /* WITH_WSREP */ /*****************************************************************//** Prints to stderr the MySQL master log offset info in the trx system header if the magic number shows it valid. */ @@ -546,6 +560,22 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ within that file */ #define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */ +#ifdef WITH_WSREP +/* We hijack TRX_SYS_MYSQL_MASTER_LOG_INFO, it seems to be completely unused + otherwise (see comments for MySQL bug #34058). 
*/ +/** */ +#define TRX_SYS_WSREP_XID_INFO TRX_SYS_MYSQL_MASTER_LOG_INFO +#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0 +#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265 + +/* XID field: formatID, gtrid_len, bqual_len, xid_data */ +#define TRX_SYS_WSREP_XID_LEN (4 + 4 + 4 + XIDDATASIZE) +#define TRX_SYS_WSREP_XID_FORMAT 4 +#define TRX_SYS_WSREP_XID_GTRID_LEN 8 +#define TRX_SYS_WSREP_XID_BQUAL_LEN 12 +#define TRX_SYS_WSREP_XID_DATA 16 +#endif /* WITH_WSREP*/ + /** Doublewrite buffer */ /* @{ */ /** The offset of the doublewrite buffer header on the trx system header page */ diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c index 5fce345d0fe..ff711195785 100644 --- a/storage/xtradb/lock/lock0lock.c +++ b/storage/xtradb/lock/lock0lock.c @@ -39,6 +39,9 @@ Created 5/7/1996 Heikki Tuuri #include "dict0mem.h" #include "trx0sys.h" +#ifdef WITH_WSREP +extern my_bool wsrep_debug; +#endif /* Restricts the length of search we will do in the waits-for graph of transactions */ #define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000 @@ -921,6 +924,11 @@ lock_rec_has_to_wait( if (trx != lock2->trx && !lock_mode_compatible(LOCK_MODE_MASK & type_mode, lock_get_mode(lock2))) { +#ifdef WITH_WSREP + if ((type_mode & WSREP_BF) && (lock2->type_mode & WSREP_BF)) { + return FALSE; + } +#endif /* WITH_WSREP */ /* We have somewhat complex rules when gap type record locks cause waits */ @@ -1450,6 +1458,11 @@ lock_rec_has_expl( return(NULL); } +#ifdef WITH_WSREP +static +void +lock_rec_discard(lock_t* in_lock); +#endif #ifdef UNIV_DEBUG /*********************************************************************//** Checks if some other transaction has a lock request in the queue. 
@@ -1499,6 +1512,27 @@ lock_rec_other_has_expl_req( } #endif /* UNIV_DEBUG */ +#ifdef WITH_WSREP +static void +wsrep_kill_victim(trx_t *trx, lock_t *lock) { + int bf_this = wsrep_thd_is_brute_force(trx->mysql_thd); + int bf_other = + wsrep_thd_is_brute_force(lock->trx->mysql_thd); + if ((bf_this && !bf_other) || + (bf_this && bf_other && wsrep_trx_order_before( + trx->mysql_thd, lock->trx->mysql_thd))) { + + if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) { + if (wsrep_debug) + fprintf(stderr, "WSREP: BF victim waiting\n"); + /* cannot release lock, until our lock + is in the queue*/ + } else if (lock->trx != trx) { + wsrep_innobase_kill_one_trx(trx, lock->trx, TRUE); + } + } +} +#endif /*********************************************************************//** Checks if some other transaction has a conflicting explicit lock request in the queue, so that we have to wait. @@ -1528,6 +1562,9 @@ lock_rec_other_has_conflicting( do { if (lock_rec_has_to_wait(trx, mode, lock, TRUE)) { +#ifdef WITH_WSREP + wsrep_kill_victim(trx, lock); +#endif return(lock); } @@ -1538,6 +1575,9 @@ lock_rec_other_has_conflicting( do { if (lock_rec_has_to_wait(trx, mode, lock, FALSE)) { +#ifdef WITH_WSREP + wsrep_kill_victim(trx, lock); +#endif return(lock); } @@ -1669,6 +1709,9 @@ static lock_t* lock_rec_create( /*============*/ +#ifdef WITH_WSREP + lock_t* c_lock, /* conflicting lock */ +#endif ulint type_mode,/*!< in: lock mode and wait flag, type is ignored and replaced by LOCK_REC */ @@ -1714,6 +1757,11 @@ lock_rec_create( lock->trx = trx; lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC; +#ifdef WITH_WSREP + if (wsrep_thd_is_brute_force(trx->mysql_thd)) { + lock->type_mode |= WSREP_BF; + } +#endif /* WITH_WSREP */ lock->index = index; lock->un_member.rec_lock.space = space; @@ -1728,8 +1776,56 @@ lock_rec_create( /* Set the bit corresponding to rec */ lock_rec_set_nth_bit(lock, heap_no); +#ifdef WITH_WSREP + if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) { + lock_t 
*hash = c_lock->hash; + lock_t *prev = NULL; + + while (hash && + wsrep_thd_is_brute_force(hash->trx->mysql_thd) && + wsrep_trx_order_before(hash->trx->mysql_thd, trx->mysql_thd)){ + prev = hash; + hash = hash->hash; + } + lock->hash = hash; + if (prev) { + prev->hash = lock; + } else { + c_lock->hash = lock; + } + /* + * delayed conflict resolution '...kill_one_trx' was not called, + * if victim was waiting for some other lock + */ + if (c_lock && c_lock->trx->que_state == TRX_QUE_LOCK_WAIT) { + c_lock->trx->was_chosen_as_deadlock_victim = TRUE; + trx->que_state = TRX_QUE_LOCK_WAIT; + lock_set_lock_and_trx_wait(lock, trx); + + lock_cancel_waiting_and_release(c_lock->trx->wait_lock); + + /* trx might not wait for c_lock, but some other lock */ + if (wsrep_debug && c_lock->trx->wait_lock != c_lock) { + fprintf(stderr, "WSREP: c_lock != wait lock\n"); + } + if (c_lock->trx->wait_lock == c_lock) { + lock_reset_lock_and_trx_wait(lock); + } + + if (wsrep_debug) + fprintf(stderr, "WSREP: c_lock canceled %llu\n", + (ulonglong) c_lock->trx->id); + /* have to bail out here to avoid lock_set_lock... 
*/ + return(lock); + } + } else { + HASH_INSERT(lock_t, hash, lock_sys->rec_hash, + lock_rec_fold(space, page_no), lock); + } +#else HASH_INSERT(lock_t, hash, lock_sys->rec_hash, lock_rec_fold(space, page_no), lock); +#endif lock_sys->rec_num++; if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { @@ -1750,6 +1846,9 @@ static enum db_err lock_rec_enqueue_waiting( /*=====================*/ +#ifdef WITH_WSREP + lock_t* c_lock, /* conflicting lock */ +#endif ulint type_mode,/*!< in: lock mode this transaction is requesting: LOCK_S or LOCK_X, possibly @@ -1802,8 +1901,16 @@ lock_rec_enqueue_waiting( } /* Enqueue the lock request that will wait to be granted */ +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && trx->was_chosen_as_deadlock_victim) { + return(DB_DEADLOCK); + } + lock = lock_rec_create(c_lock, type_mode | LOCK_WAIT, + block, heap_no, index, trx); +#else lock = lock_rec_create(type_mode | LOCK_WAIT, block, heap_no, index, trx); +#endif /* Check if a deadlock occurs: if yes, remove the lock request and return an error code */ @@ -1885,7 +1992,19 @@ lock_rec_add_to_queue( lock_t* other_lock = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT, block, heap_no, trx); +#ifdef WITH_WSREP + /* this can potentionally assert with wsrep */ + if (wsrep_on(trx->mysql_thd)) { + if (wsrep_debug && other_lock) { + fprintf(stderr, + "WSREP: InnoDB assert ignored\n"); + } + } else { + ut_a(!other_lock); + } +#else ut_a(!other_lock); +#endif /* WITH_WSREP */ } #endif /* UNIV_DEBUG */ @@ -1938,7 +2057,11 @@ lock_rec_add_to_queue( } somebody_waits: +#ifdef WITH_WSREP + return(lock_rec_create(NULL, type_mode, block, heap_no, index, trx)); +#else return(lock_rec_create(type_mode, block, heap_no, index, trx)); +#endif } /** Record locking request status */ @@ -1988,6 +2111,10 @@ lock_rec_lock_fast( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 +#ifdef WITH_WSREP + || mode - (LOCK_MODE_MASK & mode) == WSREP_BF + 
|| mode - (LOCK_MODE_MASK & mode) - LOCK_REC_NOT_GAP == WSREP_BF +#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); lock = lock_rec_get_first_on_page(block); @@ -1996,7 +2123,11 @@ lock_rec_lock_fast( if (lock == NULL) { if (!impl) { +#ifdef WITH_WSREP + lock_rec_create(NULL, mode, block, heap_no, index, trx); +#else lock_rec_create(mode, block, heap_no, index, trx); +#endif } return(LOCK_REC_SUCCESS_CREATED); @@ -2052,6 +2183,9 @@ lock_rec_lock_slow( que_thr_t* thr) /*!< in: query thread */ { trx_t* trx; +#ifdef WITH_WSREP + lock_t *c_lock; +#endif ut_ad(mutex_own(&kernel_mutex)); ut_ad((LOCK_MODE_MASK & mode) != LOCK_S @@ -2062,6 +2196,10 @@ lock_rec_lock_slow( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 +#ifdef WITH_WSREP + || mode - (LOCK_MODE_MASK & mode) == WSREP_BF + || mode - (LOCK_MODE_MASK & mode) - LOCK_REC_NOT_GAP == WSREP_BF +#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); trx = thr_get_trx(thr); @@ -2070,14 +2208,24 @@ lock_rec_lock_slow( /* The trx already has a strong enough lock on rec: do nothing */ +#ifdef WITH_WSREP + } else if ((c_lock = lock_rec_other_has_conflicting( + mode, block, heap_no, trx))) { +#else } else if (lock_rec_other_has_conflicting(mode, block, heap_no, trx)) { +#endif /* If another transaction has a non-gap conflicting request in the queue, as this transaction does not have a lock strong enough already granted on the record, we have to wait. 
*/ +#ifdef WITH_WSREP + return(lock_rec_enqueue_waiting(c_lock,mode, block, heap_no, + index, thr)); +#else return(lock_rec_enqueue_waiting(mode, block, heap_no, index, thr)); +#endif } else if (!impl) { /* Set the requested lock on the record */ @@ -2123,8 +2271,16 @@ lock_rec_lock( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP +#ifdef WITH_WSREP + || mode - (LOCK_MODE_MASK & mode) == WSREP_BF + || mode - (LOCK_MODE_MASK & mode) - LOCK_REC_NOT_GAP == WSREP_BF +#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == 0); - +#ifdef WITH_WSREP + if (wsrep_thd_is_brute_force(thr_get_trx(thr)->mysql_thd)) { + mode |= WSREP_BF; + } +#endif /* We try a simplified and faster subroutine for the most common cases */ switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { @@ -3496,6 +3652,34 @@ lock_deadlock_recursive( stderr); } #endif /* UNIV_DEBUG */ +#ifdef WITH_WSREP + if (wsrep_debug) + fputs("WSREP: Deadlock detected\n", ef); + if (wsrep_thd_is_brute_force(start->mysql_thd) && + wsrep_thd_is_brute_force( + wait_lock->trx->mysql_thd) && + (start != wait_lock->trx)) { + + if (wsrep_trx_order_before( + start->mysql_thd, + wait_lock->trx->mysql_thd)) { + + wait_lock->trx->was_chosen_as_deadlock_victim = TRUE; + lock_cancel_waiting_and_release(wait_lock); + return(LOCK_VICTIM_IS_OTHER); + } else { + return(LOCK_VICTIM_IS_START); + } + } +#endif + if (too_far) { + + fputs("TOO DEEP OR LONG SEARCH" + " IN THE LOCK TABLE" + " WAITS-FOR GRAPH\n", ef); + + return(LOCK_VICTIM_IS_START); + } if (trx_weight_ge(wait_lock->trx, start)) { /* Our recursion starting point @@ -3503,8 +3687,21 @@ lock_deadlock_recursive( choose 'start' as the victim and roll back it */ +#ifdef WITH_WSREP + if (!wsrep_thd_is_brute_force( + start->mysql_thd)) { + return(LOCK_VICTIM_IS_START); + } +#else return(LOCK_VICTIM_IS_START); +#endif } +#ifdef WITH_WSREP + if (wsrep_thd_is_brute_force( + 
wait_lock->trx->mysql_thd)) { + return(LOCK_VICTIM_IS_START); + } +#endif lock_deadlock_found = TRUE; @@ -3589,6 +3786,9 @@ UNIV_INLINE lock_t* lock_table_create( /*==============*/ +#ifdef WITH_WSREP + lock_t* c_lock, /* conflicting lock */ +#endif dict_table_t* table, /*!< in: database table in dictionary cache */ ulint type_mode,/*!< in: lock mode possibly ORed with LOCK_WAIT */ @@ -3624,7 +3824,25 @@ lock_table_create( lock->un_member.tab_lock.table = table; +#ifdef WITH_WSREP + if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) { + UT_LIST_INSERT_AFTER( + un_member.tab_lock.locks, table->locks, c_lock, lock); + } else { + UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); + } + + if (c_lock && c_lock->trx->que_state == TRX_QUE_LOCK_WAIT) { + if (wsrep_debug) + fprintf(stderr, "WSREP: table c_lock in wait: %llu\n", + (ulonglong) lock->trx->id); + c_lock->trx->was_chosen_as_deadlock_victim = TRUE; + lock_cancel_waiting_and_release(c_lock); + } + +#else UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); +#endif if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { @@ -3770,6 +3988,9 @@ static ulint lock_table_enqueue_waiting( /*=======================*/ +#ifdef WITH_WSREP + lock_t* c_lock, /* conflicting lock */ +#endif ulint mode, /*!< in: lock mode this transaction is requesting */ dict_table_t* table, /*!< in: table */ @@ -3812,7 +4033,14 @@ lock_table_enqueue_waiting( /* Enqueue the lock request that will wait to be granted */ +#ifdef WITH_WSREP + if (trx->was_chosen_as_deadlock_victim) { + return(DB_DEADLOCK); + } + lock = lock_table_create(c_lock, table, mode | LOCK_WAIT, trx); +#else lock = lock_table_create(table, mode | LOCK_WAIT, trx); +#endif /* Check if a deadlock occurs: if yes, remove the lock request and return an error code */ @@ -3875,7 +4103,32 @@ lock_table_other_has_incompatible( && (!lock_mode_compatible(lock_get_mode(lock), mode)) && (wait || !(lock_get_wait(lock)))) { +#ifdef WITH_WSREP + int bf_this = 
wsrep_thd_is_brute_force(trx->mysql_thd); + int bf_other = wsrep_thd_is_brute_force( + lock->trx->mysql_thd); + if ((bf_this && !bf_other) || + (bf_this && bf_other && + wsrep_trx_order_before( + trx->mysql_thd, lock->trx->mysql_thd) + ) + ) { + if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) { + if (wsrep_debug) fprintf(stderr, + "WSREP: BF victim waiting"); + return(lock); + } else { + if (bf_this && bf_other) + wsrep_innobase_kill_one_trx( + (trx_t *)trx, lock->trx, TRUE); + return(lock); + } + } else { + return(lock); + } +#else return(lock); +#endif } lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); @@ -3898,6 +4151,9 @@ lock_table( enum lock_mode mode, /*!< in: lock mode */ que_thr_t* thr) /*!< in: query thread */ { +#ifdef WITH_WSREP + lock_t *c_lock; +#endif trx_t* trx; ulint err; @@ -3930,19 +4186,32 @@ lock_table( /* We have to check if the new lock is compatible with any locks other transactions have in the table lock queue. */ +#ifdef WITH_WSREP + if ((c_lock = (lock_t *)lock_table_other_has_incompatible( + trx, LOCK_WAIT, table, mode))) { +#else if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) { +#endif /* Another trx has a request on the table in an incompatible mode: this trx may have to wait */ +#ifdef WITH_WSREP + err = lock_table_enqueue_waiting(c_lock, mode | flags, table, thr); +#else err = lock_table_enqueue_waiting(mode | flags, table, thr); +#endif lock_mutex_exit_kernel(); return(err); } +#ifdef WITH_WSREP + lock_table_create(c_lock, table, mode | flags, trx); +#else lock_table_create(table, mode | flags, trx); +#endif ut_a(!flags || mode == LOCK_S || mode == LOCK_X); @@ -4860,6 +5129,7 @@ lock_rec_queue_validate( if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) { +#ifndef WITH_WSREP enum lock_mode mode; if (lock_get_mode(lock) == LOCK_S) { @@ -4869,6 +5139,7 @@ lock_rec_queue_validate( } ut_a(!lock_rec_other_has_expl_req( mode, 0, 0, block, heap_no, lock->trx)); +#endif /* WITH_WSREP */ } else if 
(lock_get_wait(lock) && !lock_rec_get_gap(lock)) { @@ -5109,6 +5380,9 @@ lock_rec_insert_check_and_lock( lock_t* lock; ulint err; ulint next_rec_heap_no; +#ifdef WITH_WSREP + lock_t *c_lock; +#endif ut_ad(block->frame == page_align(rec)); @@ -5166,15 +5440,28 @@ lock_rec_insert_check_and_lock( had to wait for their insert. Both had waiting gap type lock requests on the successor, which produced an unnecessary deadlock. */ +#ifdef WITH_WSREP + if ((c_lock = lock_rec_other_has_conflicting( + LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION | WSREP_BF, + block, next_rec_heap_no, trx))) { +#else if (lock_rec_other_has_conflicting( LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, block, next_rec_heap_no, trx)) { +#endif /* Note that we may get DB_SUCCESS also here! */ +#ifdef WITH_WSREP + err = lock_rec_enqueue_waiting(c_lock, LOCK_X | LOCK_GAP + | LOCK_INSERT_INTENTION, + block, next_rec_heap_no, + index, thr); +#else err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, block, next_rec_heap_no, index, thr); +#endif } else { err = DB_SUCCESS; } diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index ea7abb2549e..b0bfdc6c2bb 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -96,6 +96,12 @@ UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES]; /* In simulated aio, merge at most this many consecutive i/os */ #define OS_AIO_MERGE_N_CONSECUTIVE 64 +#ifdef WITH_INNODB_DISALLOW_WRITES +#define WAIT_ALLOW_WRITES() os_event_wait(srv_allow_writes_event) +#else +#define WAIT_ALLOW_WRITES() do { } while (0) +#endif /* WITH_INNODB_DISALLOW_WRITES */ + /********************************************************************** InnoDB AIO Implementation: @@ -816,7 +822,9 @@ os_file_create_tmpfile(void) /*========================*/ { FILE* file = NULL; - int fd = innobase_mysql_tmpfile(); + int fd; + WAIT_ALLOW_WRITES(); + fd = innobase_mysql_tmpfile(); if (fd >= 0) { file = fdopen(fd, "w+b"); @@ -1135,6 +1143,7 @@ 
os_file_create_directory( return (TRUE); #else int rcode; + WAIT_ALLOW_WRITES(); rcode = mkdir(pathname, 0770); @@ -1236,6 +1245,8 @@ try_again: os_file_t file; int create_flag; ibool retry; + if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) + WAIT_ALLOW_WRITES(); try_again: ut_a(name); @@ -1368,6 +1379,8 @@ os_file_create_simple_no_error_handling_func( int create_flag; ut_a(name); + if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) + WAIT_ALLOW_WRITES(); if (create_mode == OS_FILE_OPEN) { if (access_type == OS_FILE_READ_ONLY) { @@ -1614,6 +1627,8 @@ try_again: int create_flag; ibool retry; const char* mode_str = NULL; + if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) + WAIT_ALLOW_WRITES(); try_again: ut_a(name); @@ -1775,6 +1790,7 @@ loop: goto loop; #else int ret; + WAIT_ALLOW_WRITES(); ret = unlink(name); @@ -1838,6 +1854,7 @@ loop: goto loop; #else int ret; + WAIT_ALLOW_WRITES(); ret = unlink(name); @@ -1878,6 +1895,7 @@ os_file_rename_func( return(FALSE); #else int ret; + WAIT_ALLOW_WRITES(); ret = rename(oldpath, newpath); @@ -2141,6 +2159,7 @@ os_file_set_eof( HANDLE h = (HANDLE) _get_osfhandle(fileno(file)); return(SetEndOfFile(h)); #else /* __WIN__ */ + WAIT_ALLOW_WRITES(); return(!ftruncate(fileno(file), ftell(file))); #endif /* __WIN__ */ } @@ -2249,6 +2268,7 @@ os_file_flush_func( return(FALSE); #else int ret; + WAIT_ALLOW_WRITES(); #if defined(HAVE_DARWIN_THREADS) # ifndef F_FULLFSYNC @@ -2924,6 +2944,7 @@ retry: return(FALSE); #else ssize_t ret; + WAIT_ALLOW_WRITES(); ret = os_file_pwrite(file, buf, n, offset, offset_high); diff --git a/storage/xtradb/rem/rem0rec.c b/storage/xtradb/rem/rem0rec.c index 30fc28561fa..9ba1c646d9d 100644 --- a/storage/xtradb/rem/rem0rec.c +++ b/storage/xtradb/rem/rem0rec.c @@ -31,6 +31,9 @@ Created 5/30/1994 Heikki Tuuri #include "mtr0mtr.h" #include "mtr0log.h" +#ifdef WITH_WSREP +#include <ha_prototypes.h> +#endif /* WITH_WSREP */ /* PHYSICAL RECORD (OLD STYLE) 
=========================== @@ -1772,3 +1775,72 @@ rec_print( } } #endif /* !UNIV_HOTBACKUP */ +#ifdef WITH_WSREP +int +wsrep_rec_get_primary_key( + byte *buf, /* out: extracted key */ + ulint *buf_len, /* in/out: length of buf */ + const rec_t* rec, /* in: physical record */ + dict_index_t* index) /* in: record descriptor */ +{ + const byte* data; + ulint len; + ulint key_len = 0; + ulint i; + uint key_parts; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + + ut_ad(index); + key_parts = dict_index_get_n_unique_in_tree(index); + *offsets_ = (sizeof offsets_) / sizeof *offsets_; + + rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + + ut_ad(rec_offs_validate(rec, NULL, offsets_)); + + ut_ad(rec); + + for (i = 0; i < key_parts; i++) { + dict_field_t* field = dict_index_get_nth_field(index, i); + const dict_col_t* col = dict_field_get_col(field); + + data = rec_get_nth_field(rec, offsets_, i, &len); + if (key_len + len > ((col->prtype & DATA_NOT_NULL) ? 
+ *buf_len : *buf_len - 1)) { + fprintf (stderr, + "WSREP: FK key len exceeded %lu %lu %lu\n", + key_len, len, *buf_len); + goto err_out; + } + + if (len == UNIV_SQL_NULL) { + ut_a(!(col->prtype & DATA_NOT_NULL)); + *buf++ = 1; + key_len++; + } else { + if (!(col->prtype & DATA_NOT_NULL)) { + *buf++ = 0; + key_len++; + } + memcpy(buf, data, len); + wsrep_innobase_mysql_sort( + (int)(col->prtype & DATA_MYSQL_TYPE_MASK), + (uint)dtype_get_charset_coll(col->prtype), + buf, len); + key_len += len; + buf += len; + } + } + + rec_validate(rec, offsets_); + *buf_len = key_len; + return DB_SUCCESS; + + err_out: + return DB_ERROR; +} +#endif // WITH_WSREP diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index adc75bd5760..bc722d56203 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -752,6 +752,13 @@ row_ins_invalidate_query_cache( innobase_invalidate_query_cache(thr_get_trx(thr), buf, len); mem_free(buf); } +#ifdef WITH_WSREP +ulint wsrep_append_foreign_key(trx_t *trx, + dict_foreign_t* foreign, + const rec_t* clust_rec, + dict_index_t* clust_index, + ibool shared); +#endif /* WITH_WSREP */ /*********************************************************************//** Perform referential actions or checks when a parent row is deleted or updated @@ -1068,6 +1075,16 @@ row_ins_foreign_check_on_constraint( err = row_update_cascade_for_mysql(thr, cascade, foreign->foreign_table); +#ifdef WITH_WSREP + if (err == DB_SUCCESS) { + err = wsrep_append_foreign_key( + thr_get_trx(thr), + foreign, + clust_rec, + clust_index, + FALSE); + } +#endif /* WITH_WSREP */ if (foreign->foreign_table->n_foreign_key_checks_running == 0) { fprintf(stderr, "InnoDB: error: table %s has the counter 0" @@ -1403,7 +1420,14 @@ run_again: if (check_ref) { err = DB_SUCCESS; - +#ifdef WITH_WSREP + err = wsrep_append_foreign_key( + thr_get_trx(thr), + foreign, + rec, + check_index, + TRUE); +#endif /* WITH_WSREP */ goto end_scan; } else if (foreign->type != 0) 
{ /* There is an ON UPDATE or ON DELETE @@ -1660,6 +1684,9 @@ row_ins_scan_sec_index_for_duplicate( dtuple_t* entry, /*!< in: index entry */ que_thr_t* thr) /*!< in: query thread */ { +#ifdef WITH_WSREP + trx_t* trx = thr_get_trx(thr); +#endif ulint n_unique; ulint i; int cmp; @@ -1713,7 +1740,14 @@ row_ins_scan_sec_index_for_duplicate( offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); +#ifdef WITH_WSREP + /* slave applier must not get duplicate error */ + if (allow_duplicates || + (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd))) { +#else if (allow_duplicates) { +#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for @@ -1831,7 +1865,13 @@ row_ins_duplicate_error_in_clust( sure that in roll-forward we get the same duplicate errors as in original execution */ +#ifdef WITH_WSREP + if (trx->duplicates || + (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd))) { +#else if (trx->duplicates) { +#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for @@ -1875,7 +1915,13 @@ row_ins_duplicate_error_in_clust( offsets = rec_get_offsets(rec, cursor->index, offsets, ULINT_UNDEFINED, &heap); +#ifdef WITH_WSREP + if (trx->duplicates || + (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd))) { +#else if (trx->duplicates) { +#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index 3476743a692..c8eefb56fef 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -96,6 +96,10 @@ ulong innobase_thd_get_thread_id(const void* thd); /* prototypes for new functions added to ha_innodb.cc */ ibool innobase_get_slow_log(); +#ifdef WITH_WSREP +extern int wsrep_debug; +extern int wsrep_trx_is_aborting(void *thd_ptr); +#endif /* The following counter is incremented whenever there is some user activity in the server 
*/ UNIV_INTERN ulint srv_activity_count = 0; @@ -219,6 +223,10 @@ srv_printf_innodb_monitor() will request mutex acquisition with mutex_enter(), which will wait until it gets the mutex. */ #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) +#ifdef WITH_INNODB_DISALLOW_WRITES +UNIV_INTERN os_event_t srv_allow_writes_event; +#endif /* WITH_INNODB_DISALLOW_WRITES */ + /** The sort order table of the MySQL latin1_swedish_ci character set collation */ UNIV_INTERN const byte* srv_latin1_ordering; @@ -405,6 +413,9 @@ struct srv_conc_slot_struct{ free to proceed; but reserved may still be TRUE at that point */ +#ifdef WITH_WSREP + void *thd; /*!< to see priority */ +#endif UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node */ }; @@ -1141,8 +1152,20 @@ srv_init(void) conc_slot->reserved = FALSE; conc_slot->event = os_event_create(NULL); ut_a(conc_slot->event); +#ifdef WITH_WSREP + conc_slot->thd = NULL; +#endif /* WITH_WSREP */ } +#ifdef WITH_INNODB_DISALLOW_WRITES + /* Writes have to be enabled on init or else we hang. Thus, we + always set the event here regardless of innobase_disallow_writes. + That flag will always be 0 at this point because it isn't settable + via my.cnf or command line arg. 
*/ + srv_allow_writes_event = os_event_create(NULL); + os_event_set(srv_allow_writes_event); +#endif /* WITH_INNODB_DISALLOW_WRITES */ + /* Initialize some INFORMATION SCHEMA internal structures */ trx_i_s_cache_init(trx_i_s_cache); } @@ -1308,6 +1331,18 @@ srv_conc_enter_innodb( } #endif +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd)) { + srv_conc_force_enter_innodb(trx); + return; + } + if (wsrep_on(trx->mysql_thd) && + wsrep_trx_is_aborting(trx->mysql_thd)) { + srv_conc_force_enter_innodb(trx); + return; + } +#endif os_fast_mutex_lock(&srv_conc_mutex); retry: if (trx->declared_to_be_inside_innodb) { @@ -1401,6 +1436,9 @@ retry: /* Add to the queue */ slot->reserved = TRUE; slot->wait_ended = FALSE; +#ifdef WITH_WSREP + slot->thd = trx->mysql_thd; +#endif UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot); @@ -1447,6 +1485,9 @@ retry: incremented the thread counter on behalf of this thread */ slot->reserved = FALSE; +#ifdef WITH_WSREP + slot->thd = NULL; +#endif UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot); @@ -1532,6 +1573,9 @@ srv_conc_force_exit_innodb( trx->n_tickets_to_enter_innodb = 0; if (srv_conc_n_threads < (lint)srv_thread_concurrency) { +#ifdef WITH_WSREP + srv_conc_slot_t* wsrep_slot; +#endif /* Look for a slot where a thread is waiting and no other thread has yet released the thread */ @@ -1541,6 +1585,19 @@ srv_conc_force_exit_innodb( slot = UT_LIST_GET_NEXT(srv_conc_queue, slot); } +#ifdef WITH_WSREP + /* look for aborting trx, they must be released asap */ + wsrep_slot= slot; + while (wsrep_slot && (wsrep_slot->wait_ended == TRUE || + !wsrep_trx_is_aborting(wsrep_slot->thd))) { + wsrep_slot = UT_LIST_GET_NEXT(srv_conc_queue, wsrep_slot); + } + if (wsrep_slot) { + slot = wsrep_slot; + if (wsrep_debug) + fprintf(stderr, "WSREP: releasing aborting thd\n"); + } +#endif if (slot != NULL) { slot->wait_ended = TRUE; @@ -1913,7 +1970,20 @@ srv_suspend_mysql_thread( if (lock_wait_timeout < 
100000000 && wait_time > (double) lock_wait_timeout) { +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_brute_force(trx->mysql_thd)) { + fprintf(stderr, + "WSREP: BF long lock wait ended after %.f sec\n", + wait_time); + srv_print_innodb_monitor = FALSE; + srv_print_innodb_lock_monitor = FALSE; + } else { +#endif trx->error_state = DB_LOCK_WAIT_TIMEOUT; +#ifdef WITH_WSREP + } +#endif } if (trx_is_interrupted(trx)) { @@ -2646,6 +2716,27 @@ exit_func: OS_THREAD_DUMMY_RETURN; } +#ifdef WITH_WSREP +/*********************************************************************//** +check if lock timeout was for priority thread, +as a side effect trigger lock monitor +@return false for regular lock timeout */ +static ibool +wsrep_is_BF_lock_timeout( +/*====================*/ + srv_slot_t* slot) /* in: lock slot to check for lock priority */ +{ + if (wsrep_on(thr_get_trx(slot->thr)->mysql_thd) && + wsrep_thd_is_brute_force((thr_get_trx(slot->thr))->mysql_thd)) { + fprintf(stderr, "WSREP: BF lock wait long\n"); + srv_print_innodb_monitor = TRUE; + srv_print_innodb_lock_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + return TRUE; + } + return FALSE; + } +#endif /* WITH_WSREP */ /*********************************************************************//** A thread which wakes up threads whose lock wait may have lasted too long. 
@return a dummy parameter */ @@ -2714,8 +2805,14 @@ loop: granted: in that case do nothing */ if (trx->wait_lock) { +#ifdef WITH_WSREP + if (!wsrep_is_BF_lock_timeout(slot)) { +#endif lock_cancel_waiting_and_release( trx->wait_lock); +#ifdef WITH_WSREP + } +#endif } } } @@ -2831,7 +2928,20 @@ loop: if (sync_array_print_long_waits(&waiter, &sema) && sema == old_sema && os_thread_eq(waiter, old_waiter)) { +#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) + if (srv_allow_writes_event->is_set) { +#endif /* WITH_WSREP */ fatal_cnt++; +#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) + } else { + fprintf(stderr, + "WSREP: avoiding InnoDB self crash due to long " + "semaphore wait of > %lu seconds\n" + "Server is processing SST donor operation, " + "fatal_cnt now: %lu", + (ulong) srv_fatal_semaphore_wait_threshold, fatal_cnt); + } +#endif /* WITH_WSREP */ if (fatal_cnt > 10) { fprintf(stderr, diff --git a/storage/xtradb/trx/trx0roll.c b/storage/xtradb/trx/trx0roll.c index b55471959ce..3db63469cbe 100644 --- a/storage/xtradb/trx/trx0roll.c +++ b/storage/xtradb/trx/trx0roll.c @@ -42,6 +42,9 @@ Created 3/26/1996 Heikki Tuuri #include "row0mysql.h" #include "lock0lock.h" #include "pars0pars.h" +#ifdef WITH_WSREP +#include "ha_prototypes.h" +#endif /* WITH_WSREP */ /** This many pages must be undone before a truncate is tried within rollback */ @@ -147,6 +150,12 @@ trx_rollback_for_mysql( trx->op_info = ""; +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif return(err); } @@ -174,6 +183,12 @@ trx_rollback_last_sql_stat_for_mysql( trx->op_info = ""; +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif return(err); } @@ -1123,6 +1138,12 @@ trx_rollback( srv_que_task_enqueue_low(thr); /* srv_que_task_enqueue_low(thr2); */ } +#ifdef WITH_WSREP + if 
(wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif } /****************************************************************//** @@ -1281,6 +1302,12 @@ trx_finish_rollback_off_kernel( sig = next_sig; } +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif } /*********************************************************************//** diff --git a/storage/xtradb/trx/trx0sys.c b/storage/xtradb/trx/trx0sys.c index e0663ca5f87..56d0723d777 100644 --- a/storage/xtradb/trx/trx0sys.c +++ b/storage/xtradb/trx/trx0sys.c @@ -44,6 +44,10 @@ Created 3/26/1996 Heikki Tuuri #include "os0file.h" #include "read0read.h" +#ifdef WITH_WSREP +#include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */ +#endif /* */ + /** The file format tag structure with id and name. */ struct file_format_struct { ulint id; /*!< id of the file format */ @@ -960,6 +964,89 @@ trx_sys_print_mysql_binlog_offset(void) mtr_commit(&mtr); } +#ifdef WITH_WSREP + +void +trx_sys_update_wsrep_checkpoint( + const XID* xid, /*!< in: transaction XID */ + mtr_t* mtr) /*!< in: mtr */ +{ + trx_sysf_t* sys_header; + + ut_ad(xid && mtr); + ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid(xid)); + + sys_header = trx_sysf_get(mtr); + if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_MAGIC_N_FLD) + != TRX_SYS_WSREP_XID_MAGIC_N) { + mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_MAGIC_N_FLD, + TRX_SYS_WSREP_XID_MAGIC_N, + MLOG_4BYTES, mtr); + } + + mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_FORMAT, + (int)xid->formatID, + MLOG_4BYTES, mtr); + mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_GTRID_LEN, + (int)xid->gtrid_length, + MLOG_4BYTES, mtr); + mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_BQUAL_LEN, + 
(int)xid->bqual_length, + MLOG_4BYTES, mtr); + mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_DATA, + (const unsigned char*) xid->data, + XIDDATASIZE, mtr); + +} + +void +trx_sys_read_wsrep_checkpoint(XID* xid) +/*===================================*/ +{ + trx_sysf_t* sys_header; + mtr_t mtr; + ulint magic; + + ut_ad(xid); + + mtr_start(&mtr); + + sys_header = trx_sysf_get(&mtr); + + if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_MAGIC_N_FLD)) + != TRX_SYS_WSREP_XID_MAGIC_N) { + memset(xid, 0, sizeof(*xid)); + xid->formatID = -1; + trx_sys_update_wsrep_checkpoint(xid, &mtr); + mtr_commit(&mtr); + return; + } + + xid->formatID = (int)mach_read_from_4( + sys_header + + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT); + xid->gtrid_length = (int)mach_read_from_4( + sys_header + + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN); + xid->bqual_length = (int)mach_read_from_4( + sys_header + + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN); + ut_memcpy(xid->data, + sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA, + XIDDATASIZE); + + mtr_commit(&mtr); +} + +#endif /* WITH_WSREP */ + /*****************************************************************//** Prints to stderr the MySQL master log offset info in the trx system header if the magic number shows it valid. */ diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c index 2bb38571bda..96e0716e5c0 100644 --- a/storage/xtradb/trx/trx0trx.c +++ b/storage/xtradb/trx/trx0trx.c @@ -755,6 +755,11 @@ trx_start_low( trx->id = trx_sys_get_new_trx_id(); +#ifdef WITH_WSREP + memset(&trx->xid, 0, sizeof(trx->xid)); + trx->xid.formatID = -1; +#endif /* WITH_WSREP */ + /* The initial value for trx->no: IB_ULONGLONG_MAX is used in read_view_open_now: */ @@ -909,6 +914,14 @@ trx_write_serialisation_history( mutex_exit(&rseg->mutex); +#ifdef WITH_WSREP + /* Update latest MySQL wsrep XID in trx sys header. 
*/ + if (wsrep_is_wsrep_xid(&trx->xid)) + { + trx_sys_update_wsrep_checkpoint(&trx->xid, &mtr); + } +#endif /* WITH_WSREP */ + /* Update the latest MySQL binlog name and offset info in trx sys header if MySQL binlogging is on or the database server is a MySQL replication slave */ @@ -1138,6 +1151,12 @@ trx_commit_off_kernel( ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); +#ifdef WITH_WSREP + if (wsrep_on(trx->mysql_thd) && + trx->was_chosen_as_deadlock_victim) { + trx->was_chosen_as_deadlock_victim = FALSE; + } +#endif UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); } diff --git a/support-files/CMakeLists.txt b/support-files/CMakeLists.txt index f8a65e75d4d..d0a7f89d150 100644 --- a/support-files/CMakeLists.txt +++ b/support-files/CMakeLists.txt @@ -40,7 +40,7 @@ ELSE() SET(inst_location ${INSTALL_SUPPORTFILESDIR}) ENDIF() -FOREACH(inifile my-huge my-innodb-heavy-4G my-large my-medium my-small) +FOREACH(inifile my-huge my-innodb-heavy-4G my-large my-medium my-small wsrep) CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/${inifile}.cnf.sh ${CMAKE_CURRENT_BINARY_DIR}/${inifile}.${ini_file_extension} @ONLY) INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/${inifile}.${ini_file_extension} @@ -84,4 +84,10 @@ IF(UNIX) DESTINATION ${inst_location} COMPONENT SupportFiles PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/wsrep_notify.sh + ${CMAKE_CURRENT_BINARY_DIR}/wsrep_notify @ONLY) + INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/wsrep_notify + DESTINATION ${inst_location} COMPONENT SupportFiles + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ + GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) ENDIF() diff --git a/support-files/build-tags b/support-files/build-tags index b5386dc79c3..f42fae218d2 100755 --- a/support-files/build-tags +++ b/support-files/build-tags @@ -1,12 +1,17 @@ #! 
/bin/sh rm -f TAGS -filter='\.cc$\|\.c$\|\.h$\|\.yy$' +filter='\.cc$\|\.c$\|\.h$\|\.yy\|\.ic\|.ih$' -list="find . -type f" +#list="find . -type f" bzr root >/dev/null 2>/dev/null && list="bzr ls --from-root -R --kind=file --versioned" $list |grep $filter |while read f; do etags -o TAGS --append $f done + +(cd storage/galera && svn ls -R) | grep $filter | while read f; +do + etags -o TAGS --append storage/galera/$f +done diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh index a3d8554188e..1b6cc1cdb04 100644 --- a/support-files/mysql.spec.sh +++ b/support-files/mysql.spec.sh @@ -68,6 +68,17 @@ # # ---------------------------------------------------------------------------- +# wsrep builds +# ---------------------------------------------------------------------------- +%if %{defined with_wsrep} +%define mysql_version @VERSION@_wsrep_@WSREP_API_VERSION@.@WSREP_PATCH_VERSION@ +%define wsrep_version @WSREP_VERSION@ +%define wsrep_comment , wsrep_%{wsrep_version} +%else +%define wsrep_comment %{nil} +%endif + +# ---------------------------------------------------------------------------- # Commercial builds # ---------------------------------------------------------------------------- %if %{undefined commercial} @@ -93,10 +104,10 @@ # Server comment strings # ---------------------------------------------------------------------------- %if %{undefined compilation_comment_debug} -%define compilation_comment_debug MySQL Community Server - Debug (GPL) +%define compilation_comment_debug MySQL Community Server - Debug (GPL)%{wsrep_comment} %endif %if %{undefined compilation_comment_release} -%define compilation_comment_release MySQL Community Server (GPL) +%define compilation_comment_release MySQL Community Server (GPL)%{wsrep_comment} %endif # ---------------------------------------------------------------------------- @@ -115,6 +126,13 @@ %endif # ---------------------------------------------------------------------------- +# Packager +# 
---------------------------------------------------------------------------- +%if %{undefined mysql_packager} +%define mysql_packager MySQL Build Team <build@mysql.com> +%endif + +# ---------------------------------------------------------------------------- # Distribution support # ---------------------------------------------------------------------------- %if %{undefined distro_specific} @@ -280,6 +298,10 @@ documentation and the manual for more information. ############################################################################## %package -n MySQL-server%{product_suffix} +%if %{defined with_wsrep} +Version: %{mysql_version} +#Release: %{wsrep_version}.%{release} +%endif Summary: MySQL: a very fast and reliable SQL database server Group: Applications/Databases Requires: %{distro_requires} @@ -309,6 +331,9 @@ and the manual for more information. This package includes the MySQL server binary as well as related utilities to run and administer a MySQL server. +%if %{defined with_wsrep} +Built with wsrep patch %{wsrep_version}. +%endif If you want to access and work with the database, you have to install package "MySQL-client%{product_suffix}" as well! @@ -374,6 +399,7 @@ This package contains the shared libraries (*.so*) which certain languages and applications need to dynamically load and use MySQL. # ---------------------------------------------------------------------------- +%if %{undefined with_wsrep} %package -n MySQL-embedded%{product_suffix} Summary: MySQL - Embedded library Group: Applications/Databases @@ -395,6 +421,7 @@ The API is identical for the embedded MySQL version and the client/server version. 
For a description of MySQL see the base MySQL RPM or http://www.mysql.com/ +%endif ############################################################################## %prep @@ -460,6 +487,9 @@ mkdir debug -DMYSQL_UNIX_ADDR="%{mysqldatadir}/mysql.sock" \ -DFEATURE_SET="%{feature_set}" \ -DCOMPILATION_COMMENT="%{compilation_comment_debug}" \ +%if %{defined with_wsrep} + -DWITH_WSREP=1 \ +%endif -DMYSQL_SERVER_SUFFIX="%{server_suffix}" echo BEGIN_DEBUG_CONFIG ; egrep '^#define' include/config.h ; echo END_DEBUG_CONFIG make ${MAKE_JFLAG} VERBOSE=1 @@ -475,6 +505,9 @@ mkdir release -DMYSQL_UNIX_ADDR="%{mysqldatadir}/mysql.sock" \ -DFEATURE_SET="%{feature_set}" \ -DCOMPILATION_COMMENT="%{compilation_comment_release}" \ +%if %{defined with_wsrep} + -DWITH_WSREP=1 \ +%endif -DMYSQL_SERVER_SUFFIX="%{server_suffix}" echo BEGIN_NORMAL_CONFIG ; egrep '^#define' include/config.h ; echo END_NORMAL_CONFIG make ${MAKE_JFLAG} VERBOSE=1 @@ -530,11 +563,20 @@ install -m 755 $MBD/release/support-files/mysql.server $RBR%{_sysconfdir}/init.d # Create a symlink "rcmysql", pointing to the init.script. SuSE users # will appreciate that, as all services usually offer this. -ln -s %{_sysconfdir}/init.d/mysql $RBR%{_sbindir}/rcmysql +ln -sf %{_sysconfdir}/init.d/mysql $RBR%{_sbindir}/rcmysql + +%if %{defined with_wsrep} +# Create a wsrep_sst_rsync_wan symlink. 
+install -d $RBR%{_bindir} +ln -sf wsrep_sst_rsync $RBR%{_bindir}/wsrep_sst_rsync_wan +%endif # Touch the place where the my.cnf config file might be located # Just to make sure it's in the file list and marked as a config file touch $RBR%{_sysconfdir}/my.cnf +%if %{defined with_wsrep} +touch $RBR%{_sysconfdir}/wsrep.cnf +%endif # Install SELinux files in datadir install -m 600 $MBD/%{src_dir}/support-files/RHEL4-SElinux/mysql.{fc,te} \ @@ -988,6 +1030,11 @@ echo "=====" >> $STATUS_HISTORY %doc %{src_dir}/Docs/INFO_SRC* %doc release/Docs/INFO_BIN* %doc release/support-files/my-*.cnf +%if %{defined with_wsrep} +%doc %{src_dir}/Docs/README-wsrep +%doc release/support-files/wsrep.cnf +%doc release/support-files/wsrep_notify +%endif %doc %attr(644, root, root) %{_infodir}/mysql.info* @@ -1021,6 +1068,9 @@ echo "=====" >> $STATUS_HISTORY %doc %attr(644, root, man) %{_mandir}/man1/resolveip.1* %ghost %config(noreplace,missingok) %{_sysconfdir}/my.cnf +%if %{defined with_wsrep} +%ghost %config(noreplace,missingok) %{_sysconfdir}/wsrep.cnf +%endif %attr(755, root, root) %{_bindir}/innochecksum %attr(755, root, root) %{_bindir}/my_print_defaults @@ -1047,6 +1097,11 @@ echo "=====" >> $STATUS_HISTORY %attr(755, root, root) %{_bindir}/replace %attr(755, root, root) %{_bindir}/resolve_stack_dump %attr(755, root, root) %{_bindir}/resolveip +%if %{defined with_wsrep} +%attr(755, root, root) %{_bindir}/wsrep_sst_mysqldump +%attr(755, root, root) %{_bindir}/wsrep_sst_rsync +%attr(755, root, root) %{_bindir}/wsrep_sst_rsync_wan +%endif %attr(755, root, root) %{_sbindir}/mysqld %attr(755, root, root) %{_sbindir}/mysqld-debug @@ -1125,8 +1180,10 @@ echo "=====" >> $STATUS_HISTORY %defattr(-, root, root, 0755) %attr(-, root, root) %{_datadir}/mysql-test %attr(755, root, root) %{_bindir}/mysql_client_test +%if %{undefined with_wsrep} %attr(755, root, root) %{_bindir}/mysql_client_test_embedded %attr(755, root, root) %{_bindir}/mysqltest_embedded +%endif %doc %attr(644, root, man) 
%{_mandir}/man1/mysql_client_test.1* %doc %attr(644, root, man) %{_mandir}/man1/mysql-stress-test.pl.1* %doc %attr(644, root, man) %{_mandir}/man1/mysql-test-run.pl.1* @@ -1134,11 +1191,13 @@ echo "=====" >> $STATUS_HISTORY %doc %attr(644, root, man) %{_mandir}/man1/mysqltest_embedded.1* # ---------------------------------------------------------------------------- +%if %{undefined with_wsrep} %files -n MySQL-embedded%{product_suffix} %defattr(-, root, root, 0755) %attr(755, root, root) %{_bindir}/mysql_embedded %attr(644, root, root) %{_libdir}/mysql/libmysqld.a %attr(644, root, root) %{_libdir}/mysql/libmysqld-debug.a +%endif ############################################################################## # The spec file changelog only includes changes made to the spec file @@ -1146,6 +1205,10 @@ echo "=====" >> $STATUS_HISTORY # merging BK trees) ############################################################################## %changelog +* Wed Dec 07 2011 Alexey Yurchenko <alexey.yurchenko@codership.com> + +- wsrep-related cleanups. + * Wed Sep 28 2011 Joerg Bruehe <joerg.bruehe@oracle.com> - Fix duplicate mentioning of "mysql_plugin" and its manual page, @@ -1198,7 +1261,6 @@ echo "=====" >> $STATUS_HISTORY - Fix bug#12561297: Added the MySQL embedded binary * Thu Jul 07 2011 Joerg Bruehe <joerg.bruehe@oracle.com> - - Fix bug#45415: "rpm upgrade recreates test database" Let the creation of the "test" database happen only during a new installation, not in an RPM upgrade. |