diff options
author | Sergei Petrunia <psergey@askmonty.org> | 2014-10-15 17:21:59 +0400 |
---|---|---|
committer | Sergei Petrunia <psergey@askmonty.org> | 2014-10-15 17:21:59 +0400 |
commit | be1c17669cb90e18315d7675c8b4d3c906cd64a7 (patch) | |
tree | e4a0cddd284e813510666ec843344886a160a619 /sql | |
parent | 041e03e251e783d51ca86e53112e3b87bd2da146 (diff) | |
parent | fec5ab5a56cb9a45c621207620cc85079cddf537 (diff) | |
download | mariadb-git-be1c17669cb90e18315d7675c8b4d3c906cd64a7.tar.gz |
Merge ../10.1 into bb-10.1-explain-json
Diffstat (limited to 'sql')
126 files changed, 12836 insertions, 21330 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 3e9010a76d8..8d399cac9bd 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -13,6 +13,26 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +IF(WITH_WSREP AND NOT EMBEDDED_LIBRARY) + SET(WSREP_INCLUDES ${CMAKE_SOURCE_DIR}/wsrep) + SET(WSREP_SOURCES + wsrep_check_opts.cc + wsrep_hton.cc + wsrep_mysqld.cc + wsrep_notify.cc + wsrep_sst.cc + wsrep_utils.cc + wsrep_var.cc + wsrep_binlog.cc + wsrep_applier.cc + wsrep_thd.cc + ) + SET(WSREP_LIB wsrep) +ELSE() + SET(WSREP_SOURCES wsrep_dummy.cc) +ENDIF() + INCLUDE_DIRECTORIES( ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/sql @@ -20,6 +40,7 @@ ${PCRE_INCLUDES} ${ZLIB_INCLUDE_DIR} ${SSL_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/sql +${WSREP_INCLUDES} ) SET(GEN_SOURCES @@ -92,6 +113,7 @@ SET (SQL_SOURCE my_apc.cc my_apc.h my_json_writer.cc my_json_writer.h rpl_gtid.cc rpl_parallel.cc + ${WSREP_SOURCES} table_cache.cc ${CMAKE_CURRENT_BINARY_DIR}/sql_builtin.cc ${GEN_SOURCES} @@ -113,6 +135,7 @@ DTRACE_INSTRUMENT(sql) TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS} mysys mysys_ssl dbug strings vio pcre ${LIBJEMALLOC} ${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT} + ${WSREP_LIB} ${SSL_LIBRARIES}) IF(WIN32) diff --git a/sql/create_options.cc b/sql/create_options.cc index d60639a4f4a..efae87e7533 100644 --- a/sql/create_options.cc +++ b/sql/create_options.cc @@ -331,7 +331,7 @@ bool parse_option_list(THD* thd, handlerton *hton, void *option_struct_arg, char buf[256]; String sbuf(buf, sizeof(buf), system_charset_info), *str; - if ((str= sysvar->val_str(&sbuf, thd, OPT_SESSION, 0))) + if ((str= sysvar->val_str(&sbuf, thd, OPT_SESSION, &null_lex_str))) { LEX_STRING name= { const_cast<char*>(opt->name), opt->name_length }; default_val.str= strmake_root(root, str->ptr(), str->length()); diff --git a/sql/event_data_objects.cc b/sql/event_data_objects.cc index 
d65180a60be..bf824a98310 100644 --- a/sql/event_data_objects.cc +++ b/sql/event_data_objects.cc @@ -1472,8 +1472,17 @@ end: bool save_tx_read_only= thd->tx_read_only; thd->tx_read_only= false; + if (WSREP(thd)) + { + thd->lex->sql_command = SQLCOM_DROP_EVENT; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + } + ret= Events::drop_event(thd, dbname, name, FALSE); + WSREP_TO_ISOLATION_END; + + error: thd->tx_read_only= save_tx_read_only; thd->security_ctx->master_access= saved_master_access; } diff --git a/sql/event_data_objects.h b/sql/event_data_objects.h index 2483c564dff..8113fcb0e2e 100644 --- a/sql/event_data_objects.h +++ b/sql/event_data_objects.h @@ -85,7 +85,7 @@ class Event_queue_element : public Event_basic public: int on_completion; int status; - longlong originator; + uint32 originator; my_time_t last_executed; my_time_t execute_at; diff --git a/sql/event_parse_data.cc b/sql/event_parse_data.cc index 7647419aff9..44d89887c3b 100644 --- a/sql/event_parse_data.cc +++ b/sql/event_parse_data.cc @@ -564,7 +564,8 @@ Event_parse_data::init_definer(THD *thd) void Event_parse_data::check_originator_id(THD *thd) { /* Disable replicated events on slave. */ - if ((thd->system_thread == SYSTEM_THREAD_SLAVE_SQL) || + if (IF_WSREP(WSREP(thd) && thd->wsrep_applier, 0) || + (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL) || (thd->system_thread == SYSTEM_THREAD_SLAVE_IO)) { DBUG_PRINT("info", ("Invoked object status set to SLAVESIDE_DISABLED.")); diff --git a/sql/event_parse_data.h b/sql/event_parse_data.h index faf42db623a..3ca7fcaab72 100644 --- a/sql/event_parse_data.h +++ b/sql/event_parse_data.h @@ -57,7 +57,7 @@ public: int on_completion; int status; bool status_changed; - longlong originator; + uint32 originator; /* do_not_create will be set if STARTS time is in the past and on_completion == ON_COMPLETION_DROP. 
diff --git a/sql/events.cc b/sql/events.cc index 63627b21777..7c6b29b7604 100644 --- a/sql/events.cc +++ b/sql/events.cc @@ -270,6 +270,7 @@ common_1_lev_code: static int create_query_string(THD *thd, String *buf) { + buf->length(0); /* Append the "CREATE" part of the query */ if (buf->append(STRING_WITH_LEN("CREATE "))) return 1; @@ -380,7 +381,8 @@ Events::create_event(THD *thd, Event_parse_data *parse_data, { /* Binlog the create event. */ DBUG_ASSERT(thd->query() && thd->query_length()); - String log_query; + char buffer[1024]; + String log_query(buffer, sizeof(buffer), &my_charset_bin); if (create_query_string(thd, &log_query)) { sql_print_error("Event Error: An error occurred while creating query " @@ -1128,7 +1130,6 @@ Events::load_events_from_db(THD *thd) delete et; goto end; } - /** Since the Event_queue_element object could be deleted inside Event_queue::create_event we should save the value of dropped flag @@ -1174,6 +1175,20 @@ end: DBUG_RETURN(ret); } +#ifdef WITH_WSREP +int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len) +{ + char buffer[1024]; + String log_query(buffer, sizeof(buffer), &my_charset_bin); + + if (create_query_string(thd, &log_query)) + { + WSREP_WARN("events create string failed: %s", thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} +#endif /* WITH_WSREP */ /** @} (End of group Event_Scheduler) */ diff --git a/sql/field.h b/sql/field.h index b5f332f5edc..fed6084fda2 100644 --- a/sql/field.h +++ b/sql/field.h @@ -281,6 +281,12 @@ public: LEX_STRING comment; /* Field is part of the following keys */ key_map key_start, part_of_key, part_of_key_not_clustered; + + /* + Bitmap of indexes that have records ordered by col1, ... this_field, ... + + For example, INDEX (col(prefix_n)) is not present in col.part_of_sortkey. 
+ */ key_map part_of_sortkey; /* We use three additional unireg types for TIMESTAMP to overcome limitation diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc deleted file mode 100644 index 9524a0366d3..00000000000 --- a/sql/ha_ndbcluster.cc +++ /dev/null @@ -1,11060 +0,0 @@ -/* Copyright (c) 2004, 2011, Oracle and/or its affiliates. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - -/** - @file - - @brief - This file defines the NDB Cluster handler: the interface between - MySQL and NDB Cluster -*/ - -#ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation -#endif - -#include "sql_priv.h" -#include "unireg.h" // REQUIRED: for other includes -#include "sql_table.h" // build_table_filename, - // tablename_to_filename, - // filename_to_tablename -#include "sql_partition.h" // HA_CAN_*, partition_info, part_id_range -#include "sql_base.h" // close_cached_tables -#include "discover.h" // readfrm -#include "sql_acl.h" // wild_case_compare -#include "rpl_mi.h" -#include "transaction.h" - -/* - There is an incompatibility between GNU ar and the Solaris linker - which makes the Solaris linker return an elf error when compiling - without NDB support (which makes libndb.a an empty library). - To avoid this we add a dummy declaration of a static variable - which makes us avoid this bug. 
-*/ -int ha_ndb_dummy; -#include <my_dir.h> -#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE -#include "ha_ndbcluster.h" -#include <ndbapi/NdbApi.hpp> -#include "ha_ndbcluster_cond.h" -#include <../util/Bitmask.hpp> -#include <ndbapi/NdbIndexStat.hpp> - -#include "ha_ndbcluster_binlog.h" -#include "ha_ndbcluster_tables.h" - -#include "sql_plugin.h" -#include "probes_mysql.h" -#include "sql_show.h" // init_fill_schema_files_row, - // schema_table_store_record -#include "sql_test.h" // print_where - -#ifdef ndb_dynamite -#undef assert -#define assert(x) do { if(x) break; ::printf("%s %d: assert failed: %s\n", __FILE__, __LINE__, #x); ::fflush(stdout); ::signal(SIGABRT,SIG_DFL); ::abort(); ::kill(::getpid(),6); ::kill(::getpid(),9); } while (0) -#endif - -// ndb interface initialization/cleanup functions -extern "C" void ndb_init_internal(); -extern "C" void ndb_end_internal(); - -static const int DEFAULT_PARALLELISM= 0; -static const ha_rows DEFAULT_AUTO_PREFETCH= 32; -static const ulong ONE_YEAR_IN_SECONDS= (ulong) 3600L*24L*365L; - -ulong opt_ndb_extra_logging; -static ulong opt_ndb_cache_check_time; -static char* opt_ndb_connectstring; -static char* opt_ndb_mgmd_host; -static uint opt_ndb_nodeid; - - -static MYSQL_THDVAR_UINT( - autoincrement_prefetch_sz, /* name */ - PLUGIN_VAR_RQCMDARG, - "Specify number of autoincrement values that are prefetched.", - NULL, /* check func. */ - NULL, /* update func. */ - 1, /* default */ - 1, /* min */ - 256, /* max */ - 0 /* block */ -); - - -static MYSQL_THDVAR_BOOL( - force_send, /* name */ - PLUGIN_VAR_OPCMDARG, - "Force send of buffers to ndb immediately without waiting for " - "other threads.", - NULL, /* check func. */ - NULL, /* update func. */ - 1 /* default */ -); - - -static MYSQL_THDVAR_BOOL( - use_exact_count, /* name */ - PLUGIN_VAR_OPCMDARG, - "Use exact records count during query planning and for fast " - "select count(*), disable for faster queries.", - NULL, /* check func. */ - NULL, /* update func. 
*/ - 1 /* default */ -); - - -static MYSQL_THDVAR_BOOL( - use_transactions, /* name */ - PLUGIN_VAR_OPCMDARG, - "Use transactions for large inserts, if enabled then large " - "inserts will be split into several smaller transactions", - NULL, /* check func. */ - NULL, /* update func. */ - 1 /* default */ -); - - -static MYSQL_THDVAR_BOOL( - use_copying_alter_table, /* name */ - PLUGIN_VAR_OPCMDARG, - "Force ndbcluster to always copy tables at alter table (should " - "only be used if on-line alter table fails).", - NULL, /* check func. */ - NULL, /* update func. */ - 0 /* default */ -); - - -static MYSQL_THDVAR_UINT( - optimized_node_selection, /* name */ - PLUGIN_VAR_OPCMDARG, - "Select nodes for transactions in a more optimal way.", - NULL, /* check func. */ - NULL, /* update func. */ - 3, /* default */ - 0, /* min */ - 3, /* max */ - 0 /* block */ -); - - -static MYSQL_THDVAR_BOOL( - index_stat_enable, /* name */ - PLUGIN_VAR_OPCMDARG, - "Use ndb index statistics in query optimization.", - NULL, /* check func. */ - NULL, /* update func. */ - FALSE /* default */ -); - - -static MYSQL_THDVAR_ULONG( - index_stat_cache_entries, /* name */ - PLUGIN_VAR_NOCMDARG, - "", - NULL, /* check func. */ - NULL, /* update func. */ - 32, /* default */ - 0, /* min */ - ULONG_MAX, /* max */ - 0 /* block */ -); - - -static MYSQL_THDVAR_ULONG( - index_stat_update_freq, /* name */ - PLUGIN_VAR_NOCMDARG, - "", - NULL, /* check func. */ - NULL, /* update func. 
*/ - 20, /* default */ - 0, /* min */ - ULONG_MAX, /* max */ - 0 /* block */ -); - -// Default value for parallelism -static const int parallelism= 0; - -// Default value for max number of transactions -// createable against NDB from this handler -static const int max_transactions= 3; // should really be 2 but there is a transaction to much allocated when loch table is used - -static uint ndbcluster_partition_flags(); -static uint ndbcluster_alter_table_flags(uint flags); -static int ndbcluster_init(void *); -static int ndbcluster_end(handlerton *hton, ha_panic_function flag); -static bool ndbcluster_show_status(handlerton *hton, THD*, - stat_print_fn *, - enum ha_stat_type); -static int ndbcluster_alter_tablespace(handlerton *hton, - THD* thd, - st_alter_tablespace *info); -static int ndbcluster_fill_is_table(handlerton *hton, - THD *thd, - TABLE_LIST *tables, - COND *cond, - enum enum_schema_tables); -static int ndbcluster_fill_files_table(handlerton *hton, - THD *thd, - TABLE_LIST *tables, - COND *cond); - -handlerton *ndbcluster_hton; - -static handler *ndbcluster_create_handler(handlerton *hton, - TABLE_SHARE *table, - MEM_ROOT *mem_root) -{ - return new (mem_root) ha_ndbcluster(hton, table); -} - -static uint ndbcluster_partition_flags() -{ - return (HA_CAN_PARTITION | HA_CAN_UPDATE_PARTITION_KEY | - HA_CAN_PARTITION_UNIQUE | HA_USE_AUTO_PARTITION); -} - -static uint ndbcluster_alter_table_flags(uint flags) -{ - if (flags & ALTER_DROP_PARTITION) - return 0; - else - return (HA_ONLINE_ADD_INDEX | HA_ONLINE_DROP_INDEX | - HA_ONLINE_ADD_UNIQUE_INDEX | HA_ONLINE_DROP_UNIQUE_INDEX | - HA_PARTITION_FUNCTION_SUPPORTED); - -} - -#define NDB_AUTO_INCREMENT_RETRIES 10 - -#define ERR_PRINT(err) \ - DBUG_PRINT("error", ("%d message: %s", err.code, err.message)) - -#define ERR_RETURN(err) \ -{ \ - const NdbError& tmp= err; \ - set_ndb_err(current_thd, tmp); \ - DBUG_RETURN(ndb_to_mysql_error(&tmp)); \ -} - -#define ERR_RETURN_PREPARE(rc, err) \ -{ \ - const NdbError& tmp= 
err; \ - set_ndb_err(current_thd, tmp); \ - rc= ndb_to_mysql_error(&tmp); \ -} - -#define ERR_BREAK(err, code) \ -{ \ - const NdbError& tmp= err; \ - set_ndb_err(current_thd, tmp); \ - code= ndb_to_mysql_error(&tmp); \ - break; \ -} - -static int ndbcluster_inited= 0; -int ndbcluster_terminating= 0; - -static Ndb* g_ndb= NULL; -Ndb_cluster_connection* g_ndb_cluster_connection= NULL; -uchar g_node_id_map[max_ndb_nodes]; - -/// Handler synchronization -mysql_mutex_t ndbcluster_mutex; - -/// Table lock handling -HASH ndbcluster_open_tables; - -static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length, - my_bool not_used __attribute__((unused))); -#ifdef HAVE_NDB_BINLOG -static int rename_share(NDB_SHARE *share, const char *new_key); -#endif -static int ndb_get_table_statistics(ha_ndbcluster*, bool, Ndb*, const NDBTAB *, - struct Ndb_statistics *); - - -// Util thread variables -pthread_t ndb_util_thread; -int ndb_util_thread_running= 0; -mysql_mutex_t LOCK_ndb_util_thread; -mysql_cond_t COND_ndb_util_thread; -mysql_cond_t COND_ndb_util_ready; -pthread_handler_t ndb_util_thread_func(void *arg); - -/** - Dummy buffer to read zero pack_length fields - which are mapped to 1 char. -*/ -static uint32 dummy_buf; - -/** - Stats that can be retrieved from ndb. 
-*/ - -struct Ndb_statistics { - Uint64 row_count; - Uint64 commit_count; - Uint64 row_size; - Uint64 fragment_memory; -}; - -/* Status variables shown with 'show status like 'Ndb%' */ - -static long ndb_cluster_node_id= 0; -static const char * ndb_connected_host= 0; -static long ndb_connected_port= 0; -static long ndb_number_of_replicas= 0; -long ndb_number_of_data_nodes= 0; -long ndb_number_of_ready_data_nodes= 0; -long ndb_connect_count= 0; - -static int update_status_variables(Ndb_cluster_connection *c) -{ - ndb_cluster_node_id= c->node_id(); - ndb_connected_port= c->get_connected_port(); - ndb_connected_host= c->get_connected_host(); - ndb_number_of_replicas= 0; - ndb_number_of_ready_data_nodes= c->get_no_ready(); - ndb_number_of_data_nodes= c->no_db_nodes(); - ndb_connect_count= c->get_connect_count(); - return 0; -} - -SHOW_VAR ndb_status_variables[]= { - {"cluster_node_id", (char*) &ndb_cluster_node_id, SHOW_LONG}, - {"config_from_host", (char*) &ndb_connected_host, SHOW_CHAR_PTR}, - {"config_from_port", (char*) &ndb_connected_port, SHOW_LONG}, -// {"number_of_replicas", (char*) &ndb_number_of_replicas, SHOW_LONG}, - {"number_of_data_nodes",(char*) &ndb_number_of_data_nodes, SHOW_LONG}, - {NullS, NullS, SHOW_LONG} -}; - -/* - Error handling functions -*/ - -/* Note for merge: old mapping table, moved to storage/ndb/ndberror.c */ - -static int ndb_to_mysql_error(const NdbError *ndberr) -{ - /* read the mysql mapped error code */ - int error= ndberr->mysql_code; - - switch (error) - { - /* errors for which we do not add warnings, just return mapped error code - */ - case HA_ERR_NO_SUCH_TABLE: - case HA_ERR_KEY_NOT_FOUND: - return error; - - /* Mapping missing, go with the ndb error code*/ - case -1: - error= ndberr->code; - break; - /* Mapping exists, go with the mapped code */ - default: - break; - } - - /* - Push the NDB error message as warning - - Used to be able to use SHOW WARNINGS toget more info on what the error is - - Used by replication to see if 
the error was temporary - */ - if (ndberr->status == NdbError::TemporaryError) - push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG), - ndberr->code, ndberr->message, "NDB"); - else - push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - ndberr->code, ndberr->message, "NDB"); - return error; -} - -int execute_no_commit_ignore_no_key(ha_ndbcluster *h, NdbTransaction *trans) -{ - if (trans->execute(NdbTransaction::NoCommit, - NdbOperation::AO_IgnoreError, - h->m_force_send) == -1) - return -1; - - const NdbError &err= trans->getNdbError(); - if (err.classification != NdbError::NoError && - err.classification != NdbError::ConstraintViolation && - err.classification != NdbError::NoDataFound) - return -1; - - return 0; -} - -inline -int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans, - bool force_release) -{ - h->release_completed_operations(trans, force_release); - return h->m_ignore_no_key ? 
- execute_no_commit_ignore_no_key(h,trans) : - trans->execute(NdbTransaction::NoCommit, - NdbOperation::AbortOnError, - h->m_force_send); -} - -inline -int execute_commit(ha_ndbcluster *h, NdbTransaction *trans) -{ - return trans->execute(NdbTransaction::Commit, - NdbOperation::AbortOnError, - h->m_force_send); -} - -inline -int execute_commit(THD *thd, NdbTransaction *trans) -{ - return trans->execute(NdbTransaction::Commit, - NdbOperation::AbortOnError, - THDVAR(thd, force_send)); -} - -inline -int execute_no_commit_ie(ha_ndbcluster *h, NdbTransaction *trans, - bool force_release) -{ - h->release_completed_operations(trans, force_release); - return trans->execute(NdbTransaction::NoCommit, - NdbOperation::AO_IgnoreError, - h->m_force_send); -} - -/* - Place holder for ha_ndbcluster thread specific data -*/ -typedef struct st_thd_ndb_share { - const void *key; - struct Ndb_local_table_statistics stat; -} THD_NDB_SHARE; -static -uchar *thd_ndb_share_get_key(THD_NDB_SHARE *thd_ndb_share, size_t *length, - my_bool not_used __attribute__((unused))) -{ - *length= sizeof(thd_ndb_share->key); - return (uchar*) &thd_ndb_share->key; -} - -Thd_ndb::Thd_ndb() -{ - ndb= new Ndb(g_ndb_cluster_connection, ""); - lock_count= 0; - start_stmt_count= 0; - count= 0; - trans= NULL; - m_error= FALSE; - m_error_code= 0; - query_state&= NDB_QUERY_NORMAL; - options= 0; - (void) my_hash_init(&open_tables, &my_charset_bin, 5, 0, 0, - (my_hash_get_key)thd_ndb_share_get_key, 0, 0); -} - -Thd_ndb::~Thd_ndb() -{ - if (ndb) - { -#ifndef DBUG_OFF - Ndb::Free_list_usage tmp; - tmp.m_name= 0; - while (ndb->get_free_list_usage(&tmp)) - { - uint leaked= (uint) tmp.m_created - tmp.m_free; - if (leaked) - fprintf(stderr, "NDB: Found %u %s%s that %s not been released\n", - leaked, tmp.m_name, - (leaked == 1)?"":"'s", - (leaked == 1)?"has":"have"); - } -#endif - delete ndb; - ndb= NULL; - } - changed_tables.empty(); - my_hash_free(&open_tables); -} - -void -Thd_ndb::init_open_tables() -{ - count= 0; - 
m_error= FALSE; - m_error_code= 0; - my_hash_reset(&open_tables); -} - -inline -Ndb *ha_ndbcluster::get_ndb() -{ - return get_thd_ndb(current_thd)->ndb; -} - -/* - * manage uncommitted insert/deletes during transactio to get records correct - */ - -void ha_ndbcluster::set_rec_per_key() -{ - DBUG_ENTER("ha_ndbcluster::get_status_const"); - for (uint i=0 ; i < table_share->keys ; i++) - { - table->key_info[i].rec_per_key[table->key_info[i].key_parts-1]= 1; - } - DBUG_VOID_RETURN; -} - -ha_rows ha_ndbcluster::records() -{ - ha_rows retval; - DBUG_ENTER("ha_ndbcluster::records"); - struct Ndb_local_table_statistics *local_info= m_table_info; - DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d", - ((const NDBTAB *)m_table)->getTableId(), - local_info->no_uncommitted_rows_count)); - - Ndb *ndb= get_ndb(); - ndb->setDatabaseName(m_dbname); - struct Ndb_statistics stat; - if (ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat) == 0) - { - retval= stat.row_count; - } - else - { - DBUG_RETURN(HA_POS_ERROR); - } - - THD *thd= current_thd; - if (get_thd_ndb(thd)->m_error) - local_info->no_uncommitted_rows_count= 0; - - DBUG_RETURN(retval + local_info->no_uncommitted_rows_count); -} - -int ha_ndbcluster::records_update() -{ - if (m_ha_not_exact_count) - return 0; - DBUG_ENTER("ha_ndbcluster::records_update"); - int result= 0; - - struct Ndb_local_table_statistics *local_info= m_table_info; - DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d", - ((const NDBTAB *)m_table)->getTableId(), - local_info->no_uncommitted_rows_count)); - { - Ndb *ndb= get_ndb(); - struct Ndb_statistics stat; - if (ndb->setDatabaseName(m_dbname)) - { - return my_errno= HA_ERR_OUT_OF_MEM; - } - result= ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat); - if (result == 0) - { - stats.mean_rec_length= stat.row_size; - stats.data_file_length= stat.fragment_memory; - local_info->records= stat.row_count; - } - } - { - THD *thd= current_thd; - if (get_thd_ndb(thd)->m_error) - 
local_info->no_uncommitted_rows_count= 0; - } - if (result == 0) - stats.records= local_info->records+ local_info->no_uncommitted_rows_count; - DBUG_RETURN(result); -} - -void ha_ndbcluster::no_uncommitted_rows_execute_failure() -{ - if (m_ha_not_exact_count) - return; - DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure"); - get_thd_ndb(current_thd)->m_error= TRUE; - get_thd_ndb(current_thd)->m_error_code= 0; - DBUG_VOID_RETURN; -} - -void ha_ndbcluster::no_uncommitted_rows_update(int c) -{ - if (m_ha_not_exact_count) - return; - DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update"); - struct Ndb_local_table_statistics *local_info= m_table_info; - local_info->no_uncommitted_rows_count+= c; - DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d", - ((const NDBTAB *)m_table)->getTableId(), - local_info->no_uncommitted_rows_count)); - DBUG_VOID_RETURN; -} - -void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd) -{ - if (m_ha_not_exact_count) - return; - DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset"); - Thd_ndb *thd_ndb= get_thd_ndb(thd); - thd_ndb->count++; - thd_ndb->m_error= FALSE; - DBUG_VOID_RETURN; -} - -/* - Sets the latest ndb error code on the thd_ndb object such that it - can be retrieved later to know which ndb error caused the handler - error. -*/ -static void set_ndb_err(THD *thd, const NdbError &err) -{ - DBUG_ENTER("set_ndb_err"); - ERR_PRINT(err); - - Thd_ndb *thd_ndb= get_thd_ndb(thd); - if (thd_ndb == NULL) - DBUG_VOID_RETURN; -#ifdef NOT_YET - /* - Check if error code is overwritten, in this case the original - failure cause will be lost. E.g. if 4350 error is given. So - push a warning so that it can be detected which is the root - error cause. 
- */ - if (thd_ndb->m_query_id == thd->query_id && - thd_ndb->m_error_code != 0 && - thd_ndb->m_error_code != err.code) - { - char buf[FN_REFLEN]; - ndb_error_string(thd_ndb->m_error_code, buf, sizeof(buf)); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - thd_ndb->m_error_code, buf, "NDB"); - } -#endif - thd_ndb->m_query_id= thd->query_id; - thd_ndb->m_error_code= err.code; - DBUG_VOID_RETURN; -} - -int ha_ndbcluster::ndb_err(NdbTransaction *trans) -{ - THD *thd= current_thd; - int res; - NdbError err= trans->getNdbError(); - DBUG_ENTER("ndb_err"); - - set_ndb_err(thd, err); - - switch (err.classification) { - case NdbError::SchemaError: - { - // TODO perhaps we need to do more here, invalidate also in the cache - m_table->setStatusInvalid(); - /* Close other open handlers not used by any thread */ - TABLE_LIST table_list; - bzero((char*) &table_list,sizeof(table_list)); - table_list.db= m_dbname; - table_list.alias= table_list.table_name= m_tabname; - close_cached_tables(thd, &table_list, FALSE, LONG_TIMEOUT); - break; - } - default: - break; - } - res= ndb_to_mysql_error(&err); - DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d", - err.code, res)); - if (res == HA_ERR_FOUND_DUPP_KEY) - { - char *error_data= err.details; - uint dupkey= MAX_KEY; - - for (uint i= 0; i < MAX_KEY; i++) - { - if (m_index[i].type == UNIQUE_INDEX || - m_index[i].type == UNIQUE_ORDERED_INDEX) - { - const NDBINDEX *unique_index= - (const NDBINDEX *) m_index[i].unique_index; - if (unique_index && - (char *) unique_index->getObjectId() == error_data) - { - dupkey= i; - break; - } - } - } - if (m_rows_to_insert == 1) - { - /* - We can only distinguish between primary and non-primary - violations here, so we need to return MAX_KEY for non-primary - to signal that key is unknown - */ - m_dupkey= err.code == 630 ? 
table_share->primary_key : dupkey; - } - else - { - /* We are batching inserts, offending key is not available */ - m_dupkey= (uint) -1; - } - } - DBUG_RETURN(res); -} - - -/** - Override the default get_error_message in order to add the - error message of NDB . -*/ - -bool ha_ndbcluster::get_error_message(int error, - String *buf) -{ - DBUG_ENTER("ha_ndbcluster::get_error_message"); - DBUG_PRINT("enter", ("error: %d", error)); - - Ndb *ndb= check_ndb_in_thd(current_thd); - if (!ndb) - DBUG_RETURN(FALSE); - - const NdbError err= ndb->getNdbError(error); - bool temporary= err.status==NdbError::TemporaryError; - buf->set(err.message, strlen(err.message), &my_charset_bin); - DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary)); - DBUG_RETURN(temporary); -} - - -#ifndef DBUG_OFF -/** - Check if type is supported by NDB. -*/ - -static bool ndb_supported_type(enum_field_types type) -{ - switch (type) { - case MYSQL_TYPE_TINY: - case MYSQL_TYPE_SHORT: - case MYSQL_TYPE_LONG: - case MYSQL_TYPE_INT24: - case MYSQL_TYPE_LONGLONG: - case MYSQL_TYPE_FLOAT: - case MYSQL_TYPE_DOUBLE: - case MYSQL_TYPE_DECIMAL: - case MYSQL_TYPE_NEWDECIMAL: - case MYSQL_TYPE_TIMESTAMP: - case MYSQL_TYPE_DATETIME: - case MYSQL_TYPE_DATE: - case MYSQL_TYPE_NEWDATE: - case MYSQL_TYPE_TIME: - case MYSQL_TYPE_YEAR: - case MYSQL_TYPE_STRING: - case MYSQL_TYPE_VAR_STRING: - case MYSQL_TYPE_VARCHAR: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_ENUM: - case MYSQL_TYPE_SET: - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_GEOMETRY: - return TRUE; - case MYSQL_TYPE_NULL: - break; - } - return FALSE; -} -#endif /* !DBUG_OFF */ - - -/** - Check if MySQL field type forces var part in ndb storage. 
-*/ -static bool field_type_forces_var_part(enum_field_types type) -{ - switch (type) { - case MYSQL_TYPE_VAR_STRING: - case MYSQL_TYPE_VARCHAR: - return TRUE; - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_GEOMETRY: - return FALSE; - default: - return FALSE; - } -} - -/** - Instruct NDB to set the value of the hidden primary key. -*/ - -bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op, - uint fieldnr, const uchar *field_ptr) -{ - DBUG_ENTER("set_hidden_key"); - DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr) != 0); -} - - -/** - Instruct NDB to set the value of one primary key attribute. -*/ - -int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field, - uint fieldnr, const uchar *field_ptr) -{ - uint32 pack_len= field->pack_length(); - DBUG_ENTER("set_ndb_key"); - DBUG_PRINT("enter", ("%d: %s, ndb_type: %u, len=%d", - fieldnr, field->field_name, field->type(), - pack_len)); - DBUG_DUMP("key", field_ptr, pack_len); - - DBUG_ASSERT(ndb_supported_type(field->type())); - DBUG_ASSERT(! (field->flags & BLOB_FLAG)); - // Common implementation for most field types - DBUG_RETURN(ndb_op->equal(fieldnr, (char*) field_ptr, pack_len) != 0); -} - - -/** - Instruct NDB to set the value of one attribute. -*/ - -int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, - uint fieldnr, int row_offset, - bool *set_blob_value) -{ - const uchar* field_ptr= field->ptr + row_offset; - uint32 pack_len= field->pack_length(); - DBUG_ENTER("set_ndb_value"); - DBUG_PRINT("enter", ("%d: %s type: %u len=%d is_null=%s", - fieldnr, field->field_name, field->type(), - pack_len, field->is_null(row_offset) ? 
"Y" : "N")); - DBUG_DUMP("value", field_ptr, pack_len); - - DBUG_ASSERT(ndb_supported_type(field->type())); - { - // ndb currently does not support size 0 - uint32 empty_field; - if (pack_len == 0) - { - pack_len= sizeof(empty_field); - field_ptr= (uchar *)&empty_field; - if (field->is_null(row_offset)) - empty_field= 0; - else - empty_field= 1; - } - if (! (field->flags & BLOB_FLAG)) - { - if (field->type() != MYSQL_TYPE_BIT) - { - if (field->is_null(row_offset)) - { - DBUG_PRINT("info", ("field is NULL")); - // Set value to NULL - DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0)); - } - // Common implementation for most field types - DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)field_ptr) != 0); - } - else // if (field->type() == MYSQL_TYPE_BIT) - { - longlong bits= field->val_int(); - - // Round up bit field length to nearest word boundry - pack_len= ((pack_len + 3) >> 2) << 2; - DBUG_ASSERT(pack_len <= 8); - if (field->is_null(row_offset)) - // Set value to NULL - DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL) != 0)); - DBUG_PRINT("info", ("bit field")); - DBUG_DUMP("value", (uchar*)&bits, pack_len); -#ifdef WORDS_BIGENDIAN - /* store lsw first */ - bits = ((bits >> 32) & 0x00000000FFFFFFFFLL) - | ((bits << 32) & 0xFFFFFFFF00000000LL); -#endif - DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits) != 0); - } - } - // Blob type - NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr); - if (ndb_blob != NULL) - { - if (field->is_null(row_offset)) - DBUG_RETURN(ndb_blob->setNull() != 0); - - Field_blob *field_blob= (Field_blob*)field; - - // Get length and pointer to data - uint32 blob_len= field_blob->get_length(field_ptr); - uchar* blob_ptr= NULL; - field_blob->get_ptr(&blob_ptr); - - // Looks like NULL ptr signals length 0 blob - if (blob_ptr == NULL) { - DBUG_ASSERT(blob_len == 0); - blob_ptr= (uchar*)""; - } - - DBUG_PRINT("value", ("set blob ptr: 0x%lx len: %u", - (long) blob_ptr, blob_len)); - DBUG_DUMP("value", blob_ptr, MY_MIN(blob_len, 26)); - - 
if (set_blob_value) - *set_blob_value= TRUE; - // No callback needed to write value - DBUG_RETURN(ndb_blob->setValue(blob_ptr, blob_len) != 0); - } - DBUG_RETURN(1); - } -} - - -NdbBlob::ActiveHook g_get_ndb_blobs_value; - -/** - Callback to read all blob values. - - not done in unpack_record because unpack_record is valid - after execute(Commit) but reading blobs is not - - may only generate read operations; they have to be executed - somewhere before the data is available - - due to single buffer for all blobs, we let the last blob - process all blobs (last so that all are active) - - null bit is still set in unpack_record. - - @todo - allocate blob part aligned buffers -*/ - -int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg) -{ - DBUG_ENTER("g_get_ndb_blobs_value"); - if (ndb_blob->blobsNextBlob() != NULL) - DBUG_RETURN(0); - ha_ndbcluster *ha= (ha_ndbcluster *)arg; - int ret= get_ndb_blobs_value(ha->table, ha->m_value, - ha->m_blobs_buffer, ha->m_blobs_buffer_size, - ha->m_blobs_offset); - DBUG_RETURN(ret); -} - -/* - This routine is shared by injector. There is no common blobs buffer - so the buffer and length are passed by reference. Injector also - passes a record pointer diff. - */ -int get_ndb_blobs_value(TABLE* table, NdbValue* value_array, - uchar*& buffer, uint& buffer_size, - my_ptrdiff_t ptrdiff) -{ - DBUG_ENTER("get_ndb_blobs_value"); - - // Field has no field number so cannot use TABLE blob_field - // Loop twice, first only counting total buffer size - for (int loop= 0; loop <= 1; loop++) - { - uint32 offset= 0; - for (uint i= 0; i < table->s->fields; i++) - { - Field *field= table->field[i]; - NdbValue value= value_array[i]; - if (! 
(field->flags & BLOB_FLAG)) - continue; - if (value.blob == NULL) - { - DBUG_PRINT("info",("[%u] skipped", i)); - continue; - } - Field_blob *field_blob= (Field_blob *)field; - NdbBlob *ndb_blob= value.blob; - int isNull; - if (ndb_blob->getNull(isNull) != 0) - ERR_RETURN(ndb_blob->getNdbError()); - if (isNull == 0) { - Uint64 len64= 0; - if (ndb_blob->getLength(len64) != 0) - ERR_RETURN(ndb_blob->getNdbError()); - // Align to Uint64 - uint32 size= len64; - if (size % 8 != 0) - size+= 8 - size % 8; - if (loop == 1) - { - uchar *buf= buffer + offset; - uint32 len= 0xffffffff; // Max uint32 - if (ndb_blob->readData(buf, len) != 0) - ERR_RETURN(ndb_blob->getNdbError()); - DBUG_PRINT("info", ("[%u] offset: %u buf: 0x%lx len=%u [ptrdiff=%d]", - i, offset, (long) buf, len, (int)ptrdiff)); - DBUG_ASSERT(len == len64); - // Ugly hack assumes only ptr needs to be changed - field_blob->set_ptr_offset(ptrdiff, len, buf); - } - offset+= size; - } - else if (loop == 1) // undefined or null - { - // have to set length even in this case - uchar *buf= buffer + offset; // or maybe NULL - uint32 len= 0; - field_blob->set_ptr_offset(ptrdiff, len, buf); - DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull)); - } - } - if (loop == 0 && offset > buffer_size) - { - my_free(buffer); - buffer_size= 0; - DBUG_PRINT("info", ("allocate blobs buffer size %u", offset)); - buffer= (uchar*) my_malloc(offset, MYF(MY_WME)); - if (buffer == NULL) - { - sql_print_error("ha_ndbcluster::get_ndb_blobs_value: " - "my_malloc(%u) failed", offset); - DBUG_RETURN(-1); - } - buffer_size= offset; - } - } - DBUG_RETURN(0); -} - - -/** - Instruct NDB to fetch one field. - - Data is read directly into buffer provided by field - if field is NULL, data is read into memory provided by NDBAPI. -*/ - -int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field, - uint fieldnr, uchar* buf) -{ - DBUG_ENTER("get_ndb_value"); - DBUG_PRINT("enter", ("fieldnr: %d flags: %o", fieldnr, - (int)(field != NULL ? 
field->flags : 0))); - - if (field != NULL) - { - DBUG_ASSERT(buf); - DBUG_ASSERT(ndb_supported_type(field->type())); - DBUG_ASSERT(field->ptr != NULL); - if (! (field->flags & BLOB_FLAG)) - { - if (field->type() != MYSQL_TYPE_BIT) - { - uchar *field_buf; - if (field->pack_length() != 0) - field_buf= buf + (field->ptr - table->record[0]); - else - field_buf= (uchar *)&dummy_buf; - m_value[fieldnr].rec= ndb_op->getValue(fieldnr, - (char*) field_buf); - } - else // if (field->type() == MYSQL_TYPE_BIT) - { - m_value[fieldnr].rec= ndb_op->getValue(fieldnr); - } - DBUG_RETURN(m_value[fieldnr].rec == NULL); - } - - // Blob type - NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr); - m_value[fieldnr].blob= ndb_blob; - if (ndb_blob != NULL) - { - // Set callback - m_blobs_offset= buf - (uchar*) table->record[0]; - void *arg= (void *)this; - DBUG_RETURN(ndb_blob->setActiveHook(g_get_ndb_blobs_value, arg) != 0); - } - DBUG_RETURN(1); - } - - // Used for hidden key only - m_value[fieldnr].rec= ndb_op->getValue(fieldnr, (char*) m_ref); - DBUG_RETURN(m_value[fieldnr].rec == NULL); -} - -/* - Instruct NDB to fetch the partition id (fragment id) -*/ -int ha_ndbcluster::get_ndb_partition_id(NdbOperation *ndb_op) -{ - DBUG_ENTER("get_ndb_partition_id"); - DBUG_RETURN(ndb_op->getValue(NdbDictionary::Column::FRAGMENT, - (char *)&m_part_id) == NULL); -} - -/** - Check if any set or get of blob value in current query. -*/ - -bool ha_ndbcluster::uses_blob_value() -{ - MY_BITMAP *bitmap; - uint *blob_index, *blob_index_end; - if (table_share->blob_fields == 0) - return FALSE; - - bitmap= m_write_op ? table->write_set : table->read_set; - blob_index= table_share->blob_field; - blob_index_end= blob_index + table_share->blob_fields; - do - { - if (bitmap_is_set(bitmap, table->field[*blob_index]->field_index)) - return TRUE; - } while (++blob_index != blob_index_end); - return FALSE; -} - - -/** - Get metadata for this table from NDB. 
- - Check that frm-file on disk is equal to frm-file - of table accessed in NDB. - - @retval - 0 ok - @retval - -2 Meta data has changed; Re-read data and try again -*/ - -int cmp_frm(const NDBTAB *ndbtab, const void *pack_data, - uint pack_length) -{ - DBUG_ENTER("cmp_frm"); - /* - Compare FrmData in NDB with frm file from disk. - */ - if ((pack_length != ndbtab->getFrmLength()) || - (memcmp(pack_data, ndbtab->getFrmData(), pack_length))) - DBUG_RETURN(1); - DBUG_RETURN(0); -} - -int ha_ndbcluster::get_metadata(const char *path) -{ - Ndb *ndb= get_ndb(); - NDBDICT *dict= ndb->getDictionary(); - const NDBTAB *tab; - int error; - DBUG_ENTER("get_metadata"); - DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path)); - - DBUG_ASSERT(m_table == NULL); - DBUG_ASSERT(m_table_info == NULL); - - uchar *data= NULL, *pack_data= NULL; - size_t length, pack_length; - - /* - Compare FrmData in NDB with frm file from disk. - */ - error= 0; - if (readfrm(path, &data, &length) || - packfrm(data, length, &pack_data, &pack_length)) - { - my_free(data); - my_free(pack_data); - DBUG_RETURN(1); - } - - Ndb_table_guard ndbtab_g(dict, m_tabname); - if (!(tab= ndbtab_g.get_table())) - ERR_RETURN(dict->getNdbError()); - - if (get_ndb_share_state(m_share) != NSS_ALTERED - && cmp_frm(tab, pack_data, pack_length)) - { - DBUG_PRINT("error", - ("metadata, pack_length: %lu getFrmLength: %d memcmp: %d", - (ulong) pack_length, tab->getFrmLength(), - memcmp(pack_data, tab->getFrmData(), pack_length))); - DBUG_DUMP("pack_data", (uchar*) pack_data, pack_length); - DBUG_DUMP("frm", (uchar*) tab->getFrmData(), tab->getFrmLength()); - error= HA_ERR_TABLE_DEF_CHANGED; - } - my_free(data); - my_free(pack_data); - - if (error) - goto err; - - DBUG_PRINT("info", ("fetched table %s", tab->getName())); - m_table= tab; - if ((error= open_indexes(ndb, table, FALSE)) == 0) - { - ndbtab_g.release(); - DBUG_RETURN(0); - } -err: - ndbtab_g.invalidate(); - m_table= NULL; - DBUG_RETURN(error); -} - -static 
int fix_unique_index_attr_order(NDB_INDEX_DATA &data, - const NDBINDEX *index, - KEY *key_info) -{ - DBUG_ENTER("fix_unique_index_attr_order"); - unsigned sz= index->getNoOfIndexColumns(); - - if (data.unique_index_attrid_map) - my_free(data.unique_index_attrid_map); - data.unique_index_attrid_map= (uchar*)my_malloc(sz,MYF(MY_WME)); - if (data.unique_index_attrid_map == 0) - { - sql_print_error("fix_unique_index_attr_order: my_malloc(%u) failure", - (unsigned int)sz); - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - } - - KEY_PART_INFO* key_part= key_info->key_part; - KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts; - DBUG_ASSERT(key_info->user_defined_key_parts == sz); - for (unsigned i= 0; key_part != end; key_part++, i++) - { - const char *field_name= key_part->field->field_name; -#ifndef DBUG_OFF - data.unique_index_attrid_map[i]= 255; -#endif - for (unsigned j= 0; j < sz; j++) - { - const NDBCOL *c= index->getColumn(j); - if (strcmp(field_name, c->getName()) == 0) - { - data.unique_index_attrid_map[i]= j; - break; - } - } - DBUG_ASSERT(data.unique_index_attrid_map[i] != 255); - } - DBUG_RETURN(0); -} - -/* - Create all the indexes for a table. 
- If any index should fail to be created, - the error is returned immediately -*/ -int ha_ndbcluster::create_indexes(Ndb *ndb, TABLE *tab) -{ - uint i; - int error= 0; - const char *index_name; - KEY* key_info= tab->key_info; - const char **key_name= tab->s->keynames.type_names; - DBUG_ENTER("ha_ndbcluster::create_indexes"); - - for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) - { - index_name= *key_name; - NDB_INDEX_TYPE idx_type= get_index_type_from_table(i); - error= create_index(index_name, key_info, idx_type, i); - if (error) - { - DBUG_PRINT("error", ("Failed to create index %u", i)); - break; - } - } - - DBUG_RETURN(error); -} - -static void ndb_init_index(NDB_INDEX_DATA &data) -{ - data.type= UNDEFINED_INDEX; - data.status= UNDEFINED; - data.unique_index= NULL; - data.index= NULL; - data.unique_index_attrid_map= NULL; - data.index_stat=NULL; - data.index_stat_cache_entries=0; - data.index_stat_update_freq=0; - data.index_stat_query_count=0; -} - -static void ndb_clear_index(NDB_INDEX_DATA &data) -{ - if (data.unique_index_attrid_map) - { - my_free(data.unique_index_attrid_map); - } - if (data.index_stat) - { - delete data.index_stat; - } - ndb_init_index(data); -} - -/* - Associate a direct reference to an index handle - with an index (for faster access) - */ -int ha_ndbcluster::add_index_handle(THD *thd, NDBDICT *dict, KEY *key_info, - const char *index_name, uint index_no) -{ - int error= 0; - NDB_INDEX_TYPE idx_type= get_index_type_from_table(index_no); - m_index[index_no].type= idx_type; - DBUG_ENTER("ha_ndbcluster::add_index_handle"); - DBUG_PRINT("enter", ("table %s", m_tabname)); - - if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX) - { - DBUG_PRINT("info", ("Get handle to index %s", index_name)); - const NDBINDEX *index; - do - { - index= dict->getIndexGlobal(index_name, *m_table); - if (!index) - ERR_RETURN(dict->getNdbError()); - DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d", - (long) index, - 
index->getObjectId(), - index->getObjectVersion() & 0xFFFFFF, - index->getObjectVersion() >> 24, - index->getObjectStatus())); - DBUG_ASSERT(index->getObjectStatus() == - NdbDictionary::Object::Retrieved); - break; - } while (1); - m_index[index_no].index= index; - // ordered index - add stats - NDB_INDEX_DATA& d=m_index[index_no]; - delete d.index_stat; - d.index_stat=NULL; - if (THDVAR(thd, index_stat_enable)) - { - d.index_stat=new NdbIndexStat(index); - d.index_stat_cache_entries=THDVAR(thd, index_stat_cache_entries); - d.index_stat_update_freq=THDVAR(thd, index_stat_update_freq); - d.index_stat_query_count=0; - d.index_stat->alloc_cache(d.index_stat_cache_entries); - DBUG_PRINT("info", ("index %s stat=on cache_entries=%u update_freq=%u", - index->getName(), - d.index_stat_cache_entries, - d.index_stat_update_freq)); - } else - { - DBUG_PRINT("info", ("index %s stat=off", index->getName())); - } - } - if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) - { - char unique_index_name[FN_LEN + 1]; - static const char* unique_suffix= "$unique"; - m_has_unique_index= TRUE; - strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS); - DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name)); - const NDBINDEX *index; - do - { - index= dict->getIndexGlobal(unique_index_name, *m_table); - if (!index) - ERR_RETURN(dict->getNdbError()); - DBUG_PRINT("info", ("index: 0x%lx id: %d version: %d.%d status: %d", - (long) index, - index->getObjectId(), - index->getObjectVersion() & 0xFFFFFF, - index->getObjectVersion() >> 24, - index->getObjectStatus())); - DBUG_ASSERT(index->getObjectStatus() == - NdbDictionary::Object::Retrieved); - break; - } while (1); - m_index[index_no].unique_index= index; - error= fix_unique_index_attr_order(m_index[index_no], index, key_info); - } - if (!error) - m_index[index_no].status= ACTIVE; - - DBUG_RETURN(error); -} - -/* - Associate index handles for each index of a table -*/ -int 
ha_ndbcluster::open_indexes(Ndb *ndb, TABLE *tab, bool ignore_error) -{ - uint i; - int error= 0; - THD *thd=current_thd; - NDBDICT *dict= ndb->getDictionary(); - KEY* key_info= tab->key_info; - const char **key_name= tab->s->keynames.type_names; - DBUG_ENTER("ha_ndbcluster::open_indexes"); - m_has_unique_index= FALSE; - for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) - { - if ((error= add_index_handle(thd, dict, key_info, *key_name, i))) - { - if (ignore_error) - m_index[i].index= m_index[i].unique_index= NULL; - else - break; - } - m_index[i].null_in_unique_index= FALSE; - if (check_index_fields_not_null(key_info)) - m_index[i].null_in_unique_index= TRUE; - } - - if (error && !ignore_error) - { - while (i > 0) - { - i--; - if (m_index[i].index) - { - dict->removeIndexGlobal(*m_index[i].index, 1); - m_index[i].index= NULL; - } - if (m_index[i].unique_index) - { - dict->removeIndexGlobal(*m_index[i].unique_index, 1); - m_index[i].unique_index= NULL; - } - } - } - - DBUG_ASSERT(error == 0 || error == 4243); - - DBUG_RETURN(error); -} - -/* - Renumber indexes in index list by shifting out - indexes that are to be dropped - */ -void ha_ndbcluster::renumber_indexes(Ndb *ndb, TABLE *tab) -{ - uint i; - const char *index_name; - KEY* key_info= tab->key_info; - const char **key_name= tab->s->keynames.type_names; - DBUG_ENTER("ha_ndbcluster::renumber_indexes"); - - for (i= 0; i < tab->s->keys; i++, key_info++, key_name++) - { - index_name= *key_name; - NDB_INDEX_TYPE idx_type= get_index_type_from_table(i); - m_index[i].type= idx_type; - if (m_index[i].status == TO_BE_DROPPED) - { - DBUG_PRINT("info", ("Shifting index %s(%i) out of the list", - index_name, i)); - NDB_INDEX_DATA tmp; - uint j= i + 1; - // Shift index out of list - while(j != MAX_KEY && m_index[j].status != UNDEFINED) - { - tmp= m_index[j - 1]; - m_index[j - 1]= m_index[j]; - m_index[j]= tmp; - j++; - } - } - } - - DBUG_VOID_RETURN; -} - -/* - Drop all indexes that are marked for deletion -*/ -int 
ha_ndbcluster::drop_indexes(Ndb *ndb, TABLE *tab) -{ - uint i; - int error= 0; - const char *index_name; - KEY* key_info= tab->key_info; - NDBDICT *dict= ndb->getDictionary(); - DBUG_ENTER("ha_ndbcluster::drop_indexes"); - - for (i= 0; i < tab->s->keys; i++, key_info++) - { - NDB_INDEX_TYPE idx_type= get_index_type_from_table(i); - m_index[i].type= idx_type; - if (m_index[i].status == TO_BE_DROPPED) - { - const NdbDictionary::Index *index= m_index[i].index; - const NdbDictionary::Index *unique_index= m_index[i].unique_index; - - if (index) - { - index_name= index->getName(); - DBUG_PRINT("info", ("Dropping index %u: %s", i, index_name)); - // Drop ordered index from ndb - error= dict->dropIndexGlobal(*index); - if (!error) - { - dict->removeIndexGlobal(*index, 1); - m_index[i].index= NULL; - } - } - if (!error && unique_index) - { - index_name= unique_index->getName(); - DBUG_PRINT("info", ("Dropping unique index %u: %s", i, index_name)); - // Drop unique index from ndb - error= dict->dropIndexGlobal(*unique_index); - if (!error) - { - dict->removeIndexGlobal(*unique_index, 1); - m_index[i].unique_index= NULL; - } - } - if (error) - DBUG_RETURN(error); - ndb_clear_index(m_index[i]); - continue; - } - } - - DBUG_RETURN(error); -} - -/** - Decode the type of an index from information - provided in table object. -*/ -NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const -{ - return get_index_type_from_key(inx, table_share->key_info, - inx == table_share->primary_key); -} - -NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_key(uint inx, - KEY *key_info, - bool primary) const -{ - bool is_hash_index= (key_info[inx].algorithm == - HA_KEY_ALG_HASH); - if (primary) - return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX; - - return ((key_info[inx].flags & HA_NOSAME) ? - (is_hash_index ? 
UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) : - ORDERED_INDEX); -} - -bool ha_ndbcluster::check_index_fields_not_null(KEY* key_info) -{ - KEY_PART_INFO* key_part= key_info->key_part; - KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts; - DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null"); - - for (; key_part != end; key_part++) - { - Field* field= key_part->field; - if (field->maybe_null()) - DBUG_RETURN(TRUE); - } - - DBUG_RETURN(FALSE); -} - -void ha_ndbcluster::release_metadata(THD *thd, Ndb *ndb) -{ - uint i; - - DBUG_ENTER("release_metadata"); - DBUG_PRINT("enter", ("m_tabname: %s", m_tabname)); - - NDBDICT *dict= ndb->getDictionary(); - int invalidate_indexes= 0; - if (thd && thd->lex && thd->lex->sql_command == SQLCOM_FLUSH) - { - invalidate_indexes = 1; - } - if (m_table != NULL) - { - if (m_table->getObjectStatus() == NdbDictionary::Object::Invalid) - invalidate_indexes= 1; - dict->removeTableGlobal(*m_table, invalidate_indexes); - } - // TODO investigate - DBUG_ASSERT(m_table_info == NULL); - m_table_info= NULL; - - // Release index list - for (i= 0; i < MAX_KEY; i++) - { - if (m_index[i].unique_index) - { - DBUG_ASSERT(m_table != NULL); - dict->removeIndexGlobal(*m_index[i].unique_index, invalidate_indexes); - } - if (m_index[i].index) - { - DBUG_ASSERT(m_table != NULL); - dict->removeIndexGlobal(*m_index[i].index, invalidate_indexes); - } - ndb_clear_index(m_index[i]); - } - - m_table= NULL; - DBUG_VOID_RETURN; -} - -int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type) -{ - if (type >= TL_WRITE_ALLOW_WRITE) - return NdbOperation::LM_Exclusive; - if (type == TL_READ_WITH_SHARED_LOCKS || - uses_blob_value()) - return NdbOperation::LM_Read; - return NdbOperation::LM_CommittedRead; -} - -static const ulong index_type_flags[]= -{ - /* UNDEFINED_INDEX */ - 0, - - /* PRIMARY_KEY_INDEX */ - HA_ONLY_WHOLE_INDEX, - - /* PRIMARY_KEY_ORDERED_INDEX */ - /* - Enable HA_KEYREAD_ONLY when "sorted" indexes are supported, - thus ORDERD BY clauses can 
be optimized by reading directly - through the index. - */ - // HA_KEYREAD_ONLY | - HA_READ_NEXT | - HA_READ_PREV | - HA_READ_RANGE | - HA_READ_ORDER, - - /* UNIQUE_INDEX */ - HA_ONLY_WHOLE_INDEX, - - /* UNIQUE_ORDERED_INDEX */ - HA_READ_NEXT | - HA_READ_PREV | - HA_READ_RANGE | - HA_READ_ORDER, - - /* ORDERED_INDEX */ - HA_READ_NEXT | - HA_READ_PREV | - HA_READ_RANGE | - HA_READ_ORDER -}; - -static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong); - -inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const -{ - DBUG_ASSERT(idx_no < MAX_KEY); - return m_index[idx_no].type; -} - -inline bool ha_ndbcluster::has_null_in_unique_index(uint idx_no) const -{ - DBUG_ASSERT(idx_no < MAX_KEY); - return m_index[idx_no].null_in_unique_index; -} - - -/** - Get the flags for an index. - - @return - flags depending on the type of the index. -*/ - -inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part, - bool all_parts) const -{ - DBUG_ENTER("ha_ndbcluster::index_flags"); - DBUG_PRINT("enter", ("idx_no: %u", idx_no)); - DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size); - DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] | - HA_KEY_SCAN_NOT_ROR); -} - -static void shrink_varchar(Field* field, const uchar* & ptr, uchar* buf) -{ - if (field->type() == MYSQL_TYPE_VARCHAR && ptr != NULL) { - Field_varstring* f= (Field_varstring*)field; - if (f->length_bytes == 1) { - uint pack_len= field->pack_length(); - DBUG_ASSERT(1 <= pack_len && pack_len <= 256); - if (ptr[1] == 0) { - buf[0]= ptr[0]; - } else { - DBUG_ASSERT(FALSE); - buf[0]= 255; - } - memmove(buf + 1, ptr + 2, pack_len - 1); - ptr= buf; - } - } -} - -int ha_ndbcluster::set_primary_key(NdbOperation *op, const uchar *key) -{ - KEY* key_info= table->key_info + table_share->primary_key; - KEY_PART_INFO* key_part= key_info->key_part; - KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts; - DBUG_ENTER("set_primary_key"); - - for (; key_part != end; 
key_part++) - { - Field* field= key_part->field; - const uchar* ptr= key; - uchar buf[256]; - shrink_varchar(field, ptr, buf); - if (set_ndb_key(op, field, - key_part->fieldnr-1, ptr)) - ERR_RETURN(op->getNdbError()); - key += key_part->store_length; - } - DBUG_RETURN(0); -} - - -int ha_ndbcluster::set_primary_key_from_record(NdbOperation *op, const uchar *record) -{ - KEY* key_info= table->key_info + table_share->primary_key; - KEY_PART_INFO* key_part= key_info->key_part; - KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts; - DBUG_ENTER("set_primary_key_from_record"); - - for (; key_part != end; key_part++) - { - Field* field= key_part->field; - if (set_ndb_key(op, field, - key_part->fieldnr-1, record+key_part->offset)) - ERR_RETURN(op->getNdbError()); - } - DBUG_RETURN(0); -} - -bool ha_ndbcluster::check_index_fields_in_write_set(uint keyno) -{ - KEY* key_info= table->key_info + keyno; - KEY_PART_INFO* key_part= key_info->key_part; - KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts; - uint i; - DBUG_ENTER("check_index_fields_in_write_set"); - - for (i= 0; key_part != end; key_part++, i++) - { - Field* field= key_part->field; - if (!bitmap_is_set(table->write_set, field->field_index)) - { - DBUG_RETURN(false); - } - } - - DBUG_RETURN(true); -} - -int ha_ndbcluster::set_index_key_from_record(NdbOperation *op, - const uchar *record, uint keyno) -{ - KEY* key_info= table->key_info + keyno; - KEY_PART_INFO* key_part= key_info->key_part; - KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts; - uint i; - DBUG_ENTER("set_index_key_from_record"); - - for (i= 0; key_part != end; key_part++, i++) - { - Field* field= key_part->field; - if (set_ndb_key(op, field, m_index[keyno].unique_index_attrid_map[i], - record+key_part->offset)) - ERR_RETURN(m_active_trans->getNdbError()); - } - DBUG_RETURN(0); -} - -int -ha_ndbcluster::set_index_key(NdbOperation *op, - const KEY *key_info, - const uchar * key_ptr) -{ - 
DBUG_ENTER("ha_ndbcluster::set_index_key"); - uint i; - KEY_PART_INFO* key_part= key_info->key_part; - KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts; - - for (i= 0; key_part != end; key_part++, i++) - { - Field* field= key_part->field; - const uchar* ptr= key_part->null_bit ? key_ptr + 1 : key_ptr; - uchar buf[256]; - shrink_varchar(field, ptr, buf); - if (set_ndb_key(op, field, m_index[active_index].unique_index_attrid_map[i], ptr)) - ERR_RETURN(m_active_trans->getNdbError()); - key_ptr+= key_part->store_length; - } - DBUG_RETURN(0); -} - -inline -int ha_ndbcluster::define_read_attrs(uchar* buf, NdbOperation* op) -{ - uint i; - DBUG_ENTER("define_read_attrs"); - - // Define attributes to read - for (i= 0; i < table_share->fields; i++) - { - Field *field= table->field[i]; - if (bitmap_is_set(table->read_set, i) || - ((field->flags & PRI_KEY_FLAG))) - { - if (get_ndb_value(op, field, i, buf)) - ERR_RETURN(op->getNdbError()); - } - else - { - m_value[i].ptr= NULL; - } - } - - if (table_share->primary_key == MAX_KEY) - { - DBUG_PRINT("info", ("Getting hidden key")); - // Scanning table with no primary key - int hidden_no= table_share->fields; -#ifndef DBUG_OFF - const NDBTAB *tab= (const NDBTAB *) m_table; - if (!tab->getColumn(hidden_no)) - DBUG_RETURN(1); -#endif - if (get_ndb_value(op, NULL, hidden_no, NULL)) - ERR_RETURN(op->getNdbError()); - } - DBUG_RETURN(0); -} - - -/** - Read one record from NDB using primary key. 
-*/ - -int ha_ndbcluster::pk_read(const uchar *key, uint key_len, uchar *buf, - uint32 part_id) -{ - uint no_fields= table_share->fields; - NdbConnection *trans= m_active_trans; - NdbOperation *op; - - int res; - DBUG_ENTER("pk_read"); - DBUG_PRINT("enter", ("key_len: %u", key_len)); - DBUG_DUMP("key", key, key_len); - m_write_op= FALSE; - - NdbOperation::LockMode lm= - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || - op->readTuple(lm) != 0) - ERR_RETURN(trans->getNdbError()); - - if (table_share->primary_key == MAX_KEY) - { - // This table has no primary key, use "hidden" primary key - DBUG_PRINT("info", ("Using hidden key")); - DBUG_DUMP("key", key, 8); - if (set_hidden_key(op, no_fields, key)) - ERR_RETURN(trans->getNdbError()); - - // Read key at the same time, for future reference - if (get_ndb_value(op, NULL, no_fields, NULL)) - ERR_RETURN(trans->getNdbError()); - } - else - { - if ((res= set_primary_key(op, key))) - return res; - } - - if ((res= define_read_attrs(buf, op))) - DBUG_RETURN(res); - - if (m_use_partition_function) - { - op->setPartitionId(part_id); - // If table has user defined partitioning - // and no indexes, we need to read the partition id - // to support ORDER BY queries - if (table_share->primary_key == MAX_KEY && - get_ndb_partition_id(op)) - ERR_RETURN(trans->getNdbError()); - } - - if ((res = execute_no_commit_ie(this,trans,FALSE)) != 0 || - op->getNdbError().code) - { - table->status= STATUS_NOT_FOUND; - DBUG_RETURN(ndb_err(trans)); - } - - // The value have now been fetched from NDB - unpack_record(buf); - table->status= 0; - DBUG_RETURN(0); -} - -/** - Read one complementing record from NDB using primary key from old_data - or hidden key. 
-*/ - -int ha_ndbcluster::complemented_read(const uchar *old_data, uchar *new_data, - uint32 old_part_id) -{ - uint no_fields= table_share->fields, i; - NdbTransaction *trans= m_active_trans; - NdbOperation *op; - DBUG_ENTER("complemented_read"); - m_write_op= FALSE; - - if (bitmap_is_set_all(table->read_set)) - { - // We have allready retrieved all fields, nothing to complement - DBUG_RETURN(0); - } - - NdbOperation::LockMode lm= - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || - op->readTuple(lm) != 0) - ERR_RETURN(trans->getNdbError()); - if (table_share->primary_key != MAX_KEY) - { - if (set_primary_key_from_record(op, old_data)) - ERR_RETURN(trans->getNdbError()); - } - else - { - // This table has no primary key, use "hidden" primary key - if (set_hidden_key(op, table->s->fields, m_ref)) - ERR_RETURN(op->getNdbError()); - } - - if (m_use_partition_function) - op->setPartitionId(old_part_id); - - // Read all unreferenced non-key field(s) - for (i= 0; i < no_fields; i++) - { - Field *field= table->field[i]; - if (!((field->flags & PRI_KEY_FLAG) || - bitmap_is_set(table->read_set, i)) && - !bitmap_is_set(table->write_set, i)) - { - if (get_ndb_value(op, field, i, new_data)) - ERR_RETURN(trans->getNdbError()); - } - } - - if (execute_no_commit(this,trans,FALSE) != 0) - { - table->status= STATUS_NOT_FOUND; - DBUG_RETURN(ndb_err(trans)); - } - - // The value have now been fetched from NDB - unpack_record(new_data); - table->status= 0; - - /* - * restore m_value - */ - for (i= 0; i < no_fields; i++) - { - Field *field= table->field[i]; - if (!((field->flags & PRI_KEY_FLAG) || - bitmap_is_set(table->read_set, i))) - { - m_value[i].ptr= NULL; - } - } - - DBUG_RETURN(0); -} - -/** - Check that all operations between first and last all - have gotten the errcode - If checking for HA_ERR_KEY_NOT_FOUND then update m_dupkey - for all succeeding operations -*/ -bool 
ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans, - const NdbOperation *first, - const NdbOperation *last, - uint errcode) -{ - const NdbOperation *op= first; - DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error"); - - while(op) - { - NdbError err= op->getNdbError(); - if (err.status != NdbError::Success) - { - if (ndb_to_mysql_error(&err) != (int) errcode) - DBUG_RETURN(FALSE); - if (op == last) break; - op= trans->getNextCompletedOperation(op); - } - else - { - // We found a duplicate - if (op->getType() == NdbOperation::UniqueIndexAccess) - { - if (errcode == HA_ERR_KEY_NOT_FOUND) - { - NdbIndexOperation *iop= (NdbIndexOperation *) op; - const NDBINDEX *index= iop->getIndex(); - // Find the key_no of the index - for(uint i= 0; i<table->s->keys; i++) - { - if (m_index[i].unique_index == index) - { - m_dupkey= i; - break; - } - } - } - } - else - { - // Must have been primary key access - DBUG_ASSERT(op->getType() == NdbOperation::PrimaryKeyAccess); - if (errcode == HA_ERR_KEY_NOT_FOUND) - m_dupkey= table->s->primary_key; - } - DBUG_RETURN(FALSE); - } - } - DBUG_RETURN(TRUE); -} - - -/** - * Check if record contains any null valued columns that are part of a key - */ -static -int -check_null_in_record(const KEY* key_info, const uchar *record) -{ - KEY_PART_INFO *curr_part, *end_part; - curr_part= key_info->key_part; - end_part= curr_part + key_info->user_defined_key_parts; - - while (curr_part != end_part) - { - if (curr_part->null_bit && - (record[curr_part->null_offset] & curr_part->null_bit)) - return 1; - curr_part++; - } - return 0; - /* - We could instead pre-compute a bitmask in table_share with one bit for - every null-bit in the key, and so check this just by OR'ing the bitmask - with the null bitmap in the record. - But not sure it's worth it. 
- */ -} - -/** - Peek to check if any rows already exist with conflicting - primary key or unique index values -*/ - -int ha_ndbcluster::peek_indexed_rows(const uchar *record, - NDB_WRITE_OP write_op) -{ - NdbTransaction *trans= m_active_trans; - NdbOperation *op; - const NdbOperation *first, *last; - uint i; - int res; - DBUG_ENTER("peek_indexed_rows"); - - NdbOperation::LockMode lm= - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - first= NULL; - if (write_op != NDB_UPDATE && table->s->primary_key != MAX_KEY) - { - /* - * Fetch any row with colliding primary key - */ - if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || - op->readTuple(lm) != 0) - ERR_RETURN(trans->getNdbError()); - - first= op; - if ((res= set_primary_key_from_record(op, record))) - ERR_RETURN(trans->getNdbError()); - - if (m_use_partition_function) - { - uint32 part_id; - int error; - longlong func_value; - my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); - error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value); - dbug_tmp_restore_column_map(table->read_set, old_map); - if (error) - { - m_part_info->err_value= func_value; - DBUG_RETURN(error); - } - op->setPartitionId(part_id); - } - } - /* - * Fetch any rows with colliding unique indexes - */ - KEY* key_info; - KEY_PART_INFO *key_part, *end; - for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++) - { - if (i != table->s->primary_key && - key_info->flags & HA_NOSAME) - { - /* - A unique index is defined on table. - We cannot look up a NULL field value in a unique index. But since - keys with NULLs are not indexed, such rows cannot conflict anyway, so - we just skip the index in this case. 
- */ - if (check_null_in_record(key_info, record)) - { - DBUG_PRINT("info", ("skipping check for key with NULL")); - continue; - } - if (write_op != NDB_INSERT && !check_index_fields_in_write_set(i)) - { - DBUG_PRINT("info", ("skipping check for key %u not in write_set", i)); - continue; - } - NdbIndexOperation *iop; - const NDBINDEX *unique_index = m_index[i].unique_index; - key_part= key_info->key_part; - end= key_part + key_info->user_defined_key_parts; - if (!(iop= trans->getNdbIndexOperation(unique_index, m_table)) || - iop->readTuple(lm) != 0) - ERR_RETURN(trans->getNdbError()); - - if (!first) - first= iop; - if ((res= set_index_key_from_record(iop, record, i))) - ERR_RETURN(trans->getNdbError()); - } - } - last= trans->getLastDefinedOperation(); - if (first) - res= execute_no_commit_ie(this,trans,FALSE); - else - { - // Table has no keys - table->status= STATUS_NOT_FOUND; - DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); - } - if (check_all_operations_for_error(trans, first, last, - HA_ERR_KEY_NOT_FOUND)) - { - table->status= STATUS_NOT_FOUND; - DBUG_RETURN(ndb_err(trans)); - } - else - { - DBUG_PRINT("info", ("m_dupkey %d", m_dupkey)); - } - DBUG_RETURN(0); -} - - -/** - Read one record from NDB using unique secondary index. 
-*/ - -int ha_ndbcluster::unique_index_read(const uchar *key, - uint key_len, uchar *buf) -{ - int res; - NdbTransaction *trans= m_active_trans; - NdbIndexOperation *op; - DBUG_ENTER("ha_ndbcluster::unique_index_read"); - DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index)); - DBUG_DUMP("key", key, key_len); - - NdbOperation::LockMode lm= - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - if (!(op= trans->getNdbIndexOperation(m_index[active_index].unique_index, - m_table)) || - op->readTuple(lm) != 0) - ERR_RETURN(trans->getNdbError()); - - // Set secondary index key(s) - if ((res= set_index_key(op, table->key_info + active_index, key))) - DBUG_RETURN(res); - - if ((res= define_read_attrs(buf, op))) - DBUG_RETURN(res); - - if (execute_no_commit_ie(this,trans,FALSE) != 0 || - op->getNdbError().code) - { - int err= ndb_err(trans); - if(err==HA_ERR_KEY_NOT_FOUND) - table->status= STATUS_NOT_FOUND; - else - table->status= STATUS_GARBAGE; - - DBUG_RETURN(err); - } - - // The value have now been fetched from NDB - unpack_record(buf); - table->status= 0; - DBUG_RETURN(0); -} - -inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor) -{ - DBUG_ENTER("fetch_next"); - int local_check; - NdbTransaction *trans= m_active_trans; - - if (m_lock_tuple) - { - /* - Lock level m_lock.type either TL_WRITE_ALLOW_WRITE - (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT - LOCK WITH SHARE MODE) and row was not explictly unlocked - with unlock_row() call - */ - NdbConnection *con_trans= m_active_trans; - NdbOperation *op; - // Lock row - DBUG_PRINT("info", ("Keeping lock on scanned row")); - - if (!(op= m_active_cursor->lockCurrentTuple())) - { - /* purecov: begin inspected */ - m_lock_tuple= FALSE; - ERR_RETURN(con_trans->getNdbError()); - /* purecov: end */ - } - m_ops_pending++; - } - m_lock_tuple= FALSE; - - bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE && - m_lock.type != TL_READ_WITH_SHARED_LOCKS;; - do { - DBUG_PRINT("info", ("Call 
nextResult, contact_ndb: %d", contact_ndb)); - /* - We can only handle one tuple with blobs at a time. - */ - if (m_ops_pending && m_blobs_pending) - { - if (execute_no_commit(this,trans,FALSE) != 0) - DBUG_RETURN(ndb_err(trans)); - m_ops_pending= 0; - m_blobs_pending= FALSE; - } - - if ((local_check= cursor->nextResult(contact_ndb, m_force_send)) == 0) - { - /* - Explicitly lock tuple if "select for update" or - "select lock in share mode" - */ - m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE - || - m_lock.type == TL_READ_WITH_SHARED_LOCKS); - DBUG_RETURN(0); - } - else if (local_check == 1 || local_check == 2) - { - // 1: No more records - // 2: No more cached records - - /* - Before fetching more rows and releasing lock(s), - all pending update or delete operations should - be sent to NDB - */ - DBUG_PRINT("info", ("ops_pending: %ld", (long) m_ops_pending)); - if (m_ops_pending) - { - if (m_transaction_on) - { - if (execute_no_commit(this,trans,FALSE) != 0) - DBUG_RETURN(-1); - } - else - { - if (execute_commit(this,trans) != 0) - DBUG_RETURN(-1); - if (trans->restart() != 0) - { - DBUG_ASSERT(0); - DBUG_RETURN(-1); - } - } - m_ops_pending= 0; - } - contact_ndb= (local_check == 2); - } - else - { - DBUG_RETURN(-1); - } - } while (local_check == 2); - - DBUG_RETURN(1); -} - -/** - Get the next record of a started scan. Try to fetch - it locally from NdbApi cached records if possible, - otherwise ask NDB for more. - - @note - If this is a update/delete make sure to not contact - NDB before any pending ops have been sent to NDB. 
-*/ - -inline int ha_ndbcluster::next_result(uchar *buf) -{ - int res; - DBUG_ENTER("next_result"); - - if (!m_active_cursor) - DBUG_RETURN(HA_ERR_END_OF_FILE); - - if ((res= fetch_next(m_active_cursor)) == 0) - { - DBUG_PRINT("info", ("One more record found")); - - unpack_record(buf); - table->status= 0; - DBUG_RETURN(0); - } - else if (res == 1) - { - // No more records - table->status= STATUS_NOT_FOUND; - - DBUG_PRINT("info", ("No more records")); - DBUG_RETURN(HA_ERR_END_OF_FILE); - } - else - { - DBUG_RETURN(ndb_err(m_active_trans)); - } -} - -/** - Set bounds for ordered index scan. -*/ - -int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op, - uint inx, - bool rir, - const key_range *keys[2], - uint range_no) -{ - const KEY *const key_info= table->key_info + inx; - const uint key_parts= key_info->user_defined_key_parts; - uint key_tot_len[2]; - uint tot_len; - uint i, j; - - DBUG_ENTER("set_bounds"); - DBUG_PRINT("info", ("key_parts=%d", key_parts)); - - for (j= 0; j <= 1; j++) - { - const key_range *key= keys[j]; - if (key != NULL) - { - // for key->flag see ha_rkey_function - DBUG_PRINT("info", ("key %d length=%d flag=%d", - j, key->length, key->flag)); - key_tot_len[j]= key->length; - } - else - { - DBUG_PRINT("info", ("key %d not present", j)); - key_tot_len[j]= 0; - } - } - tot_len= 0; - - for (i= 0; i < key_parts; i++) - { - KEY_PART_INFO *key_part= &key_info->key_part[i]; - Field *field= key_part->field; -#ifndef DBUG_OFF - uint part_len= key_part->length; -#endif - uint part_store_len= key_part->store_length; - // Info about each key part - struct part_st { - bool part_last; - const key_range *key; - const uchar *part_ptr; - bool part_null; - int bound_type; - const uchar* bound_ptr; - }; - struct part_st part[2]; - - for (j= 0; j <= 1; j++) - { - struct part_st &p= part[j]; - p.key= NULL; - p.bound_type= -1; - if (tot_len < key_tot_len[j]) - { - p.part_last= (tot_len + part_store_len >= key_tot_len[j]); - p.key= keys[j]; - p.part_ptr= 
&p.key->key[tot_len]; - p.part_null= key_part->null_bit && *p.part_ptr; - p.bound_ptr= (const char *) - p.part_null ? 0 : key_part->null_bit ? p.part_ptr + 1 : p.part_ptr; - - if (j == 0) - { - switch (p.key->flag) - { - case HA_READ_KEY_EXACT: - if (! rir) - p.bound_type= NdbIndexScanOperation::BoundEQ; - else // differs for records_in_range - p.bound_type= NdbIndexScanOperation::BoundLE; - break; - // ascending - case HA_READ_KEY_OR_NEXT: - p.bound_type= NdbIndexScanOperation::BoundLE; - break; - case HA_READ_AFTER_KEY: - if (! p.part_last) - p.bound_type= NdbIndexScanOperation::BoundLE; - else - p.bound_type= NdbIndexScanOperation::BoundLT; - break; - // descending - case HA_READ_PREFIX_LAST: // weird - p.bound_type= NdbIndexScanOperation::BoundEQ; - break; - case HA_READ_PREFIX_LAST_OR_PREV: // weird - p.bound_type= NdbIndexScanOperation::BoundGE; - break; - case HA_READ_BEFORE_KEY: - if (! p.part_last) - p.bound_type= NdbIndexScanOperation::BoundGE; - else - p.bound_type= NdbIndexScanOperation::BoundGT; - break; - default: - break; - } - } - if (j == 1) { - switch (p.key->flag) - { - // ascending - case HA_READ_BEFORE_KEY: - if (! p.part_last) - p.bound_type= NdbIndexScanOperation::BoundGE; - else - p.bound_type= NdbIndexScanOperation::BoundGT; - break; - case HA_READ_AFTER_KEY: // weird - p.bound_type= NdbIndexScanOperation::BoundGE; - break; - default: - break; - // descending strangely sets no end key - } - } - - if (p.bound_type == -1) - { - DBUG_PRINT("error", ("key %d unknown flag %d", j, p.key->flag)); - DBUG_ASSERT(FALSE); - // Stop setting bounds but continue with what we have - DBUG_RETURN(op->end_of_bound(range_no)); - } - } - } - - // Seen with e.g. 
b = 1 and c > 1 - if (part[0].bound_type == NdbIndexScanOperation::BoundLE && - part[1].bound_type == NdbIndexScanOperation::BoundGE && - memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0) - { - DBUG_PRINT("info", ("replace LE/GE pair by EQ")); - part[0].bound_type= NdbIndexScanOperation::BoundEQ; - part[1].bound_type= -1; - } - // Not seen but was in previous version - if (part[0].bound_type == NdbIndexScanOperation::BoundEQ && - part[1].bound_type == NdbIndexScanOperation::BoundGE && - memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0) - { - DBUG_PRINT("info", ("remove GE from EQ/GE pair")); - part[1].bound_type= -1; - } - - for (j= 0; j <= 1; j++) - { - struct part_st &p= part[j]; - // Set bound if not done with this key - if (p.key != NULL) - { - DBUG_PRINT("info", ("key %d:%d offset: %d length: %d last: %d bound: %d", - j, i, tot_len, part_len, p.part_last, p.bound_type)); - DBUG_DUMP("info", p.part_ptr, part_store_len); - - // Set bound if not cancelled via type -1 - if (p.bound_type != -1) - { - const uchar* ptr= p.bound_ptr; - uchar buf[256]; - shrink_varchar(field, ptr, buf); - if (op->setBound(i, p.bound_type, ptr)) - ERR_RETURN(op->getNdbError()); - } - } - } - - tot_len+= part_store_len; - } - DBUG_RETURN(op->end_of_bound(range_no)); -} - -/** - Start ordered index scan in NDB. 
-*/ - -int ha_ndbcluster::ordered_index_scan(const key_range *start_key, - const key_range *end_key, - bool sorted, bool descending, - uchar* buf, part_id_range *part_spec) -{ - int res; - bool restart; - NdbTransaction *trans= m_active_trans; - NdbIndexScanOperation *op; - - DBUG_ENTER("ha_ndbcluster::ordered_index_scan"); - DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d", - active_index, sorted, descending)); - DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname)); - m_write_op= FALSE; - - // Check that sorted seems to be initialised - DBUG_ASSERT(sorted == 0 || sorted == 1); - - if (m_active_cursor == 0) - { - restart= FALSE; - NdbOperation::LockMode lm= - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - bool need_pk = (lm == NdbOperation::LM_Read); - if (!(op= trans->getNdbIndexScanOperation(m_index[active_index].index, - m_table)) || - op->readTuples(lm, 0, parallelism, sorted, descending, FALSE, need_pk)) - ERR_RETURN(trans->getNdbError()); - if (m_use_partition_function && part_spec != NULL && - part_spec->start_part == part_spec->end_part) - op->setPartitionId(part_spec->start_part); - m_active_cursor= op; - } else { - restart= TRUE; - op= (NdbIndexScanOperation*)m_active_cursor; - - if (m_use_partition_function && part_spec != NULL && - part_spec->start_part == part_spec->end_part) - op->setPartitionId(part_spec->start_part); - DBUG_ASSERT(op->getSorted() == sorted); - DBUG_ASSERT(op->getLockMode() == - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type)); - if (op->reset_bounds(m_force_send)) - DBUG_RETURN(ndb_err(m_active_trans)); - } - - { - const key_range *keys[2]= { start_key, end_key }; - res= set_bounds(op, active_index, FALSE, keys); - if (res) - DBUG_RETURN(res); - } - - if (!restart) - { - if (m_cond && m_cond->generate_scan_filter(op)) - DBUG_RETURN(ndb_err(trans)); - - if ((res= define_read_attrs(buf, op))) - { - DBUG_RETURN(res); - } - - // If table has user defined partitioning - // and no primary key, 
we need to read the partition id - // to support ORDER BY queries - if (m_use_partition_function && - (table_share->primary_key == MAX_KEY) && - (get_ndb_partition_id(op))) - ERR_RETURN(trans->getNdbError()); - } - - if (execute_no_commit(this,trans,FALSE) != 0) - DBUG_RETURN(ndb_err(trans)); - - DBUG_RETURN(next_result(buf)); -} - -static -int -guess_scan_flags(NdbOperation::LockMode lm, - const NDBTAB* tab, const MY_BITMAP* readset) -{ - int flags= 0; - flags|= (lm == NdbOperation::LM_Read) ? NdbScanOperation::SF_KeyInfo : 0; - if (tab->checkColumns(0, 0) & 2) - { - int ret = tab->checkColumns(readset->bitmap, no_bytes_in_map(readset)); - - if (ret & 2) - { // If disk columns...use disk scan - flags |= NdbScanOperation::SF_DiskScan; - } - else if ((ret & 4) == 0 && (lm == NdbOperation::LM_Exclusive)) - { - // If no mem column is set and exclusive...guess disk scan - flags |= NdbScanOperation::SF_DiskScan; - } - } - return flags; -} - - -/* - Unique index scan in NDB (full table scan with scan filter) - */ - -int ha_ndbcluster::unique_index_scan(const KEY* key_info, - const uchar *key, - uint key_len, - uchar *buf) -{ - int res; - NdbScanOperation *op; - NdbTransaction *trans= m_active_trans; - part_id_range part_spec; - - DBUG_ENTER("unique_index_scan"); - DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname)); - - NdbOperation::LockMode lm= - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - int flags= guess_scan_flags(lm, m_table, table->read_set); - if (!(op=trans->getNdbScanOperation((const NDBTAB *) m_table)) || - op->readTuples(lm, flags, parallelism)) - ERR_RETURN(trans->getNdbError()); - m_active_cursor= op; - - if (m_use_partition_function) - { - part_spec.start_part= 0; - part_spec.end_part= m_part_info->get_tot_partitions() - 1; - prune_partition_set(table, &part_spec); - DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u", - part_spec.start_part, part_spec.end_part)); - /* - If partition pruning has found no partition 
in set - we can return HA_ERR_END_OF_FILE - If partition pruning has found exactly one partition in set - we can optimize scan to run towards that partition only. - */ - if (part_spec.start_part > part_spec.end_part) - { - DBUG_RETURN(HA_ERR_END_OF_FILE); - } - else if (part_spec.start_part == part_spec.end_part) - { - /* - Only one partition is required to scan, if sorted is required we - don't need it any more since output from one ordered partitioned - index is always sorted. - */ - m_active_cursor->setPartitionId(part_spec.start_part); - } - // If table has user defined partitioning - // and no primary key, we need to read the partition id - // to support ORDER BY queries - if ((table_share->primary_key == MAX_KEY) && - (get_ndb_partition_id(op))) - ERR_RETURN(trans->getNdbError()); - } - if (!m_cond) - m_cond= new ha_ndbcluster_cond; - if (!m_cond) - { - my_errno= HA_ERR_OUT_OF_MEM; - DBUG_RETURN(my_errno); - } - if (m_cond->generate_scan_filter_from_key(op, key_info, key, key_len, buf)) - DBUG_RETURN(ndb_err(trans)); - if ((res= define_read_attrs(buf, op))) - DBUG_RETURN(res); - - if (execute_no_commit(this,trans,FALSE) != 0) - DBUG_RETURN(ndb_err(trans)); - DBUG_PRINT("exit", ("Scan started successfully")); - DBUG_RETURN(next_result(buf)); -} - - -/** - Start full table scan in NDB. 
-*/ -int ha_ndbcluster::full_table_scan(uchar *buf) -{ - int res; - NdbScanOperation *op; - NdbTransaction *trans= m_active_trans; - part_id_range part_spec; - - DBUG_ENTER("full_table_scan"); - DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname)); - m_write_op= FALSE; - - NdbOperation::LockMode lm= - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - int flags= guess_scan_flags(lm, m_table, table->read_set); - if (!(op=trans->getNdbScanOperation(m_table)) || - op->readTuples(lm, flags, parallelism)) - ERR_RETURN(trans->getNdbError()); - m_active_cursor= op; - - if (m_use_partition_function) - { - part_spec.start_part= 0; - part_spec.end_part= m_part_info->get_tot_partitions() - 1; - prune_partition_set(table, &part_spec); - DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u", - part_spec.start_part, part_spec.end_part)); - /* - If partition pruning has found no partition in set - we can return HA_ERR_END_OF_FILE - If partition pruning has found exactly one partition in set - we can optimize scan to run towards that partition only. - */ - if (part_spec.start_part > part_spec.end_part) - { - DBUG_RETURN(HA_ERR_END_OF_FILE); - } - else if (part_spec.start_part == part_spec.end_part) - { - /* - Only one partition is required to scan, if sorted is required we - don't need it any more since output from one ordered partitioned - index is always sorted. 
- */ - m_active_cursor->setPartitionId(part_spec.start_part); - } - // If table has user defined partitioning - // and no primary key, we need to read the partition id - // to support ORDER BY queries - if ((table_share->primary_key == MAX_KEY) && - (get_ndb_partition_id(op))) - ERR_RETURN(trans->getNdbError()); - } - - if (m_cond && m_cond->generate_scan_filter(op)) - DBUG_RETURN(ndb_err(trans)); - if ((res= define_read_attrs(buf, op))) - DBUG_RETURN(res); - - if (execute_no_commit(this,trans,FALSE) != 0) - DBUG_RETURN(ndb_err(trans)); - DBUG_PRINT("exit", ("Scan started successfully")); - DBUG_RETURN(next_result(buf)); -} - -int -ha_ndbcluster::set_auto_inc(Field *field) -{ - DBUG_ENTER("ha_ndbcluster::set_auto_inc"); - Ndb *ndb= get_ndb(); - bool read_bit= bitmap_is_set(table->read_set, field->field_index); - bitmap_set_bit(table->read_set, field->field_index); - Uint64 next_val= (Uint64) field->val_int() + 1; - if (!read_bit) - bitmap_clear_bit(table->read_set, field->field_index); -#ifndef DBUG_OFF - char buff[22]; - DBUG_PRINT("info", - ("Trying to set next auto increment value to %s", - llstr(next_val, buff))); -#endif - if (ndb->checkUpdateAutoIncrementValue(m_share->tuple_id_range, next_val)) - { - Ndb_tuple_id_range_guard g(m_share); - if (ndb->setAutoIncrementValue(m_table, g.range, next_val, TRUE) - == -1) - ERR_RETURN(ndb->getNdbError()); - } - DBUG_RETURN(0); -} - -/** - Insert one record into NDB. 
-*/ -int ha_ndbcluster::write_row(uchar *record) -{ - bool has_auto_increment; - uint i; - NdbTransaction *trans= m_active_trans; - NdbOperation *op; - int res; - THD *thd= table->in_use; - longlong func_value= 0; - DBUG_ENTER("ha_ndbcluster::write_row"); - - m_write_op= TRUE; - has_auto_increment= (table->next_number_field && record == table->record[0]); - if (table_share->primary_key != MAX_KEY) - { - /* - * Increase any auto_incremented primary key - */ - if (has_auto_increment) - { - int error; - - m_skip_auto_increment= FALSE; - if ((error= update_auto_increment())) - DBUG_RETURN(error); - m_skip_auto_increment= (insert_id_for_cur_row == 0); - } - } - - /* - * If IGNORE the ignore constraint violations on primary and unique keys - */ - if (!m_use_write && m_ignore_dup_key) - { - /* - compare if expression with that in start_bulk_insert() - start_bulk_insert will set parameters to ensure that each - write_row is committed individually - */ - int peek_res= peek_indexed_rows(record, NDB_INSERT); - - if (!peek_res) - { - DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY); - } - if (peek_res != HA_ERR_KEY_NOT_FOUND) - DBUG_RETURN(peek_res); - } - - ha_statistic_increment(&SSV::ha_write_count); - - if (!(op= trans->getNdbOperation(m_table))) - ERR_RETURN(trans->getNdbError()); - - res= (m_use_write) ? 
op->writeTuple() :op->insertTuple(); - if (res != 0) - ERR_RETURN(trans->getNdbError()); - - if (m_use_partition_function) - { - uint32 part_id; - int error; - my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); - error= m_part_info->get_partition_id(m_part_info, &part_id, &func_value); - dbug_tmp_restore_column_map(table->read_set, old_map); - if (error) - { - m_part_info->err_value= func_value; - DBUG_RETURN(error); - } - op->setPartitionId(part_id); - } - - if (table_share->primary_key == MAX_KEY) - { - // Table has hidden primary key - Ndb *ndb= get_ndb(); - Uint64 auto_value; - uint retries= NDB_AUTO_INCREMENT_RETRIES; - int retry_sleep= 30; /* 30 milliseconds, transaction */ - for (;;) - { - Ndb_tuple_id_range_guard g(m_share); - if (ndb->getAutoIncrementValue(m_table, g.range, auto_value, 1) == -1) - { - if (--retries && - ndb->getNdbError().status == NdbError::TemporaryError) - { - my_sleep(retry_sleep); - continue; - } - ERR_RETURN(ndb->getNdbError()); - } - break; - } - if (set_hidden_key(op, table_share->fields, (const uchar*)&auto_value)) - ERR_RETURN(op->getNdbError()); - } - else - { - int error; - if ((error= set_primary_key_from_record(op, record))) - DBUG_RETURN(error); - } - - // Set non-key attribute(s) - bool set_blob_value= FALSE; - my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); - for (i= 0; i < table_share->fields; i++) - { - Field *field= table->field[i]; - if (!(field->flags & PRI_KEY_FLAG) && - (bitmap_is_set(table->write_set, i) || !m_use_write) && - set_ndb_value(op, field, i, record-table->record[0], &set_blob_value)) - { - m_skip_auto_increment= TRUE; - dbug_tmp_restore_column_map(table->read_set, old_map); - ERR_RETURN(op->getNdbError()); - } - } - dbug_tmp_restore_column_map(table->read_set, old_map); - - if (m_use_partition_function) - { - /* - We need to set the value of the partition function value in - NDB since the NDB kernel doesn't have easy access to the function - to calculate 
the value. - */ - if (func_value >= INT_MAX32) - func_value= INT_MAX32; - uint32 part_func_value= (uint32)func_value; - uint no_fields= table_share->fields; - if (table_share->primary_key == MAX_KEY) - no_fields++; - op->setValue(no_fields, part_func_value); - } - - if (unlikely(m_slow_path)) - { - /* - ignore TNTO_NO_LOGGING for slave thd. It is used to indicate - log-slave-updates option. This is instead handled in the - injector thread, by looking explicitly at the - opt_log_slave_updates flag. - */ - Thd_ndb *thd_ndb= get_thd_ndb(thd); - if (thd->slave_thread) - op->setAnyValue(thd->server_id); - else if (thd_ndb->trans_options & TNTO_NO_LOGGING) - op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING); - } - m_rows_changed++; - - /* - Execute write operation - NOTE When doing inserts with many values in - each INSERT statement it should not be necessary - to NoCommit the transaction between each row. - Find out how this is detected! - */ - m_rows_inserted++; - no_uncommitted_rows_update(1); - m_bulk_insert_not_flushed= TRUE; - if ((m_rows_to_insert == (ha_rows) 1) || - ((m_rows_inserted % m_bulk_insert_rows) == 0) || - m_primary_key_update || - set_blob_value) - { - // Send rows to NDB - DBUG_PRINT("info", ("Sending inserts to NDB, "\ - "rows_inserted: %d bulk_insert_rows: %d", - (int)m_rows_inserted, (int)m_bulk_insert_rows)); - - m_bulk_insert_not_flushed= FALSE; - if (m_transaction_on) - { - if (execute_no_commit(this,trans,FALSE) != 0) - { - m_skip_auto_increment= TRUE; - no_uncommitted_rows_execute_failure(); - DBUG_RETURN(ndb_err(trans)); - } - } - else - { - if (execute_commit(this,trans) != 0) - { - m_skip_auto_increment= TRUE; - no_uncommitted_rows_execute_failure(); - DBUG_RETURN(ndb_err(trans)); - } - if (trans->restart() != 0) - { - DBUG_ASSERT(0); - DBUG_RETURN(-1); - } - } - } - if ((has_auto_increment) && (m_skip_auto_increment)) - { - int ret_val; - if ((ret_val= set_auto_inc(table->next_number_field))) - { - DBUG_RETURN(ret_val); - } - } - 
m_skip_auto_increment= TRUE; - - DBUG_PRINT("exit",("ok")); - DBUG_RETURN(0); -} - - -/** - Compare if a key in a row has changed. -*/ - -int ha_ndbcluster::key_cmp(uint keynr, const uchar * old_row, - const uchar * new_row) -{ - KEY_PART_INFO *key_part=table->key_info[keynr].key_part; - KEY_PART_INFO *end=key_part+table->key_info[keynr].key_parts; - - for (; key_part != end ; key_part++) - { - if (key_part->null_bit) - { - if ((old_row[key_part->null_offset] & key_part->null_bit) != - (new_row[key_part->null_offset] & key_part->null_bit)) - return 1; - } - if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) - { - - if (key_part->field->cmp_binary((old_row + key_part->offset), - (new_row + key_part->offset), - (ulong) key_part->length)) - return 1; - } - else - { - if (memcmp(old_row+key_part->offset, new_row+key_part->offset, - key_part->length)) - return 1; - } - } - return 0; -} - -/** - Update one record in NDB using primary key. -*/ - -int ha_ndbcluster::update_row(const uchar *old_data, uchar *new_data) -{ - THD *thd= table->in_use; - NdbTransaction *trans= m_active_trans; - NdbScanOperation* cursor= m_active_cursor; - NdbOperation *op; - uint i; - uint32 old_part_id= 0, new_part_id= 0; - int error; - longlong func_value; - bool pk_update= (table_share->primary_key != MAX_KEY && - key_cmp(table_share->primary_key, old_data, new_data)); - DBUG_ENTER("update_row"); - m_write_op= TRUE; - - /* - * If IGNORE the ignore constraint violations on primary and unique keys, - * but check that it is not part of INSERT ... ON DUPLICATE KEY UPDATE - */ - if (m_ignore_dup_key && (thd->lex->sql_command == SQLCOM_UPDATE || - thd->lex->sql_command == SQLCOM_UPDATE_MULTI)) - { - NDB_WRITE_OP write_op= (pk_update) ? 
NDB_PK_UPDATE : NDB_UPDATE; - int peek_res= peek_indexed_rows(new_data, write_op); - - if (!peek_res) - { - DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY); - } - if (peek_res != HA_ERR_KEY_NOT_FOUND) - DBUG_RETURN(peek_res); - } - - ha_statistic_increment(&SSV::ha_update_count); - - if (m_use_partition_function && - (error= get_parts_for_update(old_data, new_data, table->record[0], - m_part_info, &old_part_id, &new_part_id, - &func_value))) - { - m_part_info->err_value= func_value; - DBUG_RETURN(error); - } - - /* - * Check for update of primary key or partition change - * for special handling - */ - if (pk_update || old_part_id != new_part_id) - { - int read_res, insert_res, delete_res, undo_res; - - DBUG_PRINT("info", ("primary key update or partition change, " - "doing read+delete+insert")); - // Get all old fields, since we optimize away fields not in query - read_res= complemented_read(old_data, new_data, old_part_id); - if (read_res) - { - DBUG_PRINT("info", ("read failed")); - DBUG_RETURN(read_res); - } - // Delete old row - m_primary_key_update= TRUE; - delete_res= delete_row(old_data); - m_primary_key_update= FALSE; - if (delete_res) - { - DBUG_PRINT("info", ("delete failed")); - DBUG_RETURN(delete_res); - } - // Insert new row - DBUG_PRINT("info", ("delete succeded")); - m_primary_key_update= TRUE; - /* - If we are updating a primary key with auto_increment - then we need to update the auto_increment counter - */ - if (table->found_next_number_field && - bitmap_is_set(table->write_set, - table->found_next_number_field->field_index) && - (error= set_auto_inc(table->found_next_number_field))) - { - DBUG_RETURN(error); - } - insert_res= write_row(new_data); - m_primary_key_update= FALSE; - if (insert_res) - { - DBUG_PRINT("info", ("insert failed")); - if (trans->commitStatus() == NdbConnection::Started) - { - // Undo delete_row(old_data) - m_primary_key_update= TRUE; - undo_res= write_row((uchar *)old_data); - if (undo_res) - push_warning(current_thd, - 
Sql_condition::WARN_LEVEL_WARN, - undo_res, - "NDB failed undoing delete at primary key update"); - m_primary_key_update= FALSE; - } - DBUG_RETURN(insert_res); - } - DBUG_PRINT("info", ("delete+insert succeeded")); - DBUG_RETURN(0); - } - /* - If we are updating a unique key with auto_increment - then we need to update the auto_increment counter - */ - if (table->found_next_number_field && - bitmap_is_set(table->write_set, - table->found_next_number_field->field_index) && - (error= set_auto_inc(table->found_next_number_field))) - { - DBUG_RETURN(error); - } - if (cursor) - { - /* - We are scanning records and want to update the record - that was just found, call updateTuple on the cursor - to take over the lock to a new update operation - And thus setting the primary key of the record from - the active record in cursor - */ - DBUG_PRINT("info", ("Calling updateTuple on cursor")); - if (!(op= cursor->updateCurrentTuple())) - ERR_RETURN(trans->getNdbError()); - m_lock_tuple= FALSE; - m_ops_pending++; - if (uses_blob_value()) - m_blobs_pending= TRUE; - if (m_use_partition_function) - cursor->setPartitionId(new_part_id); - } - else - { - if (!(op= trans->getNdbOperation(m_table)) || - op->updateTuple() != 0) - ERR_RETURN(trans->getNdbError()); - - if (m_use_partition_function) - op->setPartitionId(new_part_id); - if (table_share->primary_key == MAX_KEY) - { - // This table has no primary key, use "hidden" primary key - DBUG_PRINT("info", ("Using hidden key")); - - // Require that the PK for this record has previously been - // read into m_ref - DBUG_DUMP("key", m_ref, NDB_HIDDEN_PRIMARY_KEY_LENGTH); - - if (set_hidden_key(op, table->s->fields, m_ref)) - ERR_RETURN(op->getNdbError()); - } - else - { - int res; - if ((res= set_primary_key_from_record(op, old_data))) - DBUG_RETURN(res); - } - } - - m_rows_changed++; - - // Set non-key attribute(s) - my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); - for (i= 0; i < table_share->fields; i++) - { - 
Field *field= table->field[i]; - if (bitmap_is_set(table->write_set, i) && - (!(field->flags & PRI_KEY_FLAG)) && - set_ndb_value(op, field, i, new_data - table->record[0])) - { - dbug_tmp_restore_column_map(table->read_set, old_map); - ERR_RETURN(op->getNdbError()); - } - } - dbug_tmp_restore_column_map(table->read_set, old_map); - - if (m_use_partition_function) - { - if (func_value >= INT_MAX32) - func_value= INT_MAX32; - uint32 part_func_value= (uint32)func_value; - uint no_fields= table_share->fields; - if (table_share->primary_key == MAX_KEY) - no_fields++; - op->setValue(no_fields, part_func_value); - } - - if (unlikely(m_slow_path)) - { - /* - ignore TNTO_NO_LOGGING for slave thd. It is used to indicate - log-slave-updates option. This is instead handled in the - injector thread, by looking explicitly at the - opt_log_slave_updates flag. - */ - Thd_ndb *thd_ndb= get_thd_ndb(thd); - if (thd->slave_thread) - op->setAnyValue(thd->server_id); - else if (thd_ndb->trans_options & TNTO_NO_LOGGING) - op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING); - } - /* - Execute update operation if we are not doing a scan for update - and there exist UPDATE AFTER triggers - */ - - if ((!cursor || m_update_cannot_batch) && - execute_no_commit(this,trans,false) != 0) { - no_uncommitted_rows_execute_failure(); - DBUG_RETURN(ndb_err(trans)); - } - - DBUG_RETURN(0); -} - - -/** - Delete one record from NDB, using primary key . 
-*/ - -int ha_ndbcluster::delete_row(const uchar *record) -{ - THD *thd= table->in_use; - NdbTransaction *trans= m_active_trans; - NdbScanOperation* cursor= m_active_cursor; - NdbOperation *op; - uint32 part_id; - int error; - DBUG_ENTER("delete_row"); - m_write_op= TRUE; - - ha_statistic_increment(&SSV::ha_delete_count); - m_rows_changed++; - - if (m_use_partition_function && - (error= get_part_for_delete(record, table->record[0], m_part_info, - &part_id))) - { - DBUG_RETURN(error); - } - - if (cursor) - { - /* - We are scanning records and want to delete the record - that was just found, call deleteTuple on the cursor - to take over the lock to a new delete operation - And thus setting the primary key of the record from - the active record in cursor - */ - DBUG_PRINT("info", ("Calling deleteTuple on cursor")); - if (cursor->deleteCurrentTuple() != 0) - ERR_RETURN(trans->getNdbError()); - m_lock_tuple= FALSE; - m_ops_pending++; - - if (m_use_partition_function) - cursor->setPartitionId(part_id); - - no_uncommitted_rows_update(-1); - - if (unlikely(m_slow_path)) - { - /* - ignore TNTO_NO_LOGGING for slave thd. It is used to indicate - log-slave-updates option. This is instead handled in the - injector thread, by looking explicitly at the - opt_log_slave_updates flag. 
- */ - Thd_ndb *thd_ndb= get_thd_ndb(thd); - if (thd->slave_thread) - ((NdbOperation *)trans->getLastDefinedOperation())-> - setAnyValue(thd->server_id); - else if (thd_ndb->trans_options & TNTO_NO_LOGGING) - ((NdbOperation *)trans->getLastDefinedOperation())-> - setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING); - } - if (!(m_primary_key_update || m_delete_cannot_batch)) - // If deleting from cursor, NoCommit will be handled in next_result - DBUG_RETURN(0); - } - else - { - - if (!(op=trans->getNdbOperation(m_table)) || - op->deleteTuple() != 0) - ERR_RETURN(trans->getNdbError()); - - if (m_use_partition_function) - op->setPartitionId(part_id); - - no_uncommitted_rows_update(-1); - - if (table_share->primary_key == MAX_KEY) - { - // This table has no primary key, use "hidden" primary key - DBUG_PRINT("info", ("Using hidden key")); - - if (set_hidden_key(op, table->s->fields, m_ref)) - ERR_RETURN(op->getNdbError()); - } - else - { - if ((error= set_primary_key_from_record(op, record))) - DBUG_RETURN(error); - } - - if (unlikely(m_slow_path)) - { - /* - ignore TNTO_NO_LOGGING for slave thd. It is used to indicate - log-slave-updates option. This is instead handled in the - injector thread, by looking explicitly at the - opt_log_slave_updates flag. - */ - Thd_ndb *thd_ndb= get_thd_ndb(thd); - if (thd->slave_thread) - op->setAnyValue(thd->server_id); - else if (thd_ndb->trans_options & TNTO_NO_LOGGING) - op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING); - } - } - - // Execute delete operation - if (execute_no_commit(this,trans,FALSE) != 0) { - no_uncommitted_rows_execute_failure(); - DBUG_RETURN(ndb_err(trans)); - } - DBUG_RETURN(0); -} - -/** - Unpack a record read from NDB. - - @param buf Buffer to store read row - - @note - The data for each row is read directly into the - destination buffer. This function is primarily - called in order to check if any fields should be - set to null. 
-*/ - -void ndb_unpack_record(TABLE *table, NdbValue *value, - MY_BITMAP *defined, uchar *buf) -{ - Field **p_field= table->field, *field= *p_field; - my_ptrdiff_t row_offset= (my_ptrdiff_t) (buf - table->record[0]); - my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); - DBUG_ENTER("ndb_unpack_record"); - - /* - Set the filler bits of the null byte, since they are - not touched in the code below. - - The filler bits are the MSBs in the last null byte - */ - if (table->s->null_bytes > 0) - buf[table->s->null_bytes - 1]|= 256U - (1U << - table->s->last_null_bit_pos); - /* - Set null flag(s) - */ - for ( ; field; - p_field++, value++, field= *p_field) - { - field->set_notnull(row_offset); - if ((*value).ptr) - { - if (!(field->flags & BLOB_FLAG)) - { - int is_null= (*value).rec->isNULL(); - if (is_null) - { - if (is_null > 0) - { - DBUG_PRINT("info",("[%u] NULL", - (*value).rec->getColumn()->getColumnNo())); - field->set_null(row_offset); - } - else - { - DBUG_PRINT("info",("[%u] UNDEFINED", - (*value).rec->getColumn()->getColumnNo())); - bitmap_clear_bit(defined, - (*value).rec->getColumn()->getColumnNo()); - } - } - else if (field->type() == MYSQL_TYPE_BIT) - { - Field_bit *field_bit= static_cast<Field_bit*>(field); - - /* - Move internal field pointer to point to 'buf'. Calling - the correct member function directly since we know the - type of the object. 
- */ - field_bit->Field_bit::move_field_offset(row_offset); - if (field->pack_length() < 5) - { - DBUG_PRINT("info", ("bit field H'%.8X", - (*value).rec->u_32_value())); - field_bit->Field_bit::store((longlong) (*value).rec->u_32_value(), - FALSE); - } - else - { - DBUG_PRINT("info", ("bit field H'%.8X%.8X", - *(Uint32 *)(*value).rec->aRef(), - *((Uint32 *)(*value).rec->aRef()+1))); -#ifdef WORDS_BIGENDIAN - /* lsw is stored first */ - Uint32 *buf= (Uint32 *)(*value).rec->aRef(); - field_bit->Field_bit::store((((longlong)*buf) - & 0x000000000FFFFFFFFLL) - | - ((((longlong)*(buf+1)) << 32) - & 0xFFFFFFFF00000000LL), - TRUE); -#else - field_bit->Field_bit::store((longlong) - (*value).rec->u_64_value(), TRUE); -#endif - } - /* - Move back internal field pointer to point to original - value (usually record[0]). - */ - field_bit->Field_bit::move_field_offset(-row_offset); - DBUG_PRINT("info",("[%u] SET", - (*value).rec->getColumn()->getColumnNo())); - DBUG_DUMP("info", field->ptr, field->pack_length()); - } - else - { - DBUG_PRINT("info",("[%u] SET", - (*value).rec->getColumn()->getColumnNo())); - DBUG_DUMP("info", field->ptr, field->pack_length()); - } - } - else - { - NdbBlob *ndb_blob= (*value).blob; - uint col_no = ndb_blob->getColumn()->getColumnNo(); - int isNull; - ndb_blob->getDefined(isNull); - if (isNull == 1) - { - DBUG_PRINT("info",("[%u] NULL", col_no)); - field->set_null(row_offset); - } - else if (isNull == -1) - { - DBUG_PRINT("info",("[%u] UNDEFINED", col_no)); - bitmap_clear_bit(defined, col_no); - } - else - { -#ifndef DBUG_OFF - // pointer vas set in get_ndb_blobs_value - Field_blob *field_blob= (Field_blob*)field; - uchar *ptr; - field_blob->get_ptr(&ptr, row_offset); - uint32 len= field_blob->get_length(row_offset); - DBUG_PRINT("info",("[%u] SET ptr: 0x%lx len: %u", - col_no, (long) ptr, len)); -#endif - } - } - } - } - dbug_tmp_restore_column_map(table->write_set, old_map); - DBUG_VOID_RETURN; -} - -void ha_ndbcluster::unpack_record(uchar *buf) 
-{ - ndb_unpack_record(table, m_value, 0, buf); -#ifndef DBUG_OFF - // Read and print all values that was fetched - if (table_share->primary_key == MAX_KEY) - { - // Table with hidden primary key - int hidden_no= table_share->fields; - const NDBTAB *tab= m_table; - char buff[22]; - const NDBCOL *hidden_col= tab->getColumn(hidden_no); - const NdbRecAttr* rec= m_value[hidden_no].rec; - DBUG_ASSERT(rec); - DBUG_PRINT("hidden", ("%d: %s \"%s\"", hidden_no, - hidden_col->getName(), - llstr(rec->u_64_value(), buff))); - } - //DBUG_EXECUTE("value", print_results();); -#endif -} - -/** - Utility function to print/dump the fetched field. - - To avoid unnecessary work, wrap in DBUG_EXECUTE as in: - DBUG_EXECUTE("value", print_results();); -*/ - -void ha_ndbcluster::print_results() -{ - DBUG_ENTER("print_results"); - -#ifndef DBUG_OFF - - char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH]; - String type(buf_type, sizeof(buf_type), &my_charset_bin); - String val(buf_val, sizeof(buf_val), &my_charset_bin); - for (uint f= 0; f < table_share->fields; f++) - { - /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */ - char buf[2000]; - Field *field; - void* ptr; - NdbValue value; - - buf[0]= 0; - field= table->field[f]; - if (!(value= m_value[f]).ptr) - { - strmov(buf, "not read"); - goto print_value; - } - - ptr= field->ptr; - - if (! 
(field->flags & BLOB_FLAG)) - { - if (value.rec->isNULL()) - { - strmov(buf, "NULL"); - goto print_value; - } - type.length(0); - val.length(0); - field->sql_type(type); - field->val_str(&val); - my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr()); - } - else - { - NdbBlob *ndb_blob= value.blob; - bool isNull= TRUE; - ndb_blob->getNull(isNull); - if (isNull) - strmov(buf, "NULL"); - } - -print_value: - DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf)); - } -#endif - DBUG_VOID_RETURN; -} - - -int ha_ndbcluster::index_init(uint index, bool sorted) -{ - DBUG_ENTER("ha_ndbcluster::index_init"); - DBUG_PRINT("enter", ("index: %u sorted: %d", index, sorted)); - active_index= index; - m_sorted= sorted; - /* - Locks are are explicitly released in scan - unless m_lock.type == TL_READ_HIGH_PRIORITY - and no sub-sequent call to unlock_row() - */ - m_lock_tuple= FALSE; - DBUG_RETURN(0); -} - - -int ha_ndbcluster::index_end() -{ - DBUG_ENTER("ha_ndbcluster::index_end"); - DBUG_RETURN(close_scan()); -} - -/** - Check if key contains null. 
-*/ -static -int -check_null_in_key(const KEY* key_info, const uchar *key, uint key_len) -{ - KEY_PART_INFO *curr_part, *end_part; - const uchar* end_ptr= key + key_len; - curr_part= key_info->key_part; - end_part= curr_part + key_info->user_defined_key_parts; - - for (; curr_part != end_part && key < end_ptr; curr_part++) - { - if (curr_part->null_bit && *key) - return 1; - - key += curr_part->store_length; - } - return 0; -} - -int ha_ndbcluster::index_read(uchar *buf, - const uchar *key, uint key_len, - enum ha_rkey_function find_flag) -{ - key_range start_key; - bool descending= FALSE; - int rc; - DBUG_ENTER("ha_ndbcluster::index_read"); - DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", - active_index, key_len, find_flag)); - MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); - - start_key.key= key; - start_key.length= key_len; - start_key.flag= find_flag; - descending= FALSE; - switch (find_flag) { - case HA_READ_KEY_OR_PREV: - case HA_READ_BEFORE_KEY: - case HA_READ_PREFIX_LAST: - case HA_READ_PREFIX_LAST_OR_PREV: - descending= TRUE; - break; - default: - break; - } - rc= read_range_first_to_buf(&start_key, 0, descending, - m_sorted, buf); - MYSQL_INDEX_READ_ROW_DONE(rc); - DBUG_RETURN(rc); -} - - -int ha_ndbcluster::index_next(uchar *buf) -{ - int rc; - DBUG_ENTER("ha_ndbcluster::index_next"); - MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); - ha_statistic_increment(&SSV::ha_read_next_count); - rc= next_result(buf); - MYSQL_INDEX_READ_ROW_DONE(rc); - DBUG_RETURN(rc); -} - - -int ha_ndbcluster::index_prev(uchar *buf) -{ - int rc; - DBUG_ENTER("ha_ndbcluster::index_prev"); - MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); - ha_statistic_increment(&SSV::ha_read_prev_count); - rc= next_result(buf); - MYSQL_INDEX_READ_ROW_DONE(rc); - DBUG_RETURN(rc); -} - - -int ha_ndbcluster::index_first(uchar *buf) -{ - int rc; - DBUG_ENTER("ha_ndbcluster::index_first"); 
- MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); - ha_statistic_increment(&SSV::ha_read_first_count); - // Start the ordered index scan and fetch the first row - - // Only HA_READ_ORDER indexes get called by index_first - rc= ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL); - MYSQL_INDEX_READ_ROW_DONE(rc); - DBUG_RETURN(rc); -} - - -int ha_ndbcluster::index_last(uchar *buf) -{ - int rc; - DBUG_ENTER("ha_ndbcluster::index_last"); - MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); - ha_statistic_increment(&SSV::ha_read_last_count); - rc= ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL); - MYSQL_INDEX_READ_ROW_DONE(rc); - DBUG_RETURN(rc); -} - -int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len) -{ - DBUG_ENTER("ha_ndbcluster::index_read_last"); - DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST)); -} - -int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key, - const key_range *end_key, - bool desc, bool sorted, - uchar* buf) -{ - part_id_range part_spec; - ndb_index_type type= get_index_type(active_index); - const KEY* key_info= table->key_info+active_index; - int error; - DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf"); - DBUG_PRINT("info", ("desc: %d, sorted: %d", desc, sorted)); - - if (m_use_partition_function) - { - get_partition_set(table, buf, active_index, start_key, &part_spec); - DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u", - part_spec.start_part, part_spec.end_part)); - /* - If partition pruning has found no partition in set - we can return HA_ERR_END_OF_FILE - If partition pruning has found exactly one partition in set - we can optimize scan to run towards that partition only. 
- */ - if (part_spec.start_part > part_spec.end_part) - { - DBUG_RETURN(HA_ERR_END_OF_FILE); - } - else if (part_spec.start_part == part_spec.end_part) - { - /* - Only one partition is required to scan, if sorted is required we - don't need it any more since output from one ordered partitioned - index is always sorted. - */ - sorted= FALSE; - } - } - - m_write_op= FALSE; - switch (type){ - case PRIMARY_KEY_ORDERED_INDEX: - case PRIMARY_KEY_INDEX: - if (start_key && - start_key->length == key_info->key_length && - start_key->flag == HA_READ_KEY_EXACT) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - error= pk_read(start_key->key, start_key->length, buf, - part_spec.start_part); - DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error); - } - break; - case UNIQUE_ORDERED_INDEX: - case UNIQUE_INDEX: - if (start_key && start_key->length == key_info->key_length && - start_key->flag == HA_READ_KEY_EXACT && - !check_null_in_key(key_info, start_key->key, start_key->length)) - { - if (m_active_cursor && (error= close_scan())) - DBUG_RETURN(error); - - error= unique_index_read(start_key->key, start_key->length, buf); - DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? 
HA_ERR_END_OF_FILE : error); - } - else if (type == UNIQUE_INDEX) - DBUG_RETURN(unique_index_scan(key_info, - start_key->key, - start_key->length, - buf)); - break; - default: - break; - } - // Start the ordered index scan and fetch the first row - DBUG_RETURN(ordered_index_scan(start_key, end_key, sorted, desc, buf, - &part_spec)); -} - -int ha_ndbcluster::read_range_first(const key_range *start_key, - const key_range *end_key, - bool eq_r, bool sorted) -{ - int rc; - uchar* buf= table->record[0]; - DBUG_ENTER("ha_ndbcluster::read_range_first"); - MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); - rc= read_range_first_to_buf(start_key, end_key, FALSE, - sorted, buf); - MYSQL_INDEX_READ_ROW_DONE(rc); - DBUG_RETURN(rc); -} - -int ha_ndbcluster::read_range_next() -{ - int rc; - DBUG_ENTER("ha_ndbcluster::read_range_next"); - MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); - rc= next_result(table->record[0]); - MYSQL_INDEX_READ_ROW_DONE(rc); - DBUG_RETURN(rc); -} - - -int ha_ndbcluster::rnd_init(bool scan) -{ - NdbScanOperation *cursor= m_active_cursor; - DBUG_ENTER("rnd_init"); - DBUG_PRINT("enter", ("scan: %d", scan)); - // Check if scan is to be restarted - if (cursor) - { - if (!scan) - DBUG_RETURN(1); - if (cursor->restart(m_force_send) != 0) - { - DBUG_ASSERT(0); - DBUG_RETURN(-1); - } - } - index_init(table_share->primary_key, 0); - DBUG_RETURN(0); -} - -int ha_ndbcluster::close_scan() -{ - NdbTransaction *trans= m_active_trans; - DBUG_ENTER("close_scan"); - - m_multi_cursor= 0; - if (!m_active_cursor && !m_multi_cursor) - DBUG_RETURN(0); - - NdbScanOperation *cursor= m_active_cursor ? 
m_active_cursor : m_multi_cursor; - - if (m_lock_tuple) - { - /* - Lock level m_lock.type either TL_WRITE_ALLOW_WRITE - (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT - LOCK WITH SHARE MODE) and row was not explictly unlocked - with unlock_row() call - */ - NdbOperation *op; - // Lock row - DBUG_PRINT("info", ("Keeping lock on scanned row")); - - if (!(op= cursor->lockCurrentTuple())) - { - m_lock_tuple= FALSE; - ERR_RETURN(trans->getNdbError()); - } - m_ops_pending++; - } - m_lock_tuple= FALSE; - if (m_ops_pending) - { - /* - Take over any pending transactions to the - deleteing/updating transaction before closing the scan - */ - DBUG_PRINT("info", ("ops_pending: %ld", (long) m_ops_pending)); - if (execute_no_commit(this,trans,FALSE) != 0) { - no_uncommitted_rows_execute_failure(); - DBUG_RETURN(ndb_err(trans)); - } - m_ops_pending= 0; - } - - cursor->close(m_force_send, TRUE); - m_active_cursor= m_multi_cursor= NULL; - DBUG_RETURN(0); -} - -int ha_ndbcluster::rnd_end() -{ - DBUG_ENTER("rnd_end"); - DBUG_RETURN(close_scan()); -} - - -int ha_ndbcluster::rnd_next(uchar *buf) -{ - int rc; - DBUG_ENTER("rnd_next"); - MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, - TRUE); - ha_statistic_increment(&SSV::ha_read_rnd_next_count); - - if (!m_active_cursor) - rc= full_table_scan(buf); - else - rc= next_result(buf); - MYSQL_READ_ROW_DONE(rc); - DBUG_RETURN(rc); -} - - -/** - An "interesting" record has been found and it's pk - retrieved by calling position. Now it's time to read - the record from db once again. 
-*/ - -int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos) -{ - int rc; - DBUG_ENTER("rnd_pos"); - MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str, - FALSE); - ha_statistic_increment(&SSV::ha_read_rnd_count); - // The primary key for the record is stored in pos - // Perform a pk_read using primary key "index" - { - part_id_range part_spec; - uint key_length= ref_length; - if (m_use_partition_function) - { - if (table_share->primary_key == MAX_KEY) - { - /* - The partition id has been fetched from ndb - and has been stored directly after the hidden key - */ - DBUG_DUMP("key+part", pos, key_length); - key_length= ref_length - sizeof(m_part_id); - part_spec.start_part= part_spec.end_part= *(uint32 *)(pos + key_length); - } - else - { - key_range key_spec; - KEY *key_info= table->key_info + table_share->primary_key; - key_spec.key= pos; - key_spec.length= key_length; - key_spec.flag= HA_READ_KEY_EXACT; - get_full_part_id_from_key(table, buf, key_info, - &key_spec, &part_spec); - DBUG_ASSERT(part_spec.start_part == part_spec.end_part); - } - DBUG_PRINT("info", ("partition id %u", part_spec.start_part)); - } - DBUG_DUMP("key", pos, key_length); - rc= pk_read(pos, key_length, buf, part_spec.start_part); - MYSQL_READ_ROW_DONE(rc); - DBUG_RETURN(rc); - } -} - - -/** - Store the primary key of this record in ref - variable, so that the row can be retrieved again later - using "reference" in rnd_pos. 
-*/ - -void ha_ndbcluster::position(const uchar *record) -{ - KEY *key_info; - KEY_PART_INFO *key_part; - KEY_PART_INFO *end; - uchar *buff; - uint key_length; - - DBUG_ENTER("position"); - - if (table_share->primary_key != MAX_KEY) - { - key_length= ref_length; - key_info= table->key_info + table_share->primary_key; - key_part= key_info->key_part; - end= key_part + key_info->user_defined_key_parts; - buff= ref; - - for (; key_part != end; key_part++) - { - if (key_part->null_bit) { - /* Store 0 if the key part is a NULL part */ - if (record[key_part->null_offset] - & key_part->null_bit) { - *buff++= 1; - continue; - } - *buff++= 0; - } - - size_t len = key_part->length; - const uchar * ptr = record + key_part->offset; - Field *field = key_part->field; - if (field->type() == MYSQL_TYPE_VARCHAR) - { - if (((Field_varstring*)field)->length_bytes == 1) - { - /** - * Keys always use 2 bytes length - */ - buff[0] = ptr[0]; - buff[1] = 0; - memcpy(buff+2, ptr + 1, len); - } - else - { - memcpy(buff, ptr, len + 2); - } - len += 2; - } - else - { - memcpy(buff, ptr, len); - } - buff += len; - } - } - else - { - // No primary key, get hidden key - DBUG_PRINT("info", ("Getting hidden key")); - // If table has user defined partition save the partition id as well - if(m_use_partition_function) - { - DBUG_PRINT("info", ("Saving partition id %u", m_part_id)); - key_length= ref_length - sizeof(m_part_id); - memcpy(ref+key_length, (void *)&m_part_id, sizeof(m_part_id)); - } - else - key_length= ref_length; -#ifndef DBUG_OFF - int hidden_no= table->s->fields; - const NDBTAB *tab= m_table; - const NDBCOL *hidden_col= tab->getColumn(hidden_no); - DBUG_ASSERT(hidden_col->getPrimaryKey() && - hidden_col->getAutoIncrement() && - key_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH); -#endif - memcpy(ref, m_ref, key_length); - } -#ifndef DBUG_OFF - if (table_share->primary_key == MAX_KEY && m_use_partition_function) - DBUG_DUMP("key+part", ref, key_length+sizeof(m_part_id)); -#endif - 
DBUG_DUMP("ref", ref, key_length); - DBUG_VOID_RETURN; -} - - -int ha_ndbcluster::info(uint flag) -{ - int result= 0; - DBUG_ENTER("info"); - DBUG_PRINT("enter", ("flag: %d", flag)); - - if (flag & HA_STATUS_POS) - DBUG_PRINT("info", ("HA_STATUS_POS")); - if (flag & HA_STATUS_NO_LOCK) - DBUG_PRINT("info", ("HA_STATUS_NO_LOCK")); - if (flag & HA_STATUS_TIME) - DBUG_PRINT("info", ("HA_STATUS_TIME")); - if (flag & HA_STATUS_VARIABLE) - { - DBUG_PRINT("info", ("HA_STATUS_VARIABLE")); - if (m_table_info) - { - if (m_ha_not_exact_count) - stats.records= 100; - else - result= records_update(); - } - else - { - if ((my_errno= check_ndb_connection())) - DBUG_RETURN(my_errno); - Ndb *ndb= get_ndb(); - ndb->setDatabaseName(m_dbname); - struct Ndb_statistics stat; - if (ndb->setDatabaseName(m_dbname)) - { - DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM); - } - if (THDVAR(current_thd, use_exact_count) && - (result= ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat)) - == 0) - { - stats.mean_rec_length= stat.row_size; - stats.data_file_length= stat.fragment_memory; - stats.records= stat.row_count; - } - else - { - stats.mean_rec_length= 0; - stats.records= 100; - } - } - } - if (flag & HA_STATUS_CONST) - { - DBUG_PRINT("info", ("HA_STATUS_CONST")); - set_rec_per_key(); - } - if (flag & HA_STATUS_ERRKEY) - { - DBUG_PRINT("info", ("HA_STATUS_ERRKEY")); - errkey= m_dupkey; - } - if (flag & HA_STATUS_AUTO) - { - DBUG_PRINT("info", ("HA_STATUS_AUTO")); - if (m_table && table->found_next_number_field) - { - if ((my_errno= check_ndb_connection())) - DBUG_RETURN(my_errno); - Ndb *ndb= get_ndb(); - Ndb_tuple_id_range_guard g(m_share); - - Uint64 auto_increment_value64; - if (ndb->readAutoIncrementValue(m_table, g.range, - auto_increment_value64) == -1) - { - const NdbError err= ndb->getNdbError(); - sql_print_error("Error %lu in readAutoIncrementValue(): %s", - (ulong) err.code, err.message); - stats.auto_increment_value= ~(ulonglong)0; - } - else - stats.auto_increment_value= 
(ulonglong)auto_increment_value64; - } - } - - if(result == -1) - result= HA_ERR_NO_CONNECTION; - - DBUG_RETURN(result); -} - - -void ha_ndbcluster::get_dynamic_partition_info(PARTITION_STATS *stat_info, - uint part_id) -{ - /* - This functions should be fixed. Suggested fix: to - implement ndb function which retrives the statistics - about ndb partitions. - */ - bzero((char*) stat_info, sizeof(PARTITION_STATS)); - return; -} - - -int ha_ndbcluster::extra(enum ha_extra_function operation) -{ - DBUG_ENTER("extra"); - switch (operation) { - case HA_EXTRA_IGNORE_DUP_KEY: /* Dup keys don't rollback everything*/ - DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY")); - DBUG_PRINT("info", ("Ignoring duplicate key")); - m_ignore_dup_key= TRUE; - break; - case HA_EXTRA_NO_IGNORE_DUP_KEY: - DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY")); - m_ignore_dup_key= FALSE; - break; - case HA_EXTRA_IGNORE_NO_KEY: - DBUG_PRINT("info", ("HA_EXTRA_IGNORE_NO_KEY")); - DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit")); - m_ignore_no_key= TRUE; - break; - case HA_EXTRA_NO_IGNORE_NO_KEY: - DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_NO_KEY")); - DBUG_PRINT("info", ("Turning on AO_IgnoreError at Commit/NoCommit")); - m_ignore_no_key= FALSE; - break; - case HA_EXTRA_WRITE_CAN_REPLACE: - DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE")); - if (!m_has_unique_index || - current_thd->slave_thread) /* always set if slave, quick fix for bug 27378 */ - { - DBUG_PRINT("info", ("Turning ON use of write instead of insert")); - m_use_write= TRUE; - } - break; - case HA_EXTRA_WRITE_CANNOT_REPLACE: - DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE")); - DBUG_PRINT("info", ("Turning OFF use of write instead of insert")); - m_use_write= FALSE; - break; - case HA_EXTRA_DELETE_CANNOT_BATCH: - DBUG_PRINT("info", ("HA_EXTRA_DELETE_CANNOT_BATCH")); - m_delete_cannot_batch= TRUE; - break; - case HA_EXTRA_UPDATE_CANNOT_BATCH: - DBUG_PRINT("info", ("HA_EXTRA_UPDATE_CANNOT_BATCH")); - 
m_update_cannot_batch= TRUE; - break; - default: - break; - } - - DBUG_RETURN(0); -} - - -int ha_ndbcluster::reset() -{ - DBUG_ENTER("ha_ndbcluster::reset"); - if (m_cond) - { - m_cond->cond_clear(); - } - - /* - Regular partition pruning will set the bitmap appropriately. - Some queries like ALTER TABLE doesn't use partition pruning and - thus the 'used_partitions' bitmap needs to be initialized - */ - if (m_part_info) - bitmap_set_all(&m_part_info->used_partitions); - - /* reset flags set by extra calls */ - m_ignore_dup_key= FALSE; - m_use_write= FALSE; - m_ignore_no_key= FALSE; - m_delete_cannot_batch= FALSE; - m_update_cannot_batch= FALSE; - - DBUG_RETURN(0); -} - - -/** - Start of an insert, remember number of rows to be inserted, it will - be used in write_row and get_autoincrement to send an optimal number - of rows in each roundtrip to the server. - - @param - rows number of rows to insert, 0 if unknown -*/ - -void ha_ndbcluster::start_bulk_insert(ha_rows rows) -{ - int bytes, batch; - const NDBTAB *tab= m_table; - - DBUG_ENTER("start_bulk_insert"); - DBUG_PRINT("enter", ("rows: %d", (int)rows)); - - m_rows_inserted= (ha_rows) 0; - if (!m_use_write && m_ignore_dup_key) - { - /* - compare if expression with that in write_row - we have a situation where peek_indexed_rows() will be called - so we cannot batch - */ - DBUG_PRINT("info", ("Batching turned off as duplicate key is " - "ignored by using peek_row")); - m_rows_to_insert= 1; - m_bulk_insert_rows= 1; - DBUG_VOID_RETURN; - } - if (rows == (ha_rows) 0) - { - /* We don't know how many will be inserted, guess */ - m_rows_to_insert= m_autoincrement_prefetch; - } - else - m_rows_to_insert= rows; - - /* - Calculate how many rows that should be inserted - per roundtrip to NDB. This is done in order to minimize the - number of roundtrips as much as possible. However performance will - degrade if too many bytes are inserted, thus it's limited by this - calculation. 
- */ - const int bytesperbatch= 8192; - bytes= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns(); - batch= bytesperbatch/bytes; - batch= batch == 0 ? 1 : batch; - DBUG_PRINT("info", ("batch: %d, bytes: %d", batch, bytes)); - m_bulk_insert_rows= batch; - - DBUG_VOID_RETURN; -} - -/** - End of an insert. -*/ -int ha_ndbcluster::end_bulk_insert() -{ - int error= 0; - DBUG_ENTER("end_bulk_insert"); - - // Check if last inserts need to be flushed - if (m_bulk_insert_not_flushed) - { - NdbTransaction *trans= m_active_trans; - // Send rows to NDB - DBUG_PRINT("info", ("Sending inserts to NDB, "\ - "rows_inserted: %d bulk_insert_rows: %d", - (int) m_rows_inserted, (int) m_bulk_insert_rows)); - m_bulk_insert_not_flushed= FALSE; - if (m_transaction_on) - { - if (execute_no_commit(this, trans,FALSE) != 0) - { - no_uncommitted_rows_execute_failure(); - my_errno= error= ndb_err(trans); - } - } - else - { - if (execute_commit(this, trans) != 0) - { - no_uncommitted_rows_execute_failure(); - my_errno= error= ndb_err(trans); - } - else - { - int res __attribute__((unused))= trans->restart(); - DBUG_ASSERT(res == 0); - } - } - } - - m_rows_inserted= (ha_rows) 0; - m_rows_to_insert= (ha_rows) 1; - DBUG_RETURN(error); -} - - -int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size) -{ - DBUG_ENTER("extra_opt"); - DBUG_PRINT("enter", ("cache_size: %lu", cache_size)); - DBUG_RETURN(extra(operation)); -} - -static const char *ha_ndbcluster_exts[] = { - ha_ndb_ext, - NullS -}; - -const char** ha_ndbcluster::bas_ext() const -{ - return ha_ndbcluster_exts; -} - -/** - How many seeks it will take to read through the table. - - This is to be comparable to the number returned by records_in_range so - that we can decide if we should scan the table or use keys. 
-*/ - -double ha_ndbcluster::scan_time() -{ - DBUG_ENTER("ha_ndbcluster::scan_time()"); - double res= rows2double(stats.records*1000); - DBUG_PRINT("exit", ("table: %s value: %f", - m_tabname, res)); - DBUG_RETURN(res); -} - -/* - Convert MySQL table locks into locks supported by Ndb Cluster. - Note that MySQL Cluster does currently not support distributed - table locks, so to be safe one should set cluster in Single - User Mode, before relying on table locks when updating tables - from several MySQL servers -*/ - -THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd, - THR_LOCK_DATA **to, - enum thr_lock_type lock_type) -{ - DBUG_ENTER("store_lock"); - if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK) - { - - /* If we are not doing a LOCK TABLE, then allow multiple - writers */ - - /* Since NDB does not currently have table locks - this is treated as a ordinary lock */ - - if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && - lock_type <= TL_WRITE) && !thd->in_lock_tables) - lock_type= TL_WRITE_ALLOW_WRITE; - - /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ... - MySQL would use the lock TL_READ_NO_INSERT on t2, and that - would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts - to t2. Convert the lock to a normal read lock to allow - concurrent inserts to t2. 
*/ - - if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables) - lock_type= TL_READ; - - m_lock.type=lock_type; - } - *to++= &m_lock; - - DBUG_PRINT("exit", ("lock_type: %d", lock_type)); - - DBUG_RETURN(to); -} - -#ifndef DBUG_OFF -#define PRINT_OPTION_FLAGS(t) { \ - if (t->variables.option_bits & OPTION_NOT_AUTOCOMMIT) \ - DBUG_PRINT("thd->variables.option_bits", ("OPTION_NOT_AUTOCOMMIT")); \ - if (t->variables.option_bits & OPTION_BEGIN) \ - DBUG_PRINT("thd->variables.option_bits", ("OPTION_BEGIN")); \ - if (t->variables.option_bits & OPTION_TABLE_LOCK) \ - DBUG_PRINT("thd->variables.option_bits", ("OPTION_TABLE_LOCK")); \ -} -#else -#define PRINT_OPTION_FLAGS(t) -#endif - - -/* - As MySQL will execute an external lock for every new table it uses - we can use this to start the transactions. - If we are in auto_commit mode we just need to start a transaction - for the statement, this will be stored in thd_ndb.stmt. - If not, we have to start a master transaction if there doesn't exist - one from before, this will be stored in thd_ndb.all - - When a table lock is held one transaction will be started which holds - the table lock and for each statement a hupp transaction will be started - If we are locking the table then: - - save the NdbDictionary::Table for easy access - - save reference to table statistics - - refresh list of the indexes for the table if needed (if altered) - */ - -#ifdef HAVE_NDB_BINLOG -extern Master_info *active_mi; -static int ndbcluster_update_apply_status(THD *thd, int do_update) -{ - Thd_ndb *thd_ndb= get_thd_ndb(thd); - Ndb *ndb= thd_ndb->ndb; - NDBDICT *dict= ndb->getDictionary(); - const NDBTAB *ndbtab; - NdbTransaction *trans= thd_ndb->trans; - ndb->setDatabaseName(NDB_REP_DB); - Ndb_table_guard ndbtab_g(dict, NDB_APPLY_TABLE); - if (!(ndbtab= ndbtab_g.get_table())) - { - return -1; - } - NdbOperation *op= 0; - int r= 0; - r|= (op= trans->getNdbOperation(ndbtab)) == 0; - DBUG_ASSERT(r == 0); - if (do_update) - r|= 
op->updateTuple(); - else - r|= op->writeTuple(); - DBUG_ASSERT(r == 0); - // server_id - r|= op->equal(0u, (Uint32)thd->server_id); - DBUG_ASSERT(r == 0); - if (!do_update) - { - // epoch - r|= op->setValue(1u, (Uint64)0); - DBUG_ASSERT(r == 0); - } - // log_name - char tmp_buf[FN_REFLEN]; - ndb_pack_varchar(ndbtab->getColumn(2u), tmp_buf, - active_mi->rli.group_master_log_name, - strlen(active_mi->rli.group_master_log_name)); - r|= op->setValue(2u, tmp_buf); - DBUG_ASSERT(r == 0); - // start_pos - r|= op->setValue(3u, (Uint64)active_mi->rli.group_master_log_pos); - DBUG_ASSERT(r == 0); - // end_pos - r|= op->setValue(4u, (Uint64)active_mi->rli.group_master_log_pos + - ((Uint64)active_mi->rli.future_event_relay_log_pos - - (Uint64)active_mi->rli.group_relay_log_pos)); - DBUG_ASSERT(r == 0); - return 0; -} -#endif /* HAVE_NDB_BINLOG */ - -void ha_ndbcluster::transaction_checks(THD *thd) -{ - if (thd->lex->sql_command == SQLCOM_LOAD) - { - m_transaction_on= FALSE; - /* Would be simpler if has_transactions() didn't always say "yes" */ - thd->transaction.all.modified_non_trans_table= - thd->transaction.stmt.modified_non_trans_table= TRUE; - } - else if (!thd->transaction.on) - m_transaction_on= FALSE; - else - m_transaction_on= THDVAR(thd, use_transactions); -} - -int ha_ndbcluster::start_statement(THD *thd, - Thd_ndb *thd_ndb, - Ndb *ndb) -{ - DBUG_ENTER("ha_ndbcluster::start_statement"); - PRINT_OPTION_FLAGS(thd); - - trans_register_ha(thd, FALSE, ndbcluster_hton); - if (!thd_ndb->trans) - { - if (thd->in_multi_stmt_transaction_mode()) - trans_register_ha(thd, TRUE, ndbcluster_hton); - DBUG_PRINT("trans",("Starting transaction")); - thd_ndb->trans= ndb->startTransaction(); - if (thd_ndb->trans == NULL) - ERR_RETURN(ndb->getNdbError()); - thd_ndb->init_open_tables(); - thd_ndb->query_state&= NDB_QUERY_NORMAL; - thd_ndb->trans_options= 0; - thd_ndb->m_slow_path= FALSE; - if (!(thd->variables.option_bits & OPTION_BIN_LOG) || - thd->variables.binlog_format == 
BINLOG_FORMAT_STMT) - { - thd_ndb->trans_options|= TNTO_NO_LOGGING; - thd_ndb->m_slow_path= TRUE; - } - else if (thd->slave_thread) - thd_ndb->m_slow_path= TRUE; - } - /* - If this is the start of a LOCK TABLE, a table look - should be taken on the table in NDB - - Check if it should be read or write lock - */ - if (thd->variables.option_bits & OPTION_TABLE_LOCK) - { - //lockThisTable(); - DBUG_PRINT("info", ("Locking the table..." )); - } - DBUG_RETURN(0); -} - -int ha_ndbcluster::init_handler_for_statement(THD *thd, Thd_ndb *thd_ndb) -{ - /* - This is the place to make sure this handler instance - has a started transaction. - - The transaction is started by the first handler on which - MySQL Server calls external lock - - Other handlers in the same stmt or transaction should use - the same NDB transaction. This is done by setting up the m_active_trans - pointer to point to the NDB transaction. - */ - - DBUG_ENTER("ha_ndbcluster::init_handler_for_statement"); - // store thread specific data first to set the right context - m_force_send= THDVAR(thd, force_send); - m_ha_not_exact_count= !THDVAR(thd, use_exact_count); - m_autoincrement_prefetch= - (THDVAR(thd, autoincrement_prefetch_sz) > - DEFAULT_AUTO_PREFETCH) ? 
- (ha_rows) THDVAR(thd, autoincrement_prefetch_sz) - : (ha_rows) DEFAULT_AUTO_PREFETCH; - m_active_trans= thd_ndb->trans; - DBUG_ASSERT(m_active_trans); - // Start of transaction - m_rows_changed= 0; - m_ops_pending= 0; - m_slow_path= thd_ndb->m_slow_path; -#ifdef HAVE_NDB_BINLOG - if (unlikely(m_slow_path)) - { - if (m_share == ndb_apply_status_share && thd->slave_thread) - thd_ndb->trans_options|= TNTO_INJECTED_APPLY_STATUS; - } -#endif - - if (thd->in_multi_stmt_transaction_mode()) - { - const void *key= m_table; - HASH_SEARCH_STATE state; - THD_NDB_SHARE *thd_ndb_share= - (THD_NDB_SHARE*)my_hash_first(&thd_ndb->open_tables, (uchar *)&key, sizeof(key), &state); - while (thd_ndb_share && thd_ndb_share->key != key) - thd_ndb_share= (THD_NDB_SHARE*)my_hash_next(&thd_ndb->open_tables, (uchar *)&key, sizeof(key), &state); - if (thd_ndb_share == 0) - { - thd_ndb_share= (THD_NDB_SHARE *) alloc_root(&thd->transaction.mem_root, - sizeof(THD_NDB_SHARE)); - if (!thd_ndb_share) - { - mem_alloc_error(sizeof(THD_NDB_SHARE)); - DBUG_RETURN(1); - } - thd_ndb_share->key= key; - thd_ndb_share->stat.last_count= thd_ndb->count; - thd_ndb_share->stat.no_uncommitted_rows_count= 0; - thd_ndb_share->stat.records= ~(ha_rows)0; - my_hash_insert(&thd_ndb->open_tables, (uchar *)thd_ndb_share); - } - else if (thd_ndb_share->stat.last_count != thd_ndb->count) - { - thd_ndb_share->stat.last_count= thd_ndb->count; - thd_ndb_share->stat.no_uncommitted_rows_count= 0; - thd_ndb_share->stat.records= ~(ha_rows)0; - } - DBUG_PRINT("exit", ("thd_ndb_share: 0x%lx key: 0x%lx", - (long) thd_ndb_share, (long) key)); - m_table_info= &thd_ndb_share->stat; - } - else - { - struct Ndb_local_table_statistics &stat= m_table_info_instance; - stat.last_count= thd_ndb->count; - stat.no_uncommitted_rows_count= 0; - stat.records= ~(ha_rows)0; - m_table_info= &stat; - } - DBUG_RETURN(0); -} - -int ha_ndbcluster::external_lock(THD *thd, int lock_type) -{ - int error=0; - DBUG_ENTER("external_lock"); - - /* - Check 
that this handler instance has a connection - set up to the Ndb object of thd - */ - if (check_ndb_connection(thd)) - DBUG_RETURN(1); - - Thd_ndb *thd_ndb= get_thd_ndb(thd); - Ndb *ndb= thd_ndb->ndb; - - DBUG_PRINT("enter", ("this: 0x%lx thd: 0x%lx thd_ndb: 0x%lx " - "thd_ndb->lock_count: %d", - (long) this, (long) thd, (long) thd_ndb, - thd_ndb->lock_count)); - - if (lock_type != F_UNLCK) - { - DBUG_PRINT("info", ("lock_type != F_UNLCK")); - transaction_checks(thd); - if (!thd_ndb->lock_count++) - { - if ((error= start_statement(thd, thd_ndb, ndb))) - goto error; - } - if ((error= init_handler_for_statement(thd, thd_ndb))) - goto error; - DBUG_RETURN(0); - } - else - { - DBUG_PRINT("info", ("lock_type == F_UNLCK")); - - if (opt_ndb_cache_check_time && m_rows_changed) - { - DBUG_PRINT("info", ("Rows has changed and util thread is running")); - if (thd->in_multi_stmt_transaction_mode()) - { - DBUG_PRINT("info", ("Add share to list of tables to be invalidated")); - /* NOTE push_back allocates memory using transactions mem_root! */ - thd_ndb->changed_tables.push_back(m_share, &thd->transaction.mem_root); - } - - mysql_mutex_lock(&m_share->mutex); - DBUG_PRINT("info", ("Invalidating commit_count")); - m_share->commit_count= 0; - m_share->commit_count_lock++; - mysql_mutex_unlock(&m_share->mutex); - } - - if (!--thd_ndb->lock_count) - { - DBUG_PRINT("trans", ("Last external_lock")); - PRINT_OPTION_FLAGS(thd); - - if (!thd->in_multi_stmt_transaction_mode()) - { - if (thd_ndb->trans) - { - /* - Unlock is done without a transaction commit / rollback. - This happens if the thread didn't update any rows - We must in this case close the transaction to release resources - */ - DBUG_PRINT("trans",("ending non-updating transaction")); - ndb->closeTransaction(thd_ndb->trans); - thd_ndb->trans= NULL; - } - } - } - m_table_info= NULL; - - /* - This is the place to make sure this handler instance - no longer are connected to the active transaction. 
- - And since the handler is no longer part of the transaction - it can't have open cursors, ops or blobs pending. - */ - m_active_trans= NULL; - - if (m_active_cursor) - DBUG_PRINT("warning", ("m_active_cursor != NULL")); - m_active_cursor= NULL; - - if (m_multi_cursor) - DBUG_PRINT("warning", ("m_multi_cursor != NULL")); - m_multi_cursor= NULL; - - if (m_blobs_pending) - DBUG_PRINT("warning", ("blobs_pending != 0")); - m_blobs_pending= 0; - - if (m_ops_pending) - DBUG_PRINT("warning", ("ops_pending != 0L")); - m_ops_pending= 0; - DBUG_RETURN(0); - } -error: - thd_ndb->lock_count--; - DBUG_RETURN(error); -} - -/** - Unlock the last row read in an open scan. - Rows are unlocked by default in ndb, but - for SELECT FOR UPDATE and SELECT LOCK WIT SHARE MODE - locks are kept if unlock_row() is not called. -*/ - -void ha_ndbcluster::unlock_row() -{ - DBUG_ENTER("unlock_row"); - - DBUG_PRINT("info", ("Unlocking row")); - m_lock_tuple= FALSE; - DBUG_VOID_RETURN; -} - -/** - Start a transaction for running a statement if one is not - already running in a transaction. This will be the case in - a BEGIN; COMMIT; block - When using LOCK TABLE's external_lock will start a transaction - since ndb does not currently does not support table locking. -*/ - -int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type) -{ - int error=0; - DBUG_ENTER("start_stmt"); - - Thd_ndb *thd_ndb= get_thd_ndb(thd); - transaction_checks(thd); - if (!thd_ndb->start_stmt_count++) - { - Ndb *ndb= thd_ndb->ndb; - if ((error= start_statement(thd, thd_ndb, ndb))) - goto error; - } - if ((error= init_handler_for_statement(thd, thd_ndb))) - goto error; - DBUG_RETURN(0); -error: - thd_ndb->start_stmt_count--; - DBUG_RETURN(error); -} - - -/** - Commit a transaction started in NDB. 
-*/ - -static int ndbcluster_commit(handlerton *hton, THD *thd, bool all) -{ - int res= 0; - Thd_ndb *thd_ndb= get_thd_ndb(thd); - Ndb *ndb= thd_ndb->ndb; - NdbTransaction *trans= thd_ndb->trans; - - DBUG_ENTER("ndbcluster_commit"); - DBUG_ASSERT(ndb); - PRINT_OPTION_FLAGS(thd); - DBUG_PRINT("enter", ("Commit %s", (all ? "all" : "stmt"))); - thd_ndb->start_stmt_count= 0; - if (trans == NULL || (!all && thd->in_multi_stmt_transaction_mode())) - { - /* - An odditity in the handler interface is that commit on handlerton - is called to indicate end of statement only in cases where - autocommit isn't used and the all flag isn't set. - - We also leave quickly when a transaction haven't even been started, - in this case we are safe that no clean up is needed. In this case - the MySQL Server could handle the query without contacting the - NDB kernel. - */ - DBUG_PRINT("info", ("Commit before start or end-of-statement only")); - DBUG_RETURN(0); - } - -#ifdef HAVE_NDB_BINLOG - if (unlikely(thd_ndb->m_slow_path)) - { - if (thd->slave_thread) - ndbcluster_update_apply_status - (thd, thd_ndb->trans_options & TNTO_INJECTED_APPLY_STATUS); - } -#endif /* HAVE_NDB_BINLOG */ - - if (execute_commit(thd,trans) != 0) - { - const NdbError err= trans->getNdbError(); - const NdbOperation *error_op= trans->getNdbErrorOperation(); - set_ndb_err(thd, err); - res= ndb_to_mysql_error(&err); - if (res != -1) - ndbcluster_print_error(res, error_op); - } - ndb->closeTransaction(trans); - thd_ndb->trans= NULL; - - /* Clear commit_count for tables changed by transaction */ - NDB_SHARE* share; - List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables); - while ((share= it++)) - { - mysql_mutex_lock(&share->mutex); - DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %lu", - share->table_name, (ulong) share->commit_count)); - share->commit_count= 0; - share->commit_count_lock++; - mysql_mutex_unlock(&share->mutex); - } - thd_ndb->changed_tables.empty(); - - DBUG_RETURN(res); 
-} - - -/** - Rollback a transaction started in NDB. -*/ - -static int ndbcluster_rollback(handlerton *hton, THD *thd, bool all) -{ - int res= 0; - Thd_ndb *thd_ndb= get_thd_ndb(thd); - Ndb *ndb= thd_ndb->ndb; - NdbTransaction *trans= thd_ndb->trans; - - DBUG_ENTER("ndbcluster_rollback"); - DBUG_ASSERT(ndb); - thd_ndb->start_stmt_count= 0; - if (trans == NULL || (!all && - thd->in_multi_stmt_transaction_mode())) - { - /* Ignore end-of-statement until real rollback or commit is called */ - DBUG_PRINT("info", ("Rollback before start or end-of-statement only")); - DBUG_RETURN(0); - } - - if (trans->execute(NdbTransaction::Rollback) != 0) - { - const NdbError err= trans->getNdbError(); - const NdbOperation *error_op= trans->getNdbErrorOperation(); - set_ndb_err(thd, err); - res= ndb_to_mysql_error(&err); - if (res != -1) - ndbcluster_print_error(res, error_op); - } - ndb->closeTransaction(trans); - thd_ndb->trans= NULL; - - /* Clear list of tables changed by transaction */ - thd_ndb->changed_tables.empty(); - - DBUG_RETURN(res); -} - - -/** - Define NDB column based on Field. - - Not member of ha_ndbcluster because NDBCOL cannot be declared. - - MySQL text types with character set "binary" are mapped to true - NDB binary types without a character set. This may change. - - @return - Returns 0 or mysql error code. 
-*/ - -static int create_ndb_column(NDBCOL &col, - Field *field, - HA_CREATE_INFO *info) -{ - // Set name - if (col.setName(field->field_name)) - { - return (my_errno= errno); - } - // Get char set - CHARSET_INFO *cs= field->charset(); - // Set type and sizes - const enum enum_field_types mysql_type= field->real_type(); - switch (mysql_type) { - // Numeric types - case MYSQL_TYPE_TINY: - if (field->flags & UNSIGNED_FLAG) - col.setType(NDBCOL::Tinyunsigned); - else - col.setType(NDBCOL::Tinyint); - col.setLength(1); - break; - case MYSQL_TYPE_SHORT: - if (field->flags & UNSIGNED_FLAG) - col.setType(NDBCOL::Smallunsigned); - else - col.setType(NDBCOL::Smallint); - col.setLength(1); - break; - case MYSQL_TYPE_LONG: - if (field->flags & UNSIGNED_FLAG) - col.setType(NDBCOL::Unsigned); - else - col.setType(NDBCOL::Int); - col.setLength(1); - break; - case MYSQL_TYPE_INT24: - if (field->flags & UNSIGNED_FLAG) - col.setType(NDBCOL::Mediumunsigned); - else - col.setType(NDBCOL::Mediumint); - col.setLength(1); - break; - case MYSQL_TYPE_LONGLONG: - if (field->flags & UNSIGNED_FLAG) - col.setType(NDBCOL::Bigunsigned); - else - col.setType(NDBCOL::Bigint); - col.setLength(1); - break; - case MYSQL_TYPE_FLOAT: - col.setType(NDBCOL::Float); - col.setLength(1); - break; - case MYSQL_TYPE_DOUBLE: - col.setType(NDBCOL::Double); - col.setLength(1); - break; - case MYSQL_TYPE_DECIMAL: - { - Field_decimal *f= (Field_decimal*)field; - uint precision= f->pack_length(); - uint scale= f->decimals(); - if (field->flags & UNSIGNED_FLAG) - { - col.setType(NDBCOL::Olddecimalunsigned); - precision-= (scale > 0); - } - else - { - col.setType(NDBCOL::Olddecimal); - precision-= 1 + (scale > 0); - } - col.setPrecision(precision); - col.setScale(scale); - col.setLength(1); - } - break; - case MYSQL_TYPE_NEWDECIMAL: - { - Field_new_decimal *f= (Field_new_decimal*)field; - uint precision= f->precision; - uint scale= f->decimals(); - if (field->flags & UNSIGNED_FLAG) - { - 
col.setType(NDBCOL::Decimalunsigned); - } - else - { - col.setType(NDBCOL::Decimal); - } - col.setPrecision(precision); - col.setScale(scale); - col.setLength(1); - } - break; - // Date types - case MYSQL_TYPE_DATETIME: - col.setType(NDBCOL::Datetime); - col.setLength(1); - break; - case MYSQL_TYPE_DATE: // ? - col.setType(NDBCOL::Char); - col.setLength(field->pack_length()); - break; - case MYSQL_TYPE_NEWDATE: - col.setType(NDBCOL::Date); - col.setLength(1); - break; - case MYSQL_TYPE_TIME: - col.setType(NDBCOL::Time); - col.setLength(1); - break; - case MYSQL_TYPE_YEAR: - col.setType(NDBCOL::Year); - col.setLength(1); - break; - case MYSQL_TYPE_TIMESTAMP: - col.setType(NDBCOL::Timestamp); - col.setLength(1); - break; - // Char types - case MYSQL_TYPE_STRING: - if (field->pack_length() == 0) - { - col.setType(NDBCOL::Bit); - col.setLength(1); - } - else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin) - { - col.setType(NDBCOL::Binary); - col.setLength(field->pack_length()); - } - else - { - col.setType(NDBCOL::Char); - col.setCharset(cs); - col.setLength(field->pack_length()); - } - break; - case MYSQL_TYPE_VAR_STRING: // ? 
- case MYSQL_TYPE_VARCHAR: - { - Field_varstring* f= (Field_varstring*)field; - if (f->length_bytes == 1) - { - if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin) - col.setType(NDBCOL::Varbinary); - else { - col.setType(NDBCOL::Varchar); - col.setCharset(cs); - } - } - else if (f->length_bytes == 2) - { - if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin) - col.setType(NDBCOL::Longvarbinary); - else { - col.setType(NDBCOL::Longvarchar); - col.setCharset(cs); - } - } - else - { - return HA_ERR_UNSUPPORTED; - } - col.setLength(field->field_length); - } - break; - // Blob types (all come in as MYSQL_TYPE_BLOB) - mysql_type_tiny_blob: - case MYSQL_TYPE_TINY_BLOB: - if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin) - col.setType(NDBCOL::Blob); - else { - col.setType(NDBCOL::Text); - col.setCharset(cs); - } - col.setInlineSize(256); - // No parts - col.setPartSize(0); - col.setStripeSize(0); - break; - //mysql_type_blob: - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_BLOB: - if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin) - col.setType(NDBCOL::Blob); - else { - col.setType(NDBCOL::Text); - col.setCharset(cs); - } - { - Field_blob *field_blob= (Field_blob *)field; - /* - * max_data_length is 2^8-1, 2^16-1, 2^24-1 for tiny, blob, medium. - * Tinyblob gets no blob parts. The other cases are just a crude - * way to control part size and striping. - * - * In mysql blob(256) is promoted to blob(65535) so it does not - * in fact fit "inline" in NDB. 
- */ - if (field_blob->max_data_length() < (1 << 8)) - goto mysql_type_tiny_blob; - else if (field_blob->max_data_length() < (1 << 16)) - { - col.setInlineSize(256); - col.setPartSize(2000); - col.setStripeSize(16); - } - else if (field_blob->max_data_length() < (1 << 24)) - goto mysql_type_medium_blob; - else - goto mysql_type_long_blob; - } - break; - mysql_type_medium_blob: - case MYSQL_TYPE_MEDIUM_BLOB: - if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin) - col.setType(NDBCOL::Blob); - else { - col.setType(NDBCOL::Text); - col.setCharset(cs); - } - col.setInlineSize(256); - col.setPartSize(4000); - col.setStripeSize(8); - break; - mysql_type_long_blob: - case MYSQL_TYPE_LONG_BLOB: - if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin) - col.setType(NDBCOL::Blob); - else { - col.setType(NDBCOL::Text); - col.setCharset(cs); - } - col.setInlineSize(256); - col.setPartSize(8000); - col.setStripeSize(4); - break; - // Other types - case MYSQL_TYPE_ENUM: - col.setType(NDBCOL::Char); - col.setLength(field->pack_length()); - break; - case MYSQL_TYPE_SET: - col.setType(NDBCOL::Char); - col.setLength(field->pack_length()); - break; - case MYSQL_TYPE_BIT: - { - int no_of_bits= field->field_length; - col.setType(NDBCOL::Bit); - if (!no_of_bits) - col.setLength(1); - else - col.setLength(no_of_bits); - break; - } - case MYSQL_TYPE_NULL: - goto mysql_type_unsupported; - mysql_type_unsupported: - default: - return HA_ERR_UNSUPPORTED; - } - // Set nullable and pk - col.setNullable(field->maybe_null()); - col.setPrimaryKey(field->flags & PRI_KEY_FLAG); - // Set autoincrement - if (field->flags & AUTO_INCREMENT_FLAG) - { -#ifndef DBUG_OFF - char buff[22]; -#endif - col.setAutoIncrement(TRUE); - ulonglong value= info->auto_increment_value ? 
- info->auto_increment_value : (ulonglong) 1; - DBUG_PRINT("info", ("Autoincrement key, initial: %s", llstr(value, buff))); - col.setAutoIncrementInitialValue(value); - } - else - col.setAutoIncrement(FALSE); - return 0; -} - -/** - Create a table in NDB Cluster -*/ - -int ha_ndbcluster::create(const char *name, - TABLE *form, - HA_CREATE_INFO *create_info) -{ - THD *thd= current_thd; - NDBTAB tab; - NDBCOL col; - size_t pack_length, length; - uint i, pk_length= 0; - uchar *data= NULL, *pack_data= NULL; - bool create_from_engine= (create_info->table_options & HA_OPTION_CREATE_FROM_ENGINE); - bool is_truncate= (thd->lex->sql_command == SQLCOM_TRUNCATE); - char tablespace[FN_LEN + 1]; - NdbDictionary::Table::SingleUserMode single_user_mode= NdbDictionary::Table::SingleUserModeLocked; - - DBUG_ENTER("ha_ndbcluster::create"); - DBUG_PRINT("enter", ("name: %s", name)); - - DBUG_ASSERT(*fn_rext((char*)name) == 0); - set_dbname(name); - set_tabname(name); - - if ((my_errno= check_ndb_connection())) - DBUG_RETURN(my_errno); - - Ndb *ndb= get_ndb(); - NDBDICT *dict= ndb->getDictionary(); - - if (is_truncate) - { - { - Ndb_table_guard ndbtab_g(dict, m_tabname); - if (!(m_table= ndbtab_g.get_table())) - ERR_RETURN(dict->getNdbError()); - if ((get_tablespace_name(thd, tablespace, FN_LEN))) - create_info->tablespace= tablespace; - m_table= NULL; - } - DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE")); - if ((my_errno= delete_table(name))) - DBUG_RETURN(my_errno); - } - table= form; - if (create_from_engine) - { - /* - Table already exists in NDB and frm file has been created by - caller. 
- Do Ndb specific stuff, such as create a .ndb file - */ - if ((my_errno= write_ndb_file(name))) - DBUG_RETURN(my_errno); -#ifdef HAVE_NDB_BINLOG - ndbcluster_create_binlog_setup(get_ndb(), name, strlen(name), - m_dbname, m_tabname, FALSE); -#endif /* HAVE_NDB_BINLOG */ - DBUG_RETURN(my_errno); - } - -#ifdef HAVE_NDB_BINLOG - /* - Don't allow table creation unless - schema distribution table is setup - ( unless it is a creation of the schema dist table itself ) - */ - if (!ndb_schema_share) - { - if (!(strcmp(m_dbname, NDB_REP_DB) == 0 && - strcmp(m_tabname, NDB_SCHEMA_TABLE) == 0)) - { - DBUG_PRINT("info", ("Schema distribution table not setup")); - DBUG_ASSERT(ndb_schema_share); - DBUG_RETURN(HA_ERR_NO_CONNECTION); - } - single_user_mode = NdbDictionary::Table::SingleUserModeReadWrite; - } -#endif /* HAVE_NDB_BINLOG */ - - DBUG_PRINT("table", ("name: %s", m_tabname)); - if (tab.setName(m_tabname)) - { - DBUG_RETURN(my_errno= errno); - } - tab.setLogging(!(create_info->options & HA_LEX_CREATE_TMP_TABLE)); - tab.setSingleUserMode(single_user_mode); - - // Save frm data for this table - if (readfrm(name, &data, &length)) - DBUG_RETURN(1); - if (packfrm(data, length, &pack_data, &pack_length)) - { - my_free(data); - DBUG_RETURN(2); - } - DBUG_PRINT("info", - ("setFrm data: 0x%lx len: %lu", (long) pack_data, - (ulong) pack_length)); - tab.setFrm(pack_data, pack_length); - my_free(data); - my_free(pack_data); - - /* - Check for disk options - */ - if (create_info->storage_media == HA_SM_DISK) - { - if (create_info->tablespace) - tab.setTablespaceName(create_info->tablespace); - else - tab.setTablespaceName("DEFAULT-TS"); - } - else if (create_info->tablespace) - { - if (create_info->storage_media == HA_SM_MEMORY) - { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - ER(ER_ILLEGAL_HA_CREATE_OPTION), - ndbcluster_hton_name, - "TABLESPACE currently only supported for " - "STORAGE DISK"); - DBUG_RETURN(HA_ERR_UNSUPPORTED); - } - 
tab.setTablespaceName(create_info->tablespace); - create_info->storage_media = HA_SM_DISK; //if use tablespace, that also means store on disk - } - - /* - Handle table row type - - Default is to let table rows have var part reference so that online - add column can be performed in the future. Explicitly setting row - type to fixed will omit var part reference, which will save data - memory in ndb, but at the cost of not being able to online add - column to this table - */ - switch (create_info->row_type) { - case ROW_TYPE_FIXED: - tab.setForceVarPart(FALSE); - break; - case ROW_TYPE_DYNAMIC: - /* fall through, treat as default */ - default: - /* fall through, treat as default */ - case ROW_TYPE_DEFAULT: - tab.setForceVarPart(TRUE); - break; - } - - /* - Setup columns - */ - for (i= 0; i < form->s->fields; i++) - { - Field *field= form->field[i]; - DBUG_PRINT("info", ("name: %s type: %u pack_length: %d", - field->field_name, field->real_type(), - field->pack_length())); - if ((my_errno= create_ndb_column(col, field, create_info))) - DBUG_RETURN(my_errno); - - if (create_info->storage_media == HA_SM_DISK) - col.setStorageType(NdbDictionary::Column::StorageTypeDisk); - else - col.setStorageType(NdbDictionary::Column::StorageTypeMemory); - - switch (create_info->row_type) { - case ROW_TYPE_FIXED: - if (field_type_forces_var_part(field->type())) - { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - ER(ER_ILLEGAL_HA_CREATE_OPTION), - ndbcluster_hton_name, - "Row format FIXED incompatible with " - "variable sized attribute"); - DBUG_RETURN(HA_ERR_UNSUPPORTED); - } - break; - case ROW_TYPE_DYNAMIC: - /* - Future: make columns dynamic in this case - */ - break; - default: - break; - } - if (tab.addColumn(col)) - { - DBUG_RETURN(my_errno= errno); - } - if (col.getPrimaryKey()) - pk_length += (field->pack_length() + 3) / 4; - } - - KEY* key_info; - for (i= 0, key_info= form->key_info; i < form->s->keys; i++, key_info++) - { - 
KEY_PART_INFO *key_part= key_info->key_part; - KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts; - for (; key_part != end; key_part++) - tab.getColumn(key_part->fieldnr-1)->setStorageType( - NdbDictionary::Column::StorageTypeMemory); - } - - // No primary key, create shadow key as 64 bit, auto increment - if (form->s->primary_key == MAX_KEY) - { - DBUG_PRINT("info", ("Generating shadow key")); - if (col.setName("$PK")) - { - DBUG_RETURN(my_errno= errno); - } - col.setType(NdbDictionary::Column::Bigunsigned); - col.setLength(1); - col.setNullable(FALSE); - col.setPrimaryKey(TRUE); - col.setAutoIncrement(TRUE); - if (tab.addColumn(col)) - { - DBUG_RETURN(my_errno= errno); - } - pk_length += 2; - } - - // Make sure that blob tables don't have to big part size - for (i= 0; i < form->s->fields; i++) - { - /** - * The extra +7 concists - * 2 - words from pk in blob table - * 5 - from extra words added by tup/dict?? - */ - switch (form->field[i]->real_type()) { - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_LONG_BLOB: - { - NdbDictionary::Column * column= tab.getColumn(i); - int size= pk_length + (column->getPartSize()+3)/4 + 7; - if (size > NDB_MAX_TUPLE_SIZE_IN_WORDS && - (pk_length+7) < NDB_MAX_TUPLE_SIZE_IN_WORDS) - { - size= NDB_MAX_TUPLE_SIZE_IN_WORDS - pk_length - 7; - column->setPartSize(4*size); - } - /** - * If size > NDB_MAX and pk_length+7 >= NDB_MAX - * then the table can't be created anyway, so skip - * changing part size, and have error later - */ - } - default: - break; - } - } - - // Check partition info - partition_info *part_info= form->part_info; - if ((my_errno= set_up_partition_info(part_info, form, (void*)&tab))) - { - DBUG_RETURN(my_errno); - } - - // Create the table in NDB - if (dict->createTable(tab) != 0) - { - const NdbError err= dict->getNdbError(); - set_ndb_err(thd, err); - my_errno= ndb_to_mysql_error(&err); - DBUG_RETURN(my_errno); - } - - Ndb_table_guard ndbtab_g(dict, 
m_tabname); - // temporary set m_table during create - // reset at return - m_table= ndbtab_g.get_table(); - // TODO check also that we have the same frm... - if (!m_table) - { - /* purecov: begin deadcode */ - const NdbError err= dict->getNdbError(); - set_ndb_err(thd, err); - my_errno= ndb_to_mysql_error(&err); - DBUG_RETURN(my_errno); - /* purecov: end */ - } - - DBUG_PRINT("info", ("Table %s/%s created successfully", - m_dbname, m_tabname)); - - // Create secondary indexes - my_errno= create_indexes(ndb, form); - - if (!my_errno) - my_errno= write_ndb_file(name); - else - { - /* - Failed to create an index, - drop the table (and all it's indexes) - */ - while (dict->dropTableGlobal(*m_table)) - { - switch (dict->getNdbError().status) - { - case NdbError::TemporaryError: - if (!thd->killed) - continue; // retry indefinitly - break; - default: - break; - } - break; - } - m_table = 0; - DBUG_RETURN(my_errno); - } - -#ifdef HAVE_NDB_BINLOG - if (!my_errno) - { - NDB_SHARE *share= 0; - mysql_mutex_lock(&ndbcluster_mutex); - /* - First make sure we get a "fresh" share here, not an old trailing one... 
- */ - { - uint length= (uint) strlen(name); - if ((share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables, - (uchar*) name, length))) - handle_trailing_share(share); - } - /* - get a new share - */ - - /* ndb_share reference create */ - if (!(share= get_share(name, form, TRUE, TRUE))) - { - sql_print_error("NDB: allocating table share for %s failed", name); - /* my_errno is set */ - } - else - { - DBUG_PRINT("NDB_SHARE", ("%s binlog create use_count: %u", - share->key, share->use_count)); - } - mysql_mutex_unlock(&ndbcluster_mutex); - - while (!IS_TMP_PREFIX(m_tabname)) - { - String event_name(INJECTOR_EVENT_LEN); - ndb_rep_event_name(&event_name,m_dbname,m_tabname); - int do_event_op= ndb_binlog_running; - - if (!ndb_schema_share && - strcmp(share->db, NDB_REP_DB) == 0 && - strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0) - do_event_op= 1; - - /* - Always create an event for the table, as other mysql servers - expect it to be there. - */ - if (!ndbcluster_create_event(ndb, m_table, event_name.c_ptr(), share, - share && do_event_op ? 2 : 1/* push warning */)) - { - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: CREATE TABLE Event: %s", - event_name.c_ptr()); - if (share && - ndbcluster_create_event_ops(share, m_table, event_name.c_ptr())) - { - sql_print_error("NDB Binlog: FAILED CREATE TABLE event operations." - " Event: %s", name); - /* a warning has been issued to the client */ - } - } - /* - warning has been issued if ndbcluster_create_event failed - and (share && do_event_op) - */ - if (share && !do_event_op) - share->flags|= NSF_NO_BINLOG; - ndbcluster_log_schema_op(thd, share, - thd->query(), thd->query_length(), - share->db, share->table_name, - m_table->getObjectId(), - m_table->getObjectVersion(), - (is_truncate) ? 
- SOT_TRUNCATE_TABLE : SOT_CREATE_TABLE, - 0, 0); - break; - } - } -#endif /* HAVE_NDB_BINLOG */ - - m_table= 0; - DBUG_RETURN(my_errno); -} - -int ha_ndbcluster::create_handler_files(const char *file, - const char *old_name, - int action_flag, - HA_CREATE_INFO *create_info) -{ - Ndb* ndb; - const NDBTAB *tab; - uchar *data= NULL, *pack_data= NULL; - size_t length, pack_length; - int error= 0; - - DBUG_ENTER("create_handler_files"); - - if (action_flag != CHF_INDEX_FLAG) - { - DBUG_RETURN(FALSE); - } - DBUG_PRINT("enter", ("file: %s", file)); - if (!(ndb= get_ndb())) - DBUG_RETURN(HA_ERR_NO_CONNECTION); - - NDBDICT *dict= ndb->getDictionary(); - if (!create_info->frm_only) - DBUG_RETURN(0); // Must be a create, ignore since frm is saved in create - - // TODO handle this - DBUG_ASSERT(m_table != 0); - - set_dbname(file); - set_tabname(file); - Ndb_table_guard ndbtab_g(dict, m_tabname); - DBUG_PRINT("info", ("m_dbname: %s, m_tabname: %s", m_dbname, m_tabname)); - if (!(tab= ndbtab_g.get_table())) - DBUG_RETURN(0); // Unkown table, must be temporary table - - DBUG_ASSERT(get_ndb_share_state(m_share) == NSS_ALTERED); - if (readfrm(file, &data, &length) || - packfrm(data, length, &pack_data, &pack_length)) - { - DBUG_PRINT("info", ("Missing frm for %s", m_tabname)); - my_free(data); - my_free(pack_data); - error= 1; - } - else - { - DBUG_PRINT("info", ("Table %s has changed, altering frm in ndb", - m_tabname)); - NdbDictionary::Table new_tab= *tab; - new_tab.setFrm(pack_data, pack_length); - if (dict->alterTableGlobal(*tab, new_tab)) - { - set_ndb_err(current_thd, dict->getNdbError()); - error= ndb_to_mysql_error(&dict->getNdbError()); - } - my_free(data); - my_free(pack_data); - } - - set_ndb_share_state(m_share, NSS_INITIAL); - /* ndb_share reference schema(?) free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog schema(?) 
free use_count: %u", - m_share->key, m_share->use_count)); - free_share(&m_share); // Decrease ref_count - - DBUG_RETURN(error); -} - -int ha_ndbcluster::create_index(const char *name, KEY *key_info, - NDB_INDEX_TYPE idx_type, uint idx_no) -{ - int error= 0; - char unique_name[FN_LEN + 1]; - static const char* unique_suffix= "$unique"; - DBUG_ENTER("ha_ndbcluster::create_ordered_index"); - DBUG_PRINT("info", ("Creating index %u: %s", idx_no, name)); - - if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX) - { - strxnmov(unique_name, FN_LEN, name, unique_suffix, NullS); - DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d", - unique_name, idx_no)); - } - - switch (idx_type){ - case PRIMARY_KEY_INDEX: - // Do nothing, already created - break; - case PRIMARY_KEY_ORDERED_INDEX: - error= create_ordered_index(name, key_info); - break; - case UNIQUE_ORDERED_INDEX: - if (!(error= create_ordered_index(name, key_info))) - error= create_unique_index(unique_name, key_info); - break; - case UNIQUE_INDEX: - if (check_index_fields_not_null(key_info)) - { - push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, - ER_NULL_COLUMN_IN_INDEX, - "Ndb does not support unique index on NULL valued attributes, index access with NULL value will become full table scan"); - } - error= create_unique_index(unique_name, key_info); - break; - case ORDERED_INDEX: - if (key_info->algorithm == HA_KEY_ALG_HASH) - { - push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - ER(ER_ILLEGAL_HA_CREATE_OPTION), - ndbcluster_hton_name, - "Ndb does not support non-unique " - "hash based indexes"); - error= HA_ERR_UNSUPPORTED; - break; - } - error= create_ordered_index(name, key_info); - break; - default: - DBUG_ASSERT(FALSE); - break; - } - - DBUG_RETURN(error); -} - -int ha_ndbcluster::create_ordered_index(const char *name, - KEY *key_info) -{ - DBUG_ENTER("ha_ndbcluster::create_ordered_index"); - 
DBUG_RETURN(create_ndb_index(name, key_info, FALSE)); -} - -int ha_ndbcluster::create_unique_index(const char *name, - KEY *key_info) -{ - - DBUG_ENTER("ha_ndbcluster::create_unique_index"); - DBUG_RETURN(create_ndb_index(name, key_info, TRUE)); -} - - -/** - Create an index in NDB Cluster. - - @todo - Only temporary ordered indexes supported -*/ - -int ha_ndbcluster::create_ndb_index(const char *name, - KEY *key_info, - bool unique) -{ - Ndb *ndb= get_ndb(); - NdbDictionary::Dictionary *dict= ndb->getDictionary(); - KEY_PART_INFO *key_part= key_info->key_part; - KEY_PART_INFO *end= key_part + key_info->user_defined_key_parts; - - DBUG_ENTER("ha_ndbcluster::create_index"); - DBUG_PRINT("enter", ("name: %s ", name)); - - NdbDictionary::Index ndb_index(name); - if (unique) - ndb_index.setType(NdbDictionary::Index::UniqueHashIndex); - else - { - ndb_index.setType(NdbDictionary::Index::OrderedIndex); - // TODO Only temporary ordered indexes supported - ndb_index.setLogging(FALSE); - } - if (ndb_index.setTable(m_tabname)) - { - DBUG_RETURN(my_errno= errno); - } - - for (; key_part != end; key_part++) - { - Field *field= key_part->field; - DBUG_PRINT("info", ("attr: %s", field->field_name)); - if (ndb_index.addColumnName(field->field_name)) - { - DBUG_RETURN(my_errno= errno); - } - } - - if (dict->createIndex(ndb_index, *m_table)) - ERR_RETURN(dict->getNdbError()); - - // Success - DBUG_PRINT("info", ("Created index %s", name)); - DBUG_RETURN(0); -} - -/* - Prepare for an on-line alter table -*/ -void ha_ndbcluster::prepare_for_alter() -{ - /* ndb_share reference schema */ - ndbcluster_get_share(m_share); // Increase ref_count - DBUG_PRINT("NDB_SHARE", ("%s binlog schema use_count: %u", - m_share->key, m_share->use_count)); - set_ndb_share_state(m_share, NSS_ALTERED); -} - -/* - Add an index on-line to a table -*/ -int ha_ndbcluster::add_index(TABLE *table_arg, - KEY *key_info, uint num_of_keys) -{ - int error= 0; - uint idx; - DBUG_ENTER("ha_ndbcluster::add_index"); - 
DBUG_PRINT("enter", ("table %s", table_arg->s->table_name.str)); - DBUG_ASSERT(m_share->state == NSS_ALTERED); - - for (idx= 0; idx < num_of_keys; idx++) - { - KEY *key= key_info + idx; - KEY_PART_INFO *key_part= key->key_part; - KEY_PART_INFO *end= key_part + key->key_parts; - NDB_INDEX_TYPE idx_type= get_index_type_from_key(idx, key_info, false); - DBUG_PRINT("info", ("Adding index: '%s'", key_info[idx].name)); - // Add fields to key_part struct - for (; key_part != end; key_part++) - key_part->field= table->field[key_part->fieldnr]; - // Check index type - // Create index in ndb - if((error= create_index(key_info[idx].name, key, idx_type, idx))) - break; - } - if (error) - { - set_ndb_share_state(m_share, NSS_INITIAL); - /* ndb_share reference schema free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog schema free use_count: %u", - m_share->key, m_share->use_count)); - free_share(&m_share); // Decrease ref_count - } - DBUG_RETURN(error); -} - -/* - Mark one or several indexes for deletion. and - renumber the remaining indexes -*/ -int ha_ndbcluster::prepare_drop_index(TABLE *table_arg, - uint *key_num, uint num_of_keys) -{ - DBUG_ENTER("ha_ndbcluster::prepare_drop_index"); - DBUG_ASSERT(m_share->state == NSS_ALTERED); - // Mark indexes for deletion - uint idx; - for (idx= 0; idx < num_of_keys; idx++) - { - DBUG_PRINT("info", ("ha_ndbcluster::prepare_drop_index %u", *key_num)); - m_index[*key_num++].status= TO_BE_DROPPED; - } - // Renumber indexes - THD *thd= current_thd; - Thd_ndb *thd_ndb= get_thd_ndb(thd); - Ndb *ndb= thd_ndb->ndb; - renumber_indexes(ndb, table_arg); - DBUG_RETURN(0); -} - -/* - Really drop all indexes marked for deletion -*/ -int ha_ndbcluster::final_drop_index(TABLE *table_arg) -{ - int error; - DBUG_ENTER("ha_ndbcluster::final_drop_index"); - DBUG_PRINT("info", ("ha_ndbcluster::final_drop_index")); - // Really drop indexes - THD *thd= current_thd; - Thd_ndb *thd_ndb= get_thd_ndb(thd); - Ndb *ndb= thd_ndb->ndb; - if((error= drop_indexes(ndb, 
table_arg))) - { - m_share->state= NSS_INITIAL; - /* ndb_share reference schema free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog schema free use_count: %u", - m_share->key, m_share->use_count)); - free_share(&m_share); // Decrease ref_count - } - DBUG_RETURN(error); -} - -/** - Rename a table in NDB Cluster. -*/ - -int ha_ndbcluster::rename_table(const char *from, const char *to) -{ - NDBDICT *dict; - char old_dbname[FN_HEADLEN]; - char new_dbname[FN_HEADLEN]; - char new_tabname[FN_HEADLEN]; - const NDBTAB *orig_tab; - int result; - bool recreate_indexes= FALSE; - NDBDICT::List index_list; - - DBUG_ENTER("ha_ndbcluster::rename_table"); - DBUG_PRINT("info", ("Renaming %s to %s", from, to)); - set_dbname(from, old_dbname); - set_dbname(to, new_dbname); - set_tabname(from); - set_tabname(to, new_tabname); - - if (check_ndb_connection()) - DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION); - - Ndb *ndb= get_ndb(); - ndb->setDatabaseName(old_dbname); - dict= ndb->getDictionary(); - Ndb_table_guard ndbtab_g(dict, m_tabname); - if (!(orig_tab= ndbtab_g.get_table())) - ERR_RETURN(dict->getNdbError()); - -#ifdef HAVE_NDB_BINLOG - int ndb_table_id= orig_tab->getObjectId(); - int ndb_table_version= orig_tab->getObjectVersion(); - - /* ndb_share reference temporary */ - NDB_SHARE *share= get_share(from, 0, FALSE); - if (share) - { - DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", - share->key, share->use_count)); - int r __attribute__((unused))= rename_share(share, to); - DBUG_ASSERT(r == 0); - } -#endif - if (my_strcasecmp(system_charset_info, new_dbname, old_dbname)) - { - dict->listIndexes(index_list, *orig_tab); - recreate_indexes= TRUE; - } - // Change current database to that of target table - set_dbname(to); - if (ndb->setDatabaseName(m_dbname)) - { - ERR_RETURN(ndb->getNdbError()); - } - - NdbDictionary::Table new_tab= *orig_tab; - new_tab.setName(new_tabname); - if (dict->alterTableGlobal(*orig_tab, new_tab) != 0) - { - NdbError ndb_error= dict->getNdbError(); -#ifdef 
HAVE_NDB_BINLOG - if (share) - { - int ret __attribute__((unused))= rename_share(share, from); - DBUG_ASSERT(ret == 0); - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - } -#endif - ERR_RETURN(ndb_error); - } - - // Rename .ndb file - if ((result= handler::rename_table(from, to))) - { - // ToDo in 4.1 should rollback alter table... -#ifdef HAVE_NDB_BINLOG - if (share) - { - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", - share->key, share->use_count)); - free_share(&share); - } -#endif - DBUG_RETURN(result); - } - -#ifdef HAVE_NDB_BINLOG - int is_old_table_tmpfile= 1; - if (share && share->op) - dict->forceGCPWait(); - - /* handle old table */ - if (!IS_TMP_PREFIX(m_tabname)) - { - is_old_table_tmpfile= 0; - String event_name(INJECTOR_EVENT_LEN); - ndb_rep_event_name(&event_name, from + sizeof(share_prefix) - 1, 0); - ndbcluster_handle_drop_table(ndb, event_name.c_ptr(), share, - "rename table"); - } - - if (!result && !IS_TMP_PREFIX(new_tabname)) - { - /* always create an event for the table */ - String event_name(INJECTOR_EVENT_LEN); - ndb_rep_event_name(&event_name, to + sizeof(share_prefix) - 1, 0); - Ndb_table_guard ndbtab_g2(dict, new_tabname); - const NDBTAB *ndbtab= ndbtab_g2.get_table(); - - if (!ndbcluster_create_event(ndb, ndbtab, event_name.c_ptr(), share, - share && ndb_binlog_running ? 2 : 1/* push warning */)) - { - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: RENAME Event: %s", - event_name.c_ptr()); - if (share && - ndbcluster_create_event_ops(share, ndbtab, event_name.c_ptr())) - { - sql_print_error("NDB Binlog: FAILED create event operations " - "during RENAME. 
Event %s", event_name.c_ptr()); - /* a warning has been issued to the client */ - } - } - /* - warning has been issued if ndbcluster_create_event failed - and (share && ndb_binlog_running) - */ - if (!is_old_table_tmpfile) - ndbcluster_log_schema_op(current_thd, share, - current_thd->query(), - current_thd->query_length(), - old_dbname, m_tabname, - ndb_table_id, ndb_table_version, - SOT_RENAME_TABLE, - m_dbname, new_tabname); - } - - // If we are moving tables between databases, we need to recreate - // indexes - if (recreate_indexes) - { - for (unsigned i = 0; i < index_list.count; i++) - { - NDBDICT::List::Element& index_el = index_list.elements[i]; - // Recreate any indexes not stored in the system database - if (my_strcasecmp(system_charset_info, - index_el.database, NDB_SYSTEM_DATABASE)) - { - set_dbname(from); - ndb->setDatabaseName(m_dbname); - const NDBINDEX * index= dict->getIndexGlobal(index_el.name, new_tab); - DBUG_PRINT("info", ("Creating index %s/%s", - index_el.database, index->getName())); - dict->createIndex(*index, new_tab); - DBUG_PRINT("info", ("Dropping index %s/%s", - index_el.database, index->getName())); - set_dbname(from); - ndb->setDatabaseName(m_dbname); - dict->dropIndexGlobal(*index); - } - } - } - if (share) - { - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - } -#endif - - DBUG_RETURN(result); -} - - -/** - Delete table from NDB Cluster. 
-*/ - -/* static version which does not need a handler */ - -int -ha_ndbcluster::delete_table(ha_ndbcluster *h, Ndb *ndb, - const char *path, - const char *db, - const char *table_name) -{ - THD *thd= current_thd; - DBUG_ENTER("ha_ndbcluster::ndbcluster_delete_table"); - NDBDICT *dict= ndb->getDictionary(); - int ndb_table_id= 0; - int ndb_table_version= 0; -#ifdef HAVE_NDB_BINLOG - /* - Don't allow drop table unless - schema distribution table is setup - */ - if (!ndb_schema_share) - { - DBUG_PRINT("info", ("Schema distribution table not setup")); - DBUG_ASSERT(ndb_schema_share); - DBUG_RETURN(HA_ERR_NO_CONNECTION); - } - /* ndb_share reference temporary */ - NDB_SHARE *share= get_share(path, 0, FALSE); - if (share) - { - DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", - share->key, share->use_count)); - } -#endif - - /* Drop the table from NDB */ - - int res= 0; - if (h && h->m_table) - { -retry_temporary_error1: - if (dict->dropTableGlobal(*h->m_table) == 0) - { - ndb_table_id= h->m_table->getObjectId(); - ndb_table_version= h->m_table->getObjectVersion(); - DBUG_PRINT("info", ("success 1")); - } - else - { - switch (dict->getNdbError().status) - { - case NdbError::TemporaryError: - if (!thd->killed) - goto retry_temporary_error1; // retry indefinitly - break; - default: - break; - } - set_ndb_err(thd, dict->getNdbError()); - res= ndb_to_mysql_error(&dict->getNdbError()); - DBUG_PRINT("info", ("error(1) %u", res)); - } - h->release_metadata(thd, ndb); - } - else - { - ndb->setDatabaseName(db); - while (1) - { - Ndb_table_guard ndbtab_g(dict, table_name); - if (ndbtab_g.get_table()) - { - retry_temporary_error2: - if (dict->dropTableGlobal(*ndbtab_g.get_table()) == 0) - { - ndb_table_id= ndbtab_g.get_table()->getObjectId(); - ndb_table_version= ndbtab_g.get_table()->getObjectVersion(); - DBUG_PRINT("info", ("success 2")); - break; - } - else - { - switch (dict->getNdbError().status) - { - case NdbError::TemporaryError: - if (!thd->killed) - goto 
retry_temporary_error2; // retry indefinitly - break; - default: - if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT) - { - ndbtab_g.invalidate(); - continue; - } - break; - } - } - } - set_ndb_err(thd, dict->getNdbError()); - res= ndb_to_mysql_error(&dict->getNdbError()); - DBUG_PRINT("info", ("error(2) %u", res)); - break; - } - } - - if (res) - { -#ifdef HAVE_NDB_BINLOG - /* the drop table failed for some reason, drop the share anyways */ - if (share) - { - mysql_mutex_lock(&ndbcluster_mutex); - if (share->state != NSS_DROPPED) - { - /* - The share kept by the server has not been freed, free it - */ - share->state= NSS_DROPPED; - /* ndb_share reference create free */ - DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u", - share->key, share->use_count)); - free_share(&share, TRUE); - } - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share, TRUE); - mysql_mutex_unlock(&ndbcluster_mutex); - } -#endif - DBUG_RETURN(res); - } - -#ifdef HAVE_NDB_BINLOG - /* stop the logging of the dropped table, and cleanup */ - - /* - drop table is successful even if table does not exist in ndb - and in case table was actually not dropped, there is no need - to force a gcp, and setting the event_name to null will indicate - that there is no event to be dropped - */ - int table_dropped= dict->getNdbError().code != 709; - - if (!IS_TMP_PREFIX(table_name) && share && - current_thd->lex->sql_command != SQLCOM_TRUNCATE) - { - ndbcluster_log_schema_op(thd, share, - thd->query(), thd->query_length(), - share->db, share->table_name, - ndb_table_id, ndb_table_version, - SOT_DROP_TABLE, 0, 0); - } - else if (table_dropped && share && share->op) /* ndbcluster_log_schema_op - will do a force GCP */ - dict->forceGCPWait(); - - if (!IS_TMP_PREFIX(table_name)) - { - String event_name(INJECTOR_EVENT_LEN); - ndb_rep_event_name(&event_name, path + sizeof(share_prefix) - 1, 0); - 
ndbcluster_handle_drop_table(ndb, - table_dropped ? event_name.c_ptr() : 0, - share, "delete table"); - } - - if (share) - { - mysql_mutex_lock(&ndbcluster_mutex); - if (share->state != NSS_DROPPED) - { - /* - The share kept by the server has not been freed, free it - */ - share->state= NSS_DROPPED; - /* ndb_share reference create free */ - DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u", - share->key, share->use_count)); - free_share(&share, TRUE); - } - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share, TRUE); - mysql_mutex_unlock(&ndbcluster_mutex); - } -#endif - DBUG_RETURN(0); -} - -int ha_ndbcluster::delete_table(const char *name) -{ - DBUG_ENTER("ha_ndbcluster::delete_table"); - DBUG_PRINT("enter", ("name: %s", name)); - set_dbname(name); - set_tabname(name); - -#ifdef HAVE_NDB_BINLOG - /* - Don't allow drop table unless - schema distribution table is setup - */ - if (!ndb_schema_share) - { - DBUG_PRINT("info", ("Schema distribution table not setup")); - DBUG_ASSERT(ndb_schema_share); - DBUG_RETURN(HA_ERR_NO_CONNECTION); - } -#endif - - if (check_ndb_connection()) - DBUG_RETURN(HA_ERR_NO_CONNECTION); - - /* Call ancestor function to delete .ndb file */ - handler::delete_table(name); - - DBUG_RETURN(delete_table(this, get_ndb(),name, m_dbname, m_tabname)); -} - - -void ha_ndbcluster::get_auto_increment(ulonglong offset, ulonglong increment, - ulonglong nb_desired_values, - ulonglong *first_value, - ulonglong *nb_reserved_values) -{ - uint cache_size; - Uint64 auto_value; - THD *thd= current_thd; - DBUG_ENTER("get_auto_increment"); - DBUG_PRINT("enter", ("m_tabname: %s", m_tabname)); - Ndb *ndb= get_ndb(); - - if (m_rows_inserted > m_rows_to_insert) - { - /* We guessed too low */ - m_rows_to_insert+= m_autoincrement_prefetch; - } - uint remaining= m_rows_to_insert - m_rows_inserted; - ha_rows prefetch= THDVAR(thd, autoincrement_prefetch_sz); - 
uint min_prefetch= - (remaining < prefetch) ? prefetch : remaining; - cache_size= ((remaining < m_autoincrement_prefetch) ? - min_prefetch - : remaining); - uint retries= NDB_AUTO_INCREMENT_RETRIES; - int retry_sleep= 30; /* 30 milliseconds, transaction */ - for (;;) - { - Ndb_tuple_id_range_guard g(m_share); - if ((m_skip_auto_increment && - ndb->readAutoIncrementValue(m_table, g.range, auto_value)) || - ndb->getAutoIncrementValue(m_table, g.range, auto_value, cache_size, increment, offset)) - { - if (--retries && - ndb->getNdbError().status == NdbError::TemporaryError) - { - my_sleep(retry_sleep); - continue; - } - const NdbError err= ndb->getNdbError(); - sql_print_error("Error %lu in ::get_auto_increment(): %s", - (ulong) err.code, err.message); - *first_value= ~(ulonglong) 0; - DBUG_VOID_RETURN; - } - break; - } - *first_value= (longlong)auto_value; - /* From the point of view of MySQL, NDB reserves one row at a time */ - *nb_reserved_values= 1; - DBUG_VOID_RETURN; -} - - -/** - Constructor for the NDB Cluster table handler . 
-*/ - -/* - Normal flags for binlogging is that ndb has HA_HAS_OWN_BINLOGGING - and preferes HA_BINLOG_ROW_CAPABLE - Other flags are set under certain circumstaces in table_flags() -*/ -#define HA_NDBCLUSTER_TABLE_FLAGS \ - HA_REC_NOT_IN_SEQ | \ - HA_NULL_IN_KEY | \ - HA_AUTO_PART_KEY | \ - HA_NO_PREFIX_CHAR_KEYS | \ - HA_NEED_READ_RANGE_BUFFER | \ - HA_CAN_GEOMETRY | \ - HA_CAN_BIT_FIELD | \ - HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | \ - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE | \ - HA_PARTIAL_COLUMN_READ | \ - HA_HAS_OWN_BINLOGGING | \ - HA_BINLOG_ROW_CAPABLE | \ - HA_HAS_RECORDS - -ha_ndbcluster::ha_ndbcluster(handlerton *hton, TABLE_SHARE *table_arg): - handler(hton, table_arg), - m_active_trans(NULL), - m_active_cursor(NULL), - m_table(NULL), - m_table_info(NULL), - m_table_flags(HA_NDBCLUSTER_TABLE_FLAGS), - m_share(0), - m_part_info(NULL), - m_use_partition_function(FALSE), - m_sorted(FALSE), - m_use_write(FALSE), - m_ignore_dup_key(FALSE), - m_has_unique_index(FALSE), - m_primary_key_update(FALSE), - m_ignore_no_key(FALSE), - m_rows_to_insert((ha_rows) 1), - m_rows_inserted((ha_rows) 0), - m_bulk_insert_rows((ha_rows) 1024), - m_rows_changed((ha_rows) 0), - m_bulk_insert_not_flushed(FALSE), - m_delete_cannot_batch(FALSE), - m_update_cannot_batch(FALSE), - m_ops_pending(0), - m_skip_auto_increment(TRUE), - m_blobs_pending(0), - m_blobs_offset(0), - m_blobs_buffer(0), - m_blobs_buffer_size(0), - m_dupkey((uint) -1), - m_ha_not_exact_count(FALSE), - m_force_send(TRUE), - m_autoincrement_prefetch(DEFAULT_AUTO_PREFETCH), - m_transaction_on(TRUE), - m_cond(NULL), - m_multi_cursor(NULL) -{ - int i; - - DBUG_ENTER("ha_ndbcluster"); - - m_tabname[0]= '\0'; - m_dbname[0]= '\0'; - - stats.records= ~(ha_rows)0; // uninitialized - stats.block_size= 1024; - - for (i= 0; i < MAX_KEY; i++) - ndb_init_index(m_index[i]); - - DBUG_VOID_RETURN; -} - - -int ha_ndbcluster::ha_initialise() -{ - DBUG_ENTER("ha_ndbcluster::ha_initialise"); - if (check_ndb_in_thd(current_thd)) - { - 
DBUG_RETURN(FALSE); - } - DBUG_RETURN(TRUE); -} - -/** - Destructor for NDB Cluster table handler. -*/ - -ha_ndbcluster::~ha_ndbcluster() -{ - THD *thd= current_thd; - Ndb *ndb= thd ? check_ndb_in_thd(thd) : g_ndb; - DBUG_ENTER("~ha_ndbcluster"); - - if (m_share) - { - /* ndb_share reference handler free */ - DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u", - m_share->key, m_share->use_count)); - free_share(&m_share); - } - release_metadata(thd, ndb); - my_free(m_blobs_buffer); - m_blobs_buffer= 0; - - // Check for open cursor/transaction - if (m_active_cursor) { - } - DBUG_ASSERT(m_active_cursor == NULL); - if (m_active_trans) { - } - DBUG_ASSERT(m_active_trans == NULL); - - // Discard any generated condition - DBUG_PRINT("info", ("Deleting generated condition")); - if (m_cond) - { - delete m_cond; - m_cond= NULL; - } - - DBUG_VOID_RETURN; -} - - - -/** - Open a table for further use. - - - fetch metadata for this table from NDB - - check that table exists - - @retval - 0 ok - @retval - < 0 Table has changed -*/ - -int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked) -{ - int res; - KEY *key; - DBUG_ENTER("ha_ndbcluster::open"); - DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d", - name, mode, test_if_locked)); - - /* - Setup ref_length to make room for the whole - primary key to be written in the ref variable - */ - - if (table_share->primary_key != MAX_KEY) - { - key= table->key_info+table_share->primary_key; - ref_length= key->key_length; - } - else // (table_share->primary_key == MAX_KEY) - { - if (m_use_partition_function) - { - ref_length+= sizeof(m_part_id); - } - } - - DBUG_PRINT("info", ("ref_length: %d", ref_length)); - - // Init table lock structure - /* ndb_share reference handler */ - if (!(m_share=get_share(name, table))) - DBUG_RETURN(1); - DBUG_PRINT("NDB_SHARE", ("%s handler use_count: %u", - m_share->key, m_share->use_count)); - thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0); - - set_dbname(name); - 
set_tabname(name); - - if ((res= check_ndb_connection()) || - (res= get_metadata(name))) - { - /* ndb_share reference handler free */ - DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u", - m_share->key, m_share->use_count)); - free_share(&m_share); - m_share= 0; - DBUG_RETURN(res); - } - while (1) - { - Ndb *ndb= get_ndb(); - if (ndb->setDatabaseName(m_dbname)) - { - set_ndb_err(current_thd, ndb->getNdbError()); - res= ndb_to_mysql_error(&ndb->getNdbError()); - break; - } - struct Ndb_statistics stat; - res= ndb_get_table_statistics(NULL, FALSE, ndb, m_table, &stat); - stats.mean_rec_length= stat.row_size; - stats.data_file_length= stat.fragment_memory; - stats.records= stat.row_count; - if(!res) - res= info(HA_STATUS_CONST); - break; - } - if (res) - { - free_share(&m_share); - m_share= 0; - release_metadata(current_thd, get_ndb()); - DBUG_RETURN(res); - } -#ifdef HAVE_NDB_BINLOG - if (!ndb_binlog_tables_inited) - { - table->db_stat|= HA_READ_ONLY; - sql_print_information("table '%s' opened read only", name); - } -#endif - DBUG_RETURN(0); -} - -/* - Set partition info - - SYNOPSIS - set_part_info() - part_info - - RETURN VALUE - NONE - - DESCRIPTION - Set up partition info when handler object created -*/ - -void ha_ndbcluster::set_part_info(partition_info *part_info) -{ - m_part_info= part_info; - if (!(m_part_info->part_type == HASH_PARTITION && - m_part_info->list_of_part_fields && - !m_part_info->is_sub_partitioned())) - m_use_partition_function= TRUE; -} - -/** - Close the table; release resources setup by open(). -*/ - -int ha_ndbcluster::close(void) -{ - DBUG_ENTER("close"); - THD *thd= table->in_use; - Ndb *ndb= thd ? 
check_ndb_in_thd(thd) : g_ndb; - /* ndb_share reference handler free */ - DBUG_PRINT("NDB_SHARE", ("%s handler free use_count: %u", - m_share->key, m_share->use_count)); - free_share(&m_share); - m_share= 0; - release_metadata(thd, ndb); - DBUG_RETURN(0); -} - - -/** - @todo - - Alt.1 If init fails because to many allocated Ndb - wait on condition for a Ndb object to be released. - - Alt.2 Seize/release from pool, wait until next release -*/ -Thd_ndb* ha_ndbcluster::seize_thd_ndb() -{ - Thd_ndb *thd_ndb; - DBUG_ENTER("seize_thd_ndb"); - - thd_ndb= new Thd_ndb(); - if (thd_ndb == NULL) - { - my_errno= HA_ERR_OUT_OF_MEM; - return NULL; - } - if (thd_ndb->ndb->init(max_transactions) != 0) - { - ERR_PRINT(thd_ndb->ndb->getNdbError()); - /* - TODO - Alt.1 If init fails because to many allocated Ndb - wait on condition for a Ndb object to be released. - Alt.2 Seize/release from pool, wait until next release - */ - delete thd_ndb; - thd_ndb= NULL; - } - DBUG_RETURN(thd_ndb); -} - - -void ha_ndbcluster::release_thd_ndb(Thd_ndb* thd_ndb) -{ - DBUG_ENTER("release_thd_ndb"); - delete thd_ndb; - DBUG_VOID_RETURN; -} - - -/** - If this thread already has a Thd_ndb object allocated - in current THD, reuse it. Otherwise - seize a Thd_ndb object, assign it to current THD and use it. 
- -*/ - -Ndb* check_ndb_in_thd(THD* thd) -{ - Thd_ndb *thd_ndb= get_thd_ndb(thd); - if (!thd_ndb) - { - if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb())) - return NULL; - set_thd_ndb(thd, thd_ndb); - } - return thd_ndb->ndb; -} - - - -int ha_ndbcluster::check_ndb_connection(THD* thd) -{ - Ndb *ndb; - DBUG_ENTER("check_ndb_connection"); - - if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(HA_ERR_NO_CONNECTION); - if (ndb->setDatabaseName(m_dbname)) - { - ERR_RETURN(ndb->getNdbError()); - } - DBUG_RETURN(0); -} - - -static int ndbcluster_close_connection(handlerton *hton, THD *thd) -{ - Thd_ndb *thd_ndb= get_thd_ndb(thd); - DBUG_ENTER("ndbcluster_close_connection"); - if (thd_ndb) - { - ha_ndbcluster::release_thd_ndb(thd_ndb); - set_thd_ndb(thd, NULL); // not strictly required but does not hurt either - } - DBUG_RETURN(0); -} - - -/** - Try to discover one table from NDB. -*/ - -int ndbcluster_discover(handlerton *hton, THD* thd, const char *db, - const char *name, - uchar **frmblob, - size_t *frmlen) -{ - int error= 0; - NdbError ndb_error; - size_t len; - uchar* data= NULL; - Ndb* ndb; - char key[FN_REFLEN + 1]; - DBUG_ENTER("ndbcluster_discover"); - DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); - - if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(HA_ERR_NO_CONNECTION); - if (ndb->setDatabaseName(db)) - { - ERR_RETURN(ndb->getNdbError()); - } - NDBDICT* dict= ndb->getDictionary(); - build_table_filename(key, sizeof(key) - 1, db, name, "", 0); - /* ndb_share reference temporary */ - NDB_SHARE *share= get_share(key, 0, FALSE); - if (share) - { - DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", - share->key, share->use_count)); - } - if (share && get_ndb_share_state(share) == NSS_ALTERED) - { - // Frm has been altered on disk, but not yet written to ndb - if (readfrm(key, &data, &len)) - { - DBUG_PRINT("error", ("Could not read frm")); - error= 1; - goto err; - } - } - else - { - Ndb_table_guard ndbtab_g(dict, name); - const NDBTAB *tab= 
ndbtab_g.get_table(); - if (!tab) - { - const NdbError err= dict->getNdbError(); - if (err.code == 709 || err.code == 723) - { - error= -1; - DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code)); - } - else - { - error= -1; - ndb_error= err; - DBUG_PRINT("info", ("ndb_error.code: %u", ndb_error.code)); - } - goto err; - } - DBUG_PRINT("info", ("Found table %s", tab->getName())); - - len= tab->getFrmLength(); - if (len == 0 || tab->getFrmData() == NULL) - { - DBUG_PRINT("error", ("No frm data found.")); - error= 1; - goto err; - } - - if (unpackfrm(&data, &len, (uchar*) tab->getFrmData())) - { - DBUG_PRINT("error", ("Could not unpack table")); - error= 1; - goto err; - } - } - - *frmlen= len; - *frmblob= data; - - if (share) - { - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - } - - DBUG_RETURN(0); -err: - my_free(data); - if (share) - { - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - } - if (ndb_error.code) - { - ERR_RETURN(ndb_error); - } - DBUG_RETURN(error); -} - -/** - Check if a table exists in NDB. 
-*/ - -int ndbcluster_table_exists_in_engine(handlerton *hton, THD* thd, - const char *db, - const char *name) -{ - Ndb* ndb; - DBUG_ENTER("ndbcluster_table_exists_in_engine"); - DBUG_PRINT("enter", ("db: %s name: %s", db, name)); - - if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(HA_ERR_NO_CONNECTION); - NDBDICT* dict= ndb->getDictionary(); - NdbDictionary::Dictionary::List list; - if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) - ERR_RETURN(dict->getNdbError()); - for (uint i= 0 ; i < list.count ; i++) - { - NdbDictionary::Dictionary::List::Element& elmt= list.elements[i]; - if (my_strcasecmp(system_charset_info, elmt.database, db)) - continue; - if (my_strcasecmp(system_charset_info, elmt.name, name)) - continue; - DBUG_PRINT("info", ("Found table")); - DBUG_RETURN(HA_ERR_TABLE_EXIST); - } - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); -} - - - -extern "C" uchar* tables_get_key(const char *entry, size_t *length, - my_bool not_used __attribute__((unused))) -{ - *length= strlen(entry); - return (uchar*) entry; -} - - -/** - Drop a database in NDB Cluster - - @note - add a dummy void function, since stupid handlerton is returning void instead of int... 
-*/ -int ndbcluster_drop_database_impl(const char *path) -{ - DBUG_ENTER("ndbcluster_drop_database"); - THD *thd= current_thd; - char dbname[FN_HEADLEN]; - Ndb* ndb; - NdbDictionary::Dictionary::List list; - uint i; - char *tabname; - List<char> drop_list; - int ret= 0; - ha_ndbcluster::set_dbname(path, (char *)&dbname); - DBUG_PRINT("enter", ("db: %s", dbname)); - - if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(-1); - - // List tables in NDB - NDBDICT *dict= ndb->getDictionary(); - if (dict->listObjects(list, - NdbDictionary::Object::UserTable) != 0) - DBUG_RETURN(-1); - for (i= 0 ; i < list.count ; i++) - { - NdbDictionary::Dictionary::List::Element& elmt= list.elements[i]; - DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); - - // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, elmt.database, dbname)) - continue; - DBUG_PRINT("info", ("%s must be dropped", elmt.name)); - drop_list.push_back(thd->strdup(elmt.name)); - } - // Drop any tables belonging to database - char full_path[FN_REFLEN + 1]; - char *tmp= full_path + - build_table_filename(full_path, sizeof(full_path) - 1, dbname, "", "", 0); - if (ndb->setDatabaseName(dbname)) - { - ERR_RETURN(ndb->getNdbError()); - } - List_iterator_fast<char> it(drop_list); - while ((tabname=it++)) - { - tablename_to_filename(tabname, tmp, FN_REFLEN - (tmp - full_path)-1); - if (ha_ndbcluster::delete_table(0, ndb, full_path, dbname, tabname)) - { - const NdbError err= dict->getNdbError(); - if (err.code != 709 && err.code != 723) - { - set_ndb_err(thd, err); - ret= ndb_to_mysql_error(&err); - } - } - } - DBUG_RETURN(ret); -} - -static void ndbcluster_drop_database(handlerton *hton, char *path) -{ - DBUG_ENTER("ndbcluster_drop_database"); -#ifdef HAVE_NDB_BINLOG - /* - Don't allow drop database unless - schema distribution table is setup - */ - if (!ndb_schema_share) - { - DBUG_PRINT("info", ("Schema distribution table not setup")); - DBUG_ASSERT(ndb_schema_share); - 
DBUG_VOID_RETURN; - } -#endif - ndbcluster_drop_database_impl(path); -#ifdef HAVE_NDB_BINLOG - char db[FN_REFLEN]; - THD *thd= current_thd; - ha_ndbcluster::set_dbname(path, db); - ndbcluster_log_schema_op(thd, 0, - thd->query(), thd->query_length(), - db, "", 0, 0, SOT_DROP_DB, 0, 0); -#endif - DBUG_VOID_RETURN; -} - -int ndb_create_table_from_engine(THD *thd, const char *db, - const char *table_name) -{ - LEX *old_lex= thd->lex, newlex; - thd->lex= &newlex; - newlex.current_select= NULL; - int res= ha_create_table_from_engine(thd, db, table_name); - thd->lex= old_lex; - return res; -} - -/* - find all tables in ndb and discover those needed -*/ -int ndbcluster_find_all_files(THD *thd) -{ - Ndb* ndb; - char key[FN_REFLEN + 1]; - NDBDICT *dict; - int unhandled, retries= 5, skipped; - DBUG_ENTER("ndbcluster_find_all_files"); - - if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(HA_ERR_NO_CONNECTION); - - dict= ndb->getDictionary(); - - LINT_INIT(unhandled); - LINT_INIT(skipped); - do - { - NdbDictionary::Dictionary::List list; - if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) - ERR_RETURN(dict->getNdbError()); - unhandled= 0; - skipped= 0; - retries--; - for (uint i= 0 ; i < list.count ; i++) - { - NDBDICT::List::Element& elmt= list.elements[i]; - if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name)) - { - DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name)); - continue; - } - DBUG_PRINT("info", ("Found %s.%s in NDB", elmt.database, elmt.name)); - if (elmt.state != NDBOBJ::StateOnline && - elmt.state != NDBOBJ::StateBackup && - elmt.state != NDBOBJ::StateBuilding) - { - sql_print_information("NDB: skipping setup table %s.%s, in state %d", - elmt.database, elmt.name, elmt.state); - skipped++; - continue; - } - - ndb->setDatabaseName(elmt.database); - Ndb_table_guard ndbtab_g(dict, elmt.name); - const NDBTAB *ndbtab= ndbtab_g.get_table(); - if (!ndbtab) - { - if (retries == 0) - sql_print_error("NDB: failed to setup table 
%s.%s, error: %d, %s", - elmt.database, elmt.name, - dict->getNdbError().code, - dict->getNdbError().message); - unhandled++; - continue; - } - - if (ndbtab->getFrmLength() == 0) - continue; - - /* check if database exists */ - char *end= key + - build_table_filename(key, sizeof(key) - 1, elmt.database, "", "", 0); - if (my_access(key, F_OK)) - { - /* no such database defined, skip table */ - continue; - } - /* finalize construction of path */ - end+= tablename_to_filename(elmt.name, end, - sizeof(key)-(end-key)); - uchar *data= 0, *pack_data= 0; - size_t length, pack_length; - int discover= 0; - if (readfrm(key, &data, &length) || - packfrm(data, length, &pack_data, &pack_length)) - { - discover= 1; - sql_print_information("NDB: missing frm for %s.%s, discovering...", - elmt.database, elmt.name); - } - else if (cmp_frm(ndbtab, pack_data, pack_length)) - { - /* ndb_share reference temporary */ - NDB_SHARE *share= get_share(key, 0, FALSE); - if (share) - { - DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", - share->key, share->use_count)); - } - if (!share || get_ndb_share_state(share) != NSS_ALTERED) - { - discover= 1; - sql_print_information("NDB: mismatch in frm for %s.%s, discovering...", - elmt.database, elmt.name); - } - if (share) - { - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - } - } - my_free(data); - my_free(pack_data); - - if (discover) - { - /* ToDo 4.1 database needs to be created if missing */ - if (ndb_create_table_from_engine(thd, elmt.database, elmt.name)) - { - /* ToDo 4.1 handle error */ - } - } -#ifdef HAVE_NDB_BINLOG - else - { - /* set up replication for this table */ - ndbcluster_create_binlog_setup(ndb, key, end-key, - elmt.database, elmt.name, - TRUE); - } -#endif - } - } - while (unhandled && retries); - - DBUG_RETURN(-(skipped + unhandled)); -} - -int ndbcluster_find_files(handlerton *hton, THD *thd, - const char *db, 
- const char *path, - const char *wild, bool dir, List<LEX_STRING> *files) -{ - DBUG_ENTER("ndbcluster_find_files"); - DBUG_PRINT("enter", ("db: %s", db)); - { // extra bracket to avoid gcc 2.95.3 warning - uint i; - Ndb* ndb; - char name[FN_REFLEN + 1]; - HASH ndb_tables, ok_tables; - NDBDICT::List list; - - if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(HA_ERR_NO_CONNECTION); - - if (dir) - DBUG_RETURN(0); // Discover of databases not yet supported - - // List tables in NDB - NDBDICT *dict= ndb->getDictionary(); - if (dict->listObjects(list, - NdbDictionary::Object::UserTable) != 0) - ERR_RETURN(dict->getNdbError()); - - if (my_hash_init(&ndb_tables, system_charset_info,list.count,0,0, - (my_hash_get_key)tables_get_key,0,0)) - { - DBUG_PRINT("error", ("Failed to init HASH ndb_tables")); - DBUG_RETURN(-1); - } - - if (my_hash_init(&ok_tables, system_charset_info,32,0,0, - (my_hash_get_key)tables_get_key,0,0)) - { - DBUG_PRINT("error", ("Failed to init HASH ok_tables")); - my_hash_free(&ndb_tables); - DBUG_RETURN(-1); - } - - for (i= 0 ; i < list.count ; i++) - { - NDBDICT::List::Element& elmt= list.elements[i]; - if (IS_TMP_PREFIX(elmt.name) || IS_NDB_BLOB_PREFIX(elmt.name)) - { - DBUG_PRINT("info", ("Skipping %s.%s in NDB", elmt.database, elmt.name)); - continue; - } - DBUG_PRINT("info", ("Found %s/%s in NDB", elmt.database, elmt.name)); - - // Add only tables that belongs to db - if (my_strcasecmp(system_charset_info, elmt.database, db)) - continue; - - // Apply wildcard to list of tables in NDB - if (wild) - { - if (lower_case_table_names) - { - if (wild_case_compare(files_charset_info, elmt.name, wild)) - continue; - } - else if (wild_compare(elmt.name,wild,0)) - continue; - } - DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", elmt.name)); - my_hash_insert(&ndb_tables, (uchar*)thd->strdup(elmt.name)); - } - - LEX_STRING *file_name; - List_iterator<LEX_STRING> it(*files); - List<char> delete_list; - char *file_name_str; - while ((file_name=it++)) - { 
- bool file_on_disk= FALSE; - DBUG_PRINT("info", ("%s", file_name->str)); - if (my_hash_search(&ndb_tables, (uchar*) file_name->str, - file_name->length)) - { - build_table_filename(name, sizeof(name) - 1, db, - file_name->str, reg_ext, 0); - if (my_access(name, F_OK)) - { - DBUG_PRINT("info", ("Table %s listed and need discovery", - file_name->str)); - if (ndb_create_table_from_engine(thd, db, file_name->str)) - { - push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, - ER_TABLE_EXISTS_ERROR, - "Discover of table %s.%s failed", - db, file_name->str); - continue; - } - } - DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name->str)); - file_on_disk= TRUE; - } - - // Check for .ndb file with this name - build_table_filename(name, sizeof(name) - 1, db, - file_name->str, ha_ndb_ext, 0); - DBUG_PRINT("info", ("Check access for %s", name)); - if (my_access(name, F_OK)) - { - DBUG_PRINT("info", ("%s did not exist on disk", name)); - // .ndb file did not exist on disk, another table type - if (file_on_disk) - { - // Ignore this ndb table - uchar *record= my_hash_search(&ndb_tables, (uchar*) file_name->str, - file_name->length); - DBUG_ASSERT(record); - my_hash_delete(&ndb_tables, record); - push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, - ER_TABLE_EXISTS_ERROR, - "Local table %s.%s shadows ndb table", - db, file_name->str); - } - continue; - } - if (file_on_disk) - { - // File existed in NDB and as frm file, put in ok_tables list - my_hash_insert(&ok_tables, (uchar*) file_name->str); - continue; - } - DBUG_PRINT("info", ("%s existed on disk", name)); - // The .ndb file exists on disk, but it's not in list of tables in ndb - // Verify that handler agrees table is gone. 
- if (ndbcluster_table_exists_in_engine(hton, thd, db, file_name->str) == - HA_ERR_NO_SUCH_TABLE) - { - DBUG_PRINT("info", ("NDB says %s does not exists", file_name->str)); - it.remove(); - // Put in list of tables to remove from disk - delete_list.push_back(thd->strdup(file_name->str)); - } - } - -#ifdef HAVE_NDB_BINLOG - /* setup logging to binlog for all discovered tables */ - { - char *end, *end1= name + - build_table_filename(name, sizeof(name) - 1, db, "", "", 0); - for (i= 0; i < ok_tables.records; i++) - { - file_name_str= (char*)my_hash_element(&ok_tables, i); - end= end1 + - tablename_to_filename(file_name_str, end1, sizeof(name) - (end1 - name)); - ndbcluster_create_binlog_setup(ndb, name, end-name, - db, file_name_str, TRUE); - } - } -#endif - - // Check for new files to discover - DBUG_PRINT("info", ("Checking for new files to discover")); - List<char> create_list; - for (i= 0 ; i < ndb_tables.records ; i++) - { - file_name_str= (char*) my_hash_element(&ndb_tables, i); - if (!my_hash_search(&ok_tables, (uchar*) file_name_str, - strlen(file_name_str))) - { - build_table_filename(name, sizeof(name) - 1, - db, file_name_str, reg_ext, 0); - if (my_access(name, F_OK)) - { - DBUG_PRINT("info", ("%s must be discovered", file_name_str)); - // File is in list of ndb tables and not in ok_tables - // This table need to be created - create_list.push_back(thd->strdup(file_name_str)); - } - } - } - - /* - Delete old files. - - ndbcluster_find_files() may be called from I_S code and ndbcluster_binlog - thread in situations when some tables are already open. This means that - code below will try to obtain exclusive metadata lock on some table - while holding shared meta-data lock on other tables. This might lead to a - deadlock but such a deadlock should be detected by MDL deadlock detector. - - XXX: the scenario described above is not covered with any test. 
- */ - List_iterator_fast<char> it3(delete_list); - while ((file_name_str= it3++)) - { - DBUG_PRINT("info", ("Remove table %s/%s", db, file_name_str)); - /* Delete the table and all related files. */ - TABLE_LIST table_list; - table_list.init_one_table(db, strlen(db), file_name_str, - strlen(file_name_str), file_name_str, - TL_WRITE); - table_list.mdl_request.set_type(MDL_EXCLUSIVE); - (void)mysql_rm_table_part2(thd, &table_list, - FALSE, /* if_exists */ - FALSE, /* drop_temporary */ - FALSE, /* drop_view */ - TRUE /* dont_log_query*/); - trans_commit_implicit(thd); /* Safety, should be unnecessary. */ - thd->mdl_context.release_transactional_locks(); - /* Clear error message that is returned when table is deleted */ - thd->clear_error(); - } - - /* Lock mutex before creating .FRM files. */ - /* Create new files. */ - List_iterator_fast<char> it2(create_list); - while ((file_name_str=it2++)) - { - DBUG_PRINT("info", ("Table %s need discovery", file_name_str)); - if (ndb_create_table_from_engine(thd, db, file_name_str) == 0) - { - LEX_STRING *tmp_file_name= 0; - tmp_file_name= thd->make_lex_string(tmp_file_name, file_name_str, - strlen(file_name_str), TRUE); - files->push_back(tmp_file_name); - } - } - - my_hash_free(&ok_tables); - my_hash_free(&ndb_tables); - - // Delete schema file from files - if (!strcmp(db, NDB_REP_DB)) - { - uint count = 0; - while (count++ < files->elements) - { - file_name = (LEX_STRING *)files->pop(); - if (!strcmp(file_name->str, NDB_SCHEMA_TABLE)) - { - DBUG_PRINT("info", ("skip %s.%s table, it should be hidden to user", - NDB_REP_DB, NDB_SCHEMA_TABLE)); - continue; - } - files->push_back(file_name); - } - } - } // extra bracket to avoid gcc 2.95.3 warning - DBUG_RETURN(0); -} - - -/* - Initialise all gloal variables before creating - a NDB Cluster table handler - */ - -/* Call back after cluster connect */ -static int connect_callback() -{ - mysql_mutex_lock(&LOCK_ndb_util_thread); - update_status_variables(g_ndb_cluster_connection); - - 
uint node_id, i= 0; - Ndb_cluster_connection_node_iter node_iter; - memset((void *)g_node_id_map, 0xFFFF, sizeof(g_node_id_map)); - while ((node_id= g_ndb_cluster_connection->get_next_node(node_iter))) - g_node_id_map[node_id]= i++; - - mysql_cond_signal(&COND_ndb_util_thread); - mysql_mutex_unlock(&LOCK_ndb_util_thread); - return 0; -} - -extern int ndb_dictionary_is_mysqld; - -#ifdef HAVE_PSI_INTERFACE - -#ifdef HAVE_NDB_BINLOG -PSI_mutex_key key_injector_mutex, key_ndb_schema_share_mutex, - key_ndb_schema_object_mutex; -#endif /* HAVE_NDB_BINLOG */ - -PSI_mutex_key key_NDB_SHARE_mutex, key_ndbcluster_mutex, - key_LOCK_ndb_util_thread; - -static PSI_mutex_info all_ndbcluster_mutexes[]= -{ -#ifdef HAVE_NDB_BINLOG - {& key_injector_mutex, "injector_mutex", PSI_FLAG_GLOBAL}, - {& key_ndb_schema_share_mutex, "ndb_schema_share_mutex", PSI_FLAG_GLOBAL}, - {& key_ndb_schema_object_mutex, "ndb_schema_object_mutex", PSI_FLAG_GLOBAL}, -#endif /* HAVE_NDB_BINLOG */ - {& key_NDB_SHARE_mutex, "NDB_SHARE::mutex", PSI_FLAG_GLOBAL}, - {& key_ndbcluster_mutex, "ndbcluster_mutex", PSI_FLAG_GLOBAL}, - {& key_LOCK_ndb_util_thread, "LOCK_ndb_util_thread", PSI_FLAG_GLOBAL} -}; - -#ifdef HAVE_NDB_BINLOG -PSI_cond_key key_injector_cond; -#endif /* HAVE_NDB_BINLOG */ - -PSI_cond_key key_COND_ndb_util_thread, key_COND_ndb_util_ready; - -static PSI_cond_info all_ndbcluster_conds[]= -{ -#ifdef HAVE_NDB_BINLOG - {& key_injector_cond, "injector_cond", PSI_FLAG_GLOBAL}, -#endif /* HAVE_NDB_BINLOG */ - {& key_COND_ndb_util_thread, "COND_ndb_util_thread", PSI_FLAG_GLOBAL}, - {& key_COND_ndb_util_ready, "COND_ndb_util_ready", PSI_FLAG_GLOBAL} -}; - -#ifdef HAVE_NDB_BINLOG -PSI_thread_key key_thread_ndb_binlog; -#endif /* HAVE_NDB_BINLOG */ -PSI_thread_key key_thread_ndb_util; - -static PSI_thread_info all_ndbcluster_threads[]= -{ -#ifdef HAVE_NDB_BINLOG - { &key_thread_ndb_binlog, "ndb_binlog", PSI_FLAG_GLOBAL}, -#endif /* HAVE_NDB_BINLOG */ - { &key_thread_ndb_util, "ndb_util", PSI_FLAG_GLOBAL} 
-}; - -PSI_file_key key_file_ndb; - -static PSI_file_info all_ndbcluster_files[]= -{ - { &key_file_ndb, "ndb", 0} -}; - -void init_ndbcluster_psi_keys() -{ - const char* category= "ndbcluster"; - int count; - - if (PSI_server == NULL) - return; - - count= array_elements(all_ndbcluster_mutexes); - PSI_server->register_mutex(category, all_ndbcluster_mutexes, count); - - count= array_elements(all_ndbcluster_conds); - PSI_server->register_cond(category, all_ndbcluster_conds, count); - - count= array_elements(all_ndbcluster_threads); - PSI_server->register_thread(category, all_ndbcluster_threads, count); - - count= array_elements(all_ndbcluster_files); - PSI_server->register_file(category, all_ndbcluster_files, count); -} -#endif /* HAVE_PSI_INTERFACE */ - -static int ndbcluster_init(void *p) -{ - int res; - DBUG_ENTER("ndbcluster_init"); - - if (ndbcluster_inited) - DBUG_RETURN(FALSE); - -#ifdef HAVE_PSI_INTERFACE - init_ndbcluster_psi_keys(); -#endif - - mysql_mutex_init(key_ndbcluster_mutex, - &ndbcluster_mutex, MY_MUTEX_INIT_FAST); - mysql_mutex_init(key_LOCK_ndb_util_thread, - &LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST); - mysql_cond_init(key_COND_ndb_util_thread, &COND_ndb_util_thread, NULL); - mysql_cond_init(key_COND_ndb_util_ready, &COND_ndb_util_ready, NULL); - ndb_util_thread_running= -1; - ndbcluster_terminating= 0; - ndb_dictionary_is_mysqld= 1; - ndbcluster_hton= (handlerton *)p; - - { - handlerton *h= ndbcluster_hton; - h->state= SHOW_OPTION_YES; - h->db_type= DB_TYPE_NDBCLUSTER; - h->close_connection= ndbcluster_close_connection; - h->commit= ndbcluster_commit; - h->rollback= ndbcluster_rollback; - h->create= ndbcluster_create_handler; /* Create a new handler */ - h->drop_database= ndbcluster_drop_database; /* Drop a database */ - h->panic= ndbcluster_end; /* Panic call */ - h->show_status= ndbcluster_show_status; /* Show status */ - h->alter_tablespace= ndbcluster_alter_tablespace; /* Show status */ - h->partition_flags= ndbcluster_partition_flags; /* 
Partition flags */ - h->alter_table_flags=ndbcluster_alter_table_flags; /* Alter table flags */ - h->fill_is_table= ndbcluster_fill_is_table; -#ifdef HAVE_NDB_BINLOG - ndbcluster_binlog_init_handlerton(); -#endif - h->flags= HTON_CAN_RECREATE | HTON_TEMPORARY_NOT_SUPPORTED; - h->discover= ndbcluster_discover; - h->find_files= ndbcluster_find_files; - h->table_exists_in_engine= ndbcluster_table_exists_in_engine; - } - - // Format the connect string to be used for connecting to the cluster - int pos= 0; - char connectstring_buf[1024] = {0}; - if (opt_ndb_nodeid != 0) - pos+= my_snprintf(connectstring_buf, sizeof(connectstring_buf), - "nodeid=%u", opt_ndb_nodeid); - if (opt_ndb_mgmd_host) - pos+= my_snprintf(connectstring_buf+pos, sizeof(connectstring_buf)-pos, - "%s%s", pos ? "," : "", opt_ndb_mgmd_host); - if (opt_ndb_connectstring) - pos+= my_snprintf(connectstring_buf+pos, sizeof(connectstring_buf)-pos, - "%s%s", pos ? "," : "", opt_ndb_connectstring); - - - // Initialize ndb interface - ndb_init_internal(); - - // Set connectstring if specified - if (opt_ndb_connectstring != 0) - DBUG_PRINT("connectstring", ("%s", opt_ndb_connectstring)); - if ((g_ndb_cluster_connection= - new Ndb_cluster_connection(opt_ndb_connectstring)) == 0) - { - DBUG_PRINT("error",("Ndb_cluster_connection(%s)", - opt_ndb_connectstring)); - my_errno= HA_ERR_OUT_OF_MEM; - goto ndbcluster_init_error; - } - { - char buf[128]; - my_snprintf(buf, sizeof(buf), "mysqld --server-id=%lu", server_id); - g_ndb_cluster_connection->set_name(buf); - } - g_ndb_cluster_connection->set_optimized_node_selection - (THDVAR(0, optimized_node_selection)); - - // Create a Ndb object to open the connection to NDB - if ( (g_ndb= new Ndb(g_ndb_cluster_connection, "sys")) == 0 ) - { - DBUG_PRINT("error", ("failed to create global ndb object")); - my_errno= HA_ERR_OUT_OF_MEM; - goto ndbcluster_init_error; - } - if (g_ndb->init() != 0) - { - ERR_PRINT (g_ndb->getNdbError()); - goto ndbcluster_init_error; - } - - if 
((res= g_ndb_cluster_connection->connect(0,0,0)) == 0) - { - connect_callback(); - DBUG_PRINT("info",("NDBCLUSTER storage engine at %s on port %d", - g_ndb_cluster_connection->get_connected_host(), - g_ndb_cluster_connection->get_connected_port())); - g_ndb_cluster_connection->wait_until_ready(10,3); - } - else if (res == 1) - { - if (g_ndb_cluster_connection->start_connect_thread(connect_callback)) - { - DBUG_PRINT("error", ("g_ndb_cluster_connection->start_connect_thread()")); - goto ndbcluster_init_error; - } -#ifndef DBUG_OFF - { - char buf[1024]; - DBUG_PRINT("info", - ("NDBCLUSTER storage engine not started, " - "will connect using %s", - g_ndb_cluster_connection-> - get_connectstring(buf,sizeof(buf)))); - } -#endif - } - else - { - DBUG_ASSERT(res == -1); - DBUG_PRINT("error", ("permanent error")); - goto ndbcluster_init_error; - } - - (void) my_hash_init(&ndbcluster_open_tables,system_charset_info,32,0,0, - (my_hash_get_key) ndbcluster_get_key,0,0); -#ifdef HAVE_NDB_BINLOG - /* start the ndb injector thread */ - if (ndbcluster_binlog_start()) - goto ndbcluster_init_error; -#endif /* HAVE_NDB_BINLOG */ - - // Create utility thread - pthread_t tmp; - if (mysql_thread_create(key_thread_ndb_util, - &tmp, &connection_attrib, ndb_util_thread_func, 0)) - { - DBUG_PRINT("error", ("Could not create ndb utility thread")); - my_hash_free(&ndbcluster_open_tables); - mysql_mutex_destroy(&ndbcluster_mutex); - mysql_mutex_destroy(&LOCK_ndb_util_thread); - mysql_cond_destroy(&COND_ndb_util_thread); - mysql_cond_destroy(&COND_ndb_util_ready); - goto ndbcluster_init_error; - } - - /* Wait for the util thread to start */ - mysql_mutex_lock(&LOCK_ndb_util_thread); - while (ndb_util_thread_running < 0) - mysql_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread); - mysql_mutex_unlock(&LOCK_ndb_util_thread); - - if (!ndb_util_thread_running) - { - DBUG_PRINT("error", ("ndb utility thread exited prematurely")); - my_hash_free(&ndbcluster_open_tables); - 
mysql_mutex_destroy(&ndbcluster_mutex); - mysql_mutex_destroy(&LOCK_ndb_util_thread); - mysql_cond_destroy(&COND_ndb_util_thread); - mysql_cond_destroy(&COND_ndb_util_ready); - goto ndbcluster_init_error; - } - - ndbcluster_inited= 1; - DBUG_RETURN(FALSE); - -ndbcluster_init_error: - if (g_ndb) - delete g_ndb; - g_ndb= NULL; - if (g_ndb_cluster_connection) - delete g_ndb_cluster_connection; - g_ndb_cluster_connection= NULL; - ndbcluster_hton->state= SHOW_OPTION_DISABLED; // If we couldn't use handler - - DBUG_RETURN(TRUE); -} - -/** - Used to fill in INFORMATION_SCHEMA* tables. - - @param hton handle to the handlerton structure - @param thd the thread/connection descriptor - @param[in,out] tables the information schema table that is filled up - @param cond used for conditional pushdown to storage engine - @param schema_table_idx the table id that distinguishes the type of table - - @return Operation status - */ -static int ndbcluster_fill_is_table(handlerton *hton, - THD *thd, - TABLE_LIST *tables, - COND *cond, - enum enum_schema_tables schema_table_idx) -{ - int ret= 0; - - if (schema_table_idx == SCH_FILES) - { - ret= ndbcluster_fill_files_table(hton, thd, tables, cond); - } - - return ret; -} - - -static int ndbcluster_end(handlerton *hton, ha_panic_function type) -{ - DBUG_ENTER("ndbcluster_end"); - - if (!ndbcluster_inited) - DBUG_RETURN(0); - ndbcluster_inited= 0; - - /* wait for util thread to finish */ - sql_print_information("Stopping Cluster Utility thread"); - mysql_mutex_lock(&LOCK_ndb_util_thread); - ndbcluster_terminating= 1; - mysql_cond_signal(&COND_ndb_util_thread); - while (ndb_util_thread_running > 0) - mysql_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread); - mysql_mutex_unlock(&LOCK_ndb_util_thread); - - -#ifdef HAVE_NDB_BINLOG - { - mysql_mutex_lock(&ndbcluster_mutex); - while (ndbcluster_open_tables.records) - { - NDB_SHARE *share= - (NDB_SHARE*) my_hash_element(&ndbcluster_open_tables, 0); -#ifndef DBUG_OFF - fprintf(stderr, "NDB: 
table share %s with use_count %d not freed\n", - share->key, share->use_count); -#endif - ndbcluster_real_free_share(&share); - } - mysql_mutex_unlock(&ndbcluster_mutex); - } -#endif - my_hash_free(&ndbcluster_open_tables); - - if (g_ndb) - { -#ifndef DBUG_OFF - Ndb::Free_list_usage tmp; - tmp.m_name= 0; - while (g_ndb->get_free_list_usage(&tmp)) - { - uint leaked= (uint) tmp.m_created - tmp.m_free; - if (leaked) - fprintf(stderr, "NDB: Found %u %s%s that %s not been released\n", - leaked, tmp.m_name, - (leaked == 1)?"":"'s", - (leaked == 1)?"has":"have"); - } -#endif - delete g_ndb; - g_ndb= NULL; - } - delete g_ndb_cluster_connection; - g_ndb_cluster_connection= NULL; - - // cleanup ndb interface - ndb_end_internal(); - - mysql_mutex_destroy(&ndbcluster_mutex); - mysql_mutex_destroy(&LOCK_ndb_util_thread); - mysql_cond_destroy(&COND_ndb_util_thread); - mysql_cond_destroy(&COND_ndb_util_ready); - DBUG_RETURN(0); -} - -void ha_ndbcluster::print_error(int error, myf errflag) -{ - DBUG_ENTER("ha_ndbcluster::print_error"); - DBUG_PRINT("enter", ("error: %d", error)); - - if (error == HA_ERR_NO_PARTITION_FOUND) - m_part_info->print_no_partition_found(table); - else - handler::print_error(error, errflag); - DBUG_VOID_RETURN; -} - - -/** - Static error print function called from static handler method - ndbcluster_commit and ndbcluster_rollback. -*/ - -void ndbcluster_print_error(int error, const NdbOperation *error_op) -{ - DBUG_ENTER("ndbcluster_print_error"); - TABLE_SHARE share; - const char *tab_name= (error_op) ? error_op->getTableName() : ""; - share.db.str= (char*) ""; - share.db.length= 0; - share.table_name.str= (char *) tab_name; - share.table_name.length= strlen(tab_name); - ha_ndbcluster error_handler(ndbcluster_hton, &share); - error_handler.print_error(error, MYF(0)); - DBUG_VOID_RETURN; -} - -/** - Set a given location from full pathname to database name. 
-*/ - -void ha_ndbcluster::set_dbname(const char *path_name, char *dbname) -{ - char *end, *ptr, *tmp_name; - char tmp_buff[FN_REFLEN + 1]; - - tmp_name= tmp_buff; - /* Scan name from the end */ - ptr= strend(path_name)-1; - while (ptr >= path_name && *ptr != '\\' && *ptr != '/') { - ptr--; - } - ptr--; - end= ptr; - while (ptr >= path_name && *ptr != '\\' && *ptr != '/') { - ptr--; - } - uint name_len= end - ptr; - memcpy(tmp_name, ptr + 1, name_len); - tmp_name[name_len]= '\0'; -#ifdef __WIN__ - /* Put to lower case */ - - ptr= tmp_name; - - while (*ptr != '\0') { - *ptr= tolower(*ptr); - ptr++; - } -#endif - filename_to_tablename(tmp_name, dbname, sizeof(tmp_buff) - 1); -} - -/** - Set m_dbname from full pathname to table file. -*/ - -void ha_ndbcluster::set_dbname(const char *path_name) -{ - set_dbname(path_name, m_dbname); -} - -/** - Set a given location from full pathname to table file. -*/ - -void -ha_ndbcluster::set_tabname(const char *path_name, char * tabname) -{ - char *end, *ptr, *tmp_name; - char tmp_buff[FN_REFLEN + 1]; - - tmp_name= tmp_buff; - /* Scan name from the end */ - end= strend(path_name)-1; - ptr= end; - while (ptr >= path_name && *ptr != '\\' && *ptr != '/') { - ptr--; - } - uint name_len= end - ptr; - memcpy(tmp_name, ptr + 1, end - ptr); - tmp_name[name_len]= '\0'; -#ifdef __WIN__ - /* Put to lower case */ - ptr= tmp_name; - - while (*ptr != '\0') { - *ptr= tolower(*ptr); - ptr++; - } -#endif - filename_to_tablename(tmp_name, tabname, sizeof(tmp_buff) - 1); -} - -/** - Set m_tabname from full pathname to table file. 
-*/ - -void ha_ndbcluster::set_tabname(const char *path_name) -{ - set_tabname(path_name, m_tabname); -} - - -ha_rows -ha_ndbcluster::records_in_range(uint inx, key_range *min_key, - key_range *max_key) -{ - KEY *key_info= table->key_info + inx; - uint key_length= key_info->key_length; - NDB_INDEX_TYPE idx_type= get_index_type(inx); - - DBUG_ENTER("records_in_range"); - // Prevent partial read of hash indexes by returning HA_POS_ERROR - if ((idx_type == UNIQUE_INDEX || idx_type == PRIMARY_KEY_INDEX) && - ((min_key && min_key->length < key_length) || - (max_key && max_key->length < key_length))) - DBUG_RETURN(HA_POS_ERROR); - - // Read from hash index with full key - // This is a "const" table which returns only one record! - if ((idx_type != ORDERED_INDEX) && - ((min_key && min_key->length == key_length) || - (max_key && max_key->length == key_length))) - DBUG_RETURN(1); - - if ((idx_type == PRIMARY_KEY_ORDERED_INDEX || - idx_type == UNIQUE_ORDERED_INDEX || - idx_type == ORDERED_INDEX) && - m_index[inx].index_stat != NULL) - { - NDB_INDEX_DATA& d=m_index[inx]; - const NDBINDEX* index= d.index; - Ndb* ndb=get_ndb(); - NdbTransaction* trans=NULL; - NdbIndexScanOperation* op=NULL; - int res=0; - Uint64 rows; - - do - { - // We must provide approx table rows - Uint64 table_rows=0; - Ndb_local_table_statistics *ndb_info= m_table_info; - if (ndb_info->records != ~(ha_rows)0 && ndb_info->records != 0) - { - table_rows = ndb_info->records; - DBUG_PRINT("info", ("use info->records: %lu", (ulong) table_rows)); - } - else - { - Ndb_statistics stat; - if ((res=ndb_get_table_statistics(this, TRUE, ndb, m_table, &stat))) - break; - table_rows=stat.row_count; - DBUG_PRINT("info", ("use db row_count: %lu", (ulong) table_rows)); - if (table_rows == 0) { - // Problem if autocommit=0 -#ifdef ndb_get_table_statistics_uses_active_trans - rows=0; - break; -#endif - } - } - - // Define scan op for the range - if ((trans=m_active_trans) == NULL || - trans->commitStatus() != 
NdbTransaction::Started) - { - DBUG_PRINT("info", ("no active trans")); - if (! (trans=ndb->startTransaction())) - ERR_BREAK(ndb->getNdbError(), res); - } - if (! (op=trans->getNdbIndexScanOperation(index, (NDBTAB*)m_table))) - ERR_BREAK(trans->getNdbError(), res); - if ((op->readTuples(NdbOperation::LM_CommittedRead)) == -1) - ERR_BREAK(op->getNdbError(), res); - const key_range *keys[2]={ min_key, max_key }; - if ((res=set_bounds(op, inx, TRUE, keys)) != 0) - break; - - // Decide if db should be contacted - int flags=0; - if (d.index_stat_query_count < d.index_stat_cache_entries || - (d.index_stat_update_freq != 0 && - d.index_stat_query_count % d.index_stat_update_freq == 0)) - { - DBUG_PRINT("info", ("force stat from db")); - flags|=NdbIndexStat::RR_UseDb; - } - if (d.index_stat->records_in_range(index, op, table_rows, &rows, flags) == -1) - ERR_BREAK(d.index_stat->getNdbError(), res); - d.index_stat_query_count++; - } while (0); - - if (trans != m_active_trans && rows == 0) - rows = 1; - if (trans != m_active_trans && trans != NULL) - ndb->closeTransaction(trans); - if (res != 0) - DBUG_RETURN(HA_POS_ERROR); - DBUG_RETURN(rows); - } - - DBUG_RETURN(10); /* Good guess when you don't know anything */ -} - -ulonglong ha_ndbcluster::table_flags(void) const -{ - THD *thd= current_thd; - ulonglong f= m_table_flags; - if (m_ha_not_exact_count) - f= f & ~HA_STATS_RECORDS_IS_EXACT; - /* - To allow for logging of ndb tables during stmt based logging; - flag cabablity, but also turn off flag for OWN_BINLOGGING - */ - if (thd->variables.binlog_format == BINLOG_FORMAT_STMT) - f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING; - return f; -} -const char * ha_ndbcluster::table_type() const -{ - return("NDBCLUSTER"); -} -uint ha_ndbcluster::max_supported_record_length() const -{ - return NDB_MAX_TUPLE_SIZE; -} -uint ha_ndbcluster::max_supported_keys() const -{ - return MAX_KEY; -} -uint ha_ndbcluster::max_supported_key_parts() const -{ - return 
NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY; -} -uint ha_ndbcluster::max_supported_key_length() const -{ - return NDB_MAX_KEY_SIZE; -} -uint ha_ndbcluster::max_supported_key_part_length() const -{ - return NDB_MAX_KEY_SIZE; -} -bool ha_ndbcluster::low_byte_first() const -{ -#ifdef WORDS_BIGENDIAN - return FALSE; -#else - return TRUE; -#endif -} -const char* ha_ndbcluster::index_type(uint key_number) -{ - switch (get_index_type(key_number)) { - case ORDERED_INDEX: - case UNIQUE_ORDERED_INDEX: - case PRIMARY_KEY_ORDERED_INDEX: - return "BTREE"; - case UNIQUE_INDEX: - case PRIMARY_KEY_INDEX: - default: - return "HASH"; - } -} - -uint8 ha_ndbcluster::table_cache_type() -{ - DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT"); - DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT); -} - - -/** - Retrieve the commit count for the table object. - - @param thd Thread context. - @param norm_name Normalized path to the table. - @param[out] commit_count Commit count for the table. - - @return 0 on success. - @return 1 if an error occured. 
-*/ - -uint ndb_get_commitcount(THD *thd, char *norm_name, - Uint64 *commit_count) -{ - char dbname[NAME_LEN + 1]; - NDB_SHARE *share; - DBUG_ENTER("ndb_get_commitcount"); - - DBUG_PRINT("enter", ("name: %s", norm_name)); - pthread_mutex_lock(&ndbcluster_mutex); - if (!(share=(NDB_SHARE*) my_hash_search(&ndbcluster_open_tables, - (const uchar*) norm_name, - strlen(norm_name)))) - { - pthread_mutex_unlock(&ndbcluster_mutex); - DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables", - norm_name)); - DBUG_RETURN(1); - } - /* ndb_share reference temporary, free below */ - share->use_count++; - DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", - share->key, share->use_count)); - mysql_mutex_unlock(&ndbcluster_mutex); - - mysql_mutex_lock(&share->mutex); - if (opt_ndb_cache_check_time > 0) - { - if (share->commit_count != 0) - { - *commit_count= share->commit_count; -#ifndef DBUG_OFF - char buff[22]; -#endif - DBUG_PRINT("info", ("Getting commit_count: %s from share", - llstr(share->commit_count, buff))); - mysql_mutex_unlock(&share->mutex); - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - DBUG_RETURN(0); - } - } - DBUG_PRINT("info", ("Get commit_count from NDB")); - Ndb *ndb; - if (!(ndb= check_ndb_in_thd(thd))) - DBUG_RETURN(1); - - ha_ndbcluster::set_dbname(norm_name, dbname); - if (ndb->setDatabaseName(dbname)) - { - ERR_RETURN(ndb->getNdbError()); - } - uint lock= share->commit_count_lock; - mysql_mutex_unlock(&share->mutex); - - struct Ndb_statistics stat; - { - char tblname[NAME_LEN + 1]; - ha_ndbcluster::set_tabname(norm_name, tblname); - Ndb_table_guard ndbtab_g(ndb->getDictionary(), tblname); - if (ndbtab_g.get_table() == 0 - || ndb_get_table_statistics(NULL, FALSE, ndb, ndbtab_g.get_table(), &stat)) - { - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, 
share->use_count)); - free_share(&share); - DBUG_RETURN(1); - } - } - - mysql_mutex_lock(&share->mutex); - if (share->commit_count_lock == lock) - { -#ifndef DBUG_OFF - char buff[22]; -#endif - DBUG_PRINT("info", ("Setting commit_count to %s", - llstr(stat.commit_count, buff))); - share->commit_count= stat.commit_count; - *commit_count= stat.commit_count; - } - else - { - DBUG_PRINT("info", ("Discarding commit_count, comit_count_lock changed")); - *commit_count= 0; - } - mysql_mutex_unlock(&share->mutex); - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - DBUG_RETURN(0); -} - - -/** - Check if a cached query can be used. - - This is done by comparing the supplied engine_data to commit_count of - the table. - - The commit_count is either retrieved from the share for the table, where - it has been cached by the util thread. If the util thread is not started, - NDB has to be contacetd to retrieve the commit_count, this will introduce - a small delay while waiting for NDB to answer. - - - @param thd thread handle - @param full_name normalized path to the table in the canonical - format. - @param full_name_len length of the normalized path to the table. - @param engine_data parameter retrieved when query was first inserted into - the cache. If the value of engine_data is changed, - all queries for this table should be invalidated. 
- - @retval - TRUE Yes, use the query from cache - @retval - FALSE No, don't use the cached query, and if engine_data - has changed, all queries for this table should be invalidated - -*/ - -static my_bool -ndbcluster_cache_retrieval_allowed(THD *thd, - char *full_name, uint full_name_len, - ulonglong *engine_data) -{ - Uint64 commit_count; - char dbname[NAME_LEN + 1]; - char tabname[NAME_LEN + 1]; -#ifndef DBUG_OFF - char buff[22], buff2[22]; -#endif - - ha_ndbcluster::set_dbname(full_name, dbname); - ha_ndbcluster::set_tabname(full_name, tabname); - - DBUG_ENTER("ndbcluster_cache_retrieval_allowed"); - DBUG_PRINT("enter", ("dbname: %s, tabname: %s", dbname, tabname)); - - if (thd->in_multi_stmt_transaction_mode()) - { - DBUG_PRINT("exit", ("No, don't use cache in transaction")); - DBUG_RETURN(FALSE); - } - - if (ndb_get_commitcount(thd, full_name, &commit_count)) - { - *engine_data= 0; /* invalidate */ - DBUG_PRINT("exit", ("No, could not retrieve commit_count")); - DBUG_RETURN(FALSE); - } - DBUG_PRINT("info", ("*engine_data: %s, commit_count: %s", - llstr(*engine_data, buff), llstr(commit_count, buff2))); - if (commit_count == 0) - { - *engine_data= 0; /* invalidate */ - DBUG_PRINT("exit", ("No, local commit has been performed")); - DBUG_RETURN(FALSE); - } - else if (*engine_data != commit_count) - { - *engine_data= commit_count; /* invalidate */ - DBUG_PRINT("exit", ("No, commit_count has changed")); - DBUG_RETURN(FALSE); - } - - DBUG_PRINT("exit", ("OK to use cache, engine_data: %s", - llstr(*engine_data, buff))); - DBUG_RETURN(TRUE); -} - - -/** - Register a table for use in the query cache. - - Fetch the commit_count for the table and return it in engine_data, - this will later be used to check if the table has changed, before - the cached query is reused. - - @param thd thread handle - @param full_name normalized path to the table in the - canonical format. - @param full_name_len length of the normalized path to the table. 
- @param engine_callback function to be called before using cache on - this table - @param[out] engine_data commit_count for this table - - @retval - TRUE Yes, it's ok to cahce this query - @retval - FALSE No, don't cach the query -*/ - -my_bool -ha_ndbcluster::register_query_cache_table(THD *thd, - char *full_name, uint full_name_len, - qc_engine_callback *engine_callback, - ulonglong *engine_data) -{ - Uint64 commit_count; -#ifndef DBUG_OFF - char buff[22]; -#endif - DBUG_ENTER("ha_ndbcluster::register_query_cache_table"); - DBUG_PRINT("enter",("dbname: %s, tabname: %s", m_dbname, m_tabname)); - - if (thd->in_multi_stmt_transaction_mode()) - { - DBUG_PRINT("exit", ("Can't register table during transaction")); - DBUG_RETURN(FALSE); - } - - if (ndb_get_commitcount(thd, full_name, &commit_count)) - { - *engine_data= 0; - DBUG_PRINT("exit", ("Error, could not get commitcount")); - DBUG_RETURN(FALSE); - } - *engine_data= commit_count; - *engine_callback= ndbcluster_cache_retrieval_allowed; - DBUG_PRINT("exit", ("commit_count: %s", llstr(commit_count, buff))); - DBUG_RETURN(commit_count > 0); -} - - -/** - Handling the shared NDB_SHARE structure that is needed to - provide table locking. - - It's also used for sharing data with other NDB handlers - in the same MySQL Server. There is currently not much - data we want to or can share. 
-*/ - -static uchar *ndbcluster_get_key(NDB_SHARE *share, size_t *length, - my_bool not_used __attribute__((unused))) -{ - *length= share->key_length; - return (uchar*) share->key; -} - - -#ifndef DBUG_OFF - -static void print_share(const char* where, NDB_SHARE* share) -{ - fprintf(DBUG_FILE, - "%s %s.%s: use_count: %u, commit_count: %lu\n", - where, share->db, share->table_name, share->use_count, - (ulong) share->commit_count); - fprintf(DBUG_FILE, - " - key: %s, key_length: %d\n", - share->key, share->key_length); - -#ifdef HAVE_NDB_BINLOG - if (share->table) - fprintf(DBUG_FILE, - " - share->table: %p %s.%s\n", - share->table, share->table->s->db.str, - share->table->s->table_name.str); -#endif -} - - -static void print_ndbcluster_open_tables() -{ - DBUG_LOCK_FILE; - fprintf(DBUG_FILE, ">ndbcluster_open_tables\n"); - for (uint i= 0; i < ndbcluster_open_tables.records; i++) - print_share("", - (NDB_SHARE*)my_hash_element(&ndbcluster_open_tables, i)); - fprintf(DBUG_FILE, "<ndbcluster_open_tables\n"); - DBUG_UNLOCK_FILE; -} - -#endif - - -#define dbug_print_open_tables() \ - DBUG_EXECUTE("info", \ - print_ndbcluster_open_tables();); - -#define dbug_print_share(t, s) \ - DBUG_LOCK_FILE; \ - DBUG_EXECUTE("info", \ - print_share((t), (s));); \ - DBUG_UNLOCK_FILE; - - -#ifdef HAVE_NDB_BINLOG -/* - For some reason a share is still around, try to salvage the situation - by closing all cached tables. If the share still exists, there is an - error somewhere but only report this to the error log. Keep this - "trailing share" but rename it since there are still references to it - to avoid segmentation faults. There is a risk that the memory for - this trailing share leaks. 
- - Must be called with previous mysql_mutex_lock(&ndbcluster_mutex) -*/ -int handle_trailing_share(NDB_SHARE *share) -{ - THD *thd= current_thd; - static ulong trailing_share_id= 0; - DBUG_ENTER("handle_trailing_share"); - - /* ndb_share reference temporary, free below */ - ++share->use_count; - DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", - share->key, share->use_count)); - mysql_mutex_unlock(&ndbcluster_mutex); - - TABLE_LIST table_list; - bzero((char*) &table_list,sizeof(table_list)); - table_list.db= share->db; - table_list.alias= table_list.table_name= share->table_name; - close_cached_tables(thd, &table_list, FALSE, LONG_TIMEOUT); - - mysql_mutex_lock(&ndbcluster_mutex); - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - if (!--share->use_count) - { - if (opt_ndb_extra_logging) - sql_print_information("NDB_SHARE: trailing share " - "%s(connect_count: %u) " - "released by close_cached_tables at " - "connect_count: %u", - share->key, - share->connect_count, - g_ndb_cluster_connection->get_connect_count()); - ndbcluster_real_free_share(&share); - DBUG_RETURN(0); - } - - /* - share still exists, if share has not been dropped by server - release that share - */ - if (share->state != NSS_DROPPED) - { - share->state= NSS_DROPPED; - /* ndb_share reference create free */ - DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u", - share->key, share->use_count)); - --share->use_count; - - if (share->use_count == 0) - { - if (opt_ndb_extra_logging) - sql_print_information("NDB_SHARE: trailing share " - "%s(connect_count: %u) " - "released after NSS_DROPPED check " - "at connect_count: %u", - share->key, - share->connect_count, - g_ndb_cluster_connection->get_connect_count()); - ndbcluster_real_free_share(&share); - DBUG_RETURN(0); - } - } - - sql_print_warning("NDB_SHARE: %s already exists use_count=%d." 
- " Moving away for safety, but possible memleak.", - share->key, share->use_count); - dbug_print_open_tables(); - - /* - Ndb share has not been released as it should - */ -#ifdef NOT_YET - DBUG_ASSERT(FALSE); -#endif - - /* - This is probably an error. We can however save the situation - at the cost of a possible mem leak, by "renaming" the share - - First remove from hash - */ - my_hash_delete(&ndbcluster_open_tables, (uchar*) share); - - /* - now give it a new name, just a running number - if space is not enough allocate some more - */ - { - const uint min_key_length= 10; - if (share->key_length < min_key_length) - { - share->key= (char*) alloc_root(&share->mem_root, min_key_length + 1); - share->key_length= min_key_length; - } - share->key_length= - my_snprintf(share->key, min_key_length + 1, "#leak%lu", - trailing_share_id++); - } - /* Keep it for possible the future trailing free */ - my_hash_insert(&ndbcluster_open_tables, (uchar*) share); - - DBUG_RETURN(0); -} - -/* - Rename share is used during rename table. 
-*/ -static int rename_share(NDB_SHARE *share, const char *new_key) -{ - NDB_SHARE *tmp; - mysql_mutex_lock(&ndbcluster_mutex); - uint new_length= (uint) strlen(new_key); - DBUG_PRINT("rename_share", ("old_key: %s old__length: %d", - share->key, share->key_length)); - if ((tmp= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables, - (uchar*) new_key, new_length))) - handle_trailing_share(tmp); - - /* remove the share from hash */ - my_hash_delete(&ndbcluster_open_tables, (uchar*) share); - dbug_print_open_tables(); - - /* save old stuff if insert should fail */ - uint old_length= share->key_length; - char *old_key= share->key; - - /* - now allocate and set the new key, db etc - enough space for key, db, and table_name - */ - share->key= (char*) alloc_root(&share->mem_root, 2 * (new_length + 1)); - strmov(share->key, new_key); - share->key_length= new_length; - - if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share)) - { - // ToDo free the allocated stuff above? - DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed", - share->key)); - share->key= old_key; - share->key_length= old_length; - if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share)) - { - sql_print_error("rename_share: failed to recover %s", share->key); - DBUG_PRINT("error", ("rename_share: my_hash_insert %s failed", - share->key)); - } - dbug_print_open_tables(); - mysql_mutex_unlock(&ndbcluster_mutex); - return -1; - } - dbug_print_open_tables(); - - share->db= share->key + new_length + 1; - ha_ndbcluster::set_dbname(new_key, share->db); - share->table_name= share->db + strlen(share->db) + 1; - ha_ndbcluster::set_tabname(new_key, share->table_name); - - dbug_print_share("rename_share:", share); - if (share->table) - { - if (share->op == 0) - { - share->table->s->db.str= share->db; - share->table->s->db.length= strlen(share->db); - share->table->s->table_name.str= share->table_name; - share->table->s->table_name.length= strlen(share->table_name); - } - } - /* else rename will be 
handled when the ALTER event comes */ - share->old_names= old_key; - // ToDo free old_names after ALTER EVENT - - mysql_mutex_unlock(&ndbcluster_mutex); - return 0; -} -#endif - -/* - Increase refcount on existing share. - Always returns share and cannot fail. -*/ -NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share) -{ - mysql_mutex_lock(&ndbcluster_mutex); - share->use_count++; - - dbug_print_open_tables(); - dbug_print_share("ndbcluster_get_share:", share); - mysql_mutex_unlock(&ndbcluster_mutex); - return share; -} - - -/* - Get a share object for key - - Returns share for key, and increases the refcount on the share. - - create_if_not_exists == TRUE: - creates share if it does not alreade exist - returns 0 only due to out of memory, and then sets my_error - - create_if_not_exists == FALSE: - returns 0 if share does not exist - - have_lock == TRUE, mysql_mutex_lock(&ndbcluster_mutex) already taken -*/ - -NDB_SHARE *ndbcluster_get_share(const char *key, TABLE *table, - bool create_if_not_exists, - bool have_lock) -{ - NDB_SHARE *share; - uint length= (uint) strlen(key); - DBUG_ENTER("ndbcluster_get_share"); - DBUG_PRINT("enter", ("key: '%s'", key)); - - if (!have_lock) - mysql_mutex_lock(&ndbcluster_mutex); - if (!(share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables, - (uchar*) key, - length))) - { - if (!create_if_not_exists) - { - DBUG_PRINT("error", ("get_share: %s does not exist", key)); - if (!have_lock) - mysql_mutex_unlock(&ndbcluster_mutex); - DBUG_RETURN(0); - } - if ((share= (NDB_SHARE*) my_malloc(sizeof(*share), - MYF(MY_WME | MY_ZEROFILL)))) - { - MEM_ROOT **root_ptr= - my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); - MEM_ROOT *old_root= *root_ptr; - init_sql_alloc(&share->mem_root, 1024, 0, MYF(0)); - *root_ptr= &share->mem_root; // remember to reset before return - share->state= NSS_INITIAL; - /* enough space for key, db, and table_name */ - share->key= (char*) alloc_root(*root_ptr, 2 * (length + 1)); - share->key_length= length; - 
strmov(share->key, key); - if (my_hash_insert(&ndbcluster_open_tables, (uchar*) share)) - { - free_root(&share->mem_root, MYF(0)); - my_free(share); - *root_ptr= old_root; - if (!have_lock) - mysql_mutex_unlock(&ndbcluster_mutex); - DBUG_RETURN(0); - } - thr_lock_init(&share->lock); - mysql_mutex_init(key_NDB_SHARE_mutex, &share->mutex, MY_MUTEX_INIT_FAST); - share->commit_count= 0; - share->commit_count_lock= 0; - share->db= share->key + length + 1; - ha_ndbcluster::set_dbname(key, share->db); - share->table_name= share->db + strlen(share->db) + 1; - ha_ndbcluster::set_tabname(key, share->table_name); -#ifdef HAVE_NDB_BINLOG - if (ndbcluster_binlog_init_share(share, table)) - { - DBUG_PRINT("error", ("get_share: %s could not init share", key)); - ndbcluster_real_free_share(&share); - *root_ptr= old_root; - if (!have_lock) - mysql_mutex_unlock(&ndbcluster_mutex); - DBUG_RETURN(0); - } -#endif - *root_ptr= old_root; - } - else - { - DBUG_PRINT("error", ("get_share: failed to alloc share")); - if (!have_lock) - mysql_mutex_unlock(&ndbcluster_mutex); - my_error(ER_OUTOFMEMORY, MYF(0), static_cast<int>(sizeof(*share))); - DBUG_RETURN(0); - } - } - share->use_count++; - - dbug_print_open_tables(); - dbug_print_share("ndbcluster_get_share:", share); - if (!have_lock) - mysql_mutex_unlock(&ndbcluster_mutex); - DBUG_RETURN(share); -} - - -void ndbcluster_real_free_share(NDB_SHARE **share) -{ - DBUG_ENTER("ndbcluster_real_free_share"); - dbug_print_share("ndbcluster_real_free_share:", *share); - - my_hash_delete(&ndbcluster_open_tables, (uchar*) *share); - thr_lock_delete(&(*share)->lock); - mysql_mutex_destroy(&(*share)->mutex); - -#ifdef HAVE_NDB_BINLOG - if ((*share)->table) - { - // (*share)->table->mem_root is freed by closefrm - closefrm((*share)->table, 0); - // (*share)->table_share->mem_root is freed by free_table_share - free_table_share((*share)->table_share); -#ifndef DBUG_OFF - bzero((uchar*)(*share)->table_share, sizeof(*(*share)->table_share)); - 
bzero((uchar*)(*share)->table, sizeof(*(*share)->table)); - (*share)->table_share= 0; - (*share)->table= 0; -#endif - } -#endif - free_root(&(*share)->mem_root, MYF(0)); - my_free(*share); - *share= 0; - - dbug_print_open_tables(); - DBUG_VOID_RETURN; -} - - -void ndbcluster_free_share(NDB_SHARE **share, bool have_lock) -{ - if (!have_lock) - mysql_mutex_lock(&ndbcluster_mutex); - if ((*share)->util_lock == current_thd) - (*share)->util_lock= 0; - if (!--(*share)->use_count) - { - ndbcluster_real_free_share(share); - } - else - { - dbug_print_open_tables(); - dbug_print_share("ndbcluster_free_share:", *share); - } - if (!have_lock) - mysql_mutex_unlock(&ndbcluster_mutex); -} - - -static -int -ndb_get_table_statistics(ha_ndbcluster* file, bool report_error, Ndb* ndb, const NDBTAB *ndbtab, - struct Ndb_statistics * ndbstat) -{ - NdbTransaction* pTrans; - NdbError error; - int retries= 10; - int reterr= 0; - int retry_sleep= 30; /* 30 milliseconds, transaction */ -#ifndef DBUG_OFF - char buff[22], buff2[22], buff3[22], buff4[22]; -#endif - DBUG_ENTER("ndb_get_table_statistics"); - DBUG_PRINT("enter", ("table: %s", ndbtab->getName())); - - DBUG_ASSERT(ndbtab != 0); - - do - { - Uint64 rows, commits, fixed_mem, var_mem; - Uint32 size; - Uint32 count= 0; - Uint64 sum_rows= 0; - Uint64 sum_commits= 0; - Uint64 sum_row_size= 0; - Uint64 sum_mem= 0; - NdbScanOperation*pOp; - int check; - - if ((pTrans= ndb->startTransaction()) == NULL) - { - error= ndb->getNdbError(); - goto retry; - } - - if ((pOp= pTrans->getNdbScanOperation(ndbtab)) == NULL) - { - error= pTrans->getNdbError(); - goto retry; - } - - if (pOp->readTuples(NdbOperation::LM_CommittedRead)) - { - error= pOp->getNdbError(); - goto retry; - } - - if (pOp->interpret_exit_last_row() == -1) - { - error= pOp->getNdbError(); - goto retry; - } - - pOp->getValue(NdbDictionary::Column::ROW_COUNT, (char*)&rows); - pOp->getValue(NdbDictionary::Column::COMMIT_COUNT, (char*)&commits); - 
pOp->getValue(NdbDictionary::Column::ROW_SIZE, (char*)&size); - pOp->getValue(NdbDictionary::Column::FRAGMENT_FIXED_MEMORY, - (char*)&fixed_mem); - pOp->getValue(NdbDictionary::Column::FRAGMENT_VARSIZED_MEMORY, - (char*)&var_mem); - - if (pTrans->execute(NdbTransaction::NoCommit, - NdbOperation::AbortOnError, - TRUE) == -1) - { - error= pTrans->getNdbError(); - goto retry; - } - - while ((check= pOp->nextResult(TRUE, TRUE)) == 0) - { - sum_rows+= rows; - sum_commits+= commits; - if (sum_row_size < size) - sum_row_size= size; - sum_mem+= fixed_mem + var_mem; - count++; - } - - if (check == -1) - { - error= pOp->getNdbError(); - goto retry; - } - - pOp->close(TRUE); - - ndb->closeTransaction(pTrans); - - ndbstat->row_count= sum_rows; - ndbstat->commit_count= sum_commits; - ndbstat->row_size= sum_row_size; - ndbstat->fragment_memory= sum_mem; - - DBUG_PRINT("exit", ("records: %s commits: %s " - "row_size: %s mem: %s count: %u", - llstr(sum_rows, buff), - llstr(sum_commits, buff2), - llstr(sum_row_size, buff3), - llstr(sum_mem, buff4), - count)); - - DBUG_RETURN(0); -retry: - if(report_error) - { - if (file && pTrans) - { - reterr= file->ndb_err(pTrans); - } - else - { - const NdbError& tmp= error; - ERR_PRINT(tmp); - reterr= ndb_to_mysql_error(&tmp); - } - } - else - reterr= error.code; - - if (pTrans) - { - ndb->closeTransaction(pTrans); - pTrans= NULL; - } - if (error.status == NdbError::TemporaryError && retries--) - { - my_sleep(retry_sleep); - continue; - } - set_ndb_err(current_thd, error); - break; - } while(1); - DBUG_PRINT("exit", ("failed, reterr: %u, NdbError %u(%s)", reterr, - error.code, error.message)); - DBUG_RETURN(reterr); -} - -/** - Create a .ndb file to serve as a placeholder indicating - that the table with this name is a ndb table. 
-*/ - -int ha_ndbcluster::write_ndb_file(const char *name) -{ - File file; - bool error=1; - char path[FN_REFLEN]; - - DBUG_ENTER("write_ndb_file"); - DBUG_PRINT("enter", ("name: %s", name)); - - (void)strxnmov(path, FN_REFLEN-1, - mysql_data_home,"/",name,ha_ndb_ext,NullS); - - if ((file= mysql_file_create(key_file_ndb, path, CREATE_MODE, - O_RDWR | O_TRUNC, MYF(MY_WME))) >= 0) - { - // It's an empty file - error=0; - mysql_file_close(file, MYF(0)); - } - DBUG_RETURN(error); -} - -void -ha_ndbcluster::release_completed_operations(NdbTransaction *trans, - bool force_release) -{ - if (trans->hasBlobOperation()) - { - /* We are reading/writing BLOB fields, - releasing operation records is unsafe - */ - return; - } - if (!force_release) - { - if (get_thd_ndb(current_thd)->query_state & NDB_QUERY_MULTI_READ_RANGE) - { - /* We are batching reads and have not consumed all fetched - rows yet, releasing operation records is unsafe - */ - return; - } - } - trans->releaseCompletedOperations(); -} - -bool -ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges, - KEY_MULTI_RANGE *end_range, - HANDLER_BUFFER *buffer) -{ - DBUG_ENTER("null_value_index_search"); - KEY* key_info= table->key_info + active_index; - KEY_MULTI_RANGE *range= ranges; - ulong reclength= table->s->reclength; - uchar *curr= (uchar*)buffer->buffer; - uchar *end_of_buffer= (uchar*)buffer->buffer_end; - - for (; range<end_range && curr+reclength <= end_of_buffer; - range++) - { - const uchar *key= range->start_key.key; - uint key_len= range->start_key.length; - if (check_null_in_key(key_info, key, key_len)) - DBUG_RETURN(TRUE); - curr += reclength; - } - DBUG_RETURN(FALSE); -} - -#if 0 -/* MRR/NDB is disabled, for details see method declarations in ha_ndbcluster.h */ -int -ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, - KEY_MULTI_RANGE *ranges, - uint range_count, - bool sorted, - HANDLER_BUFFER *buffer) -{ - m_write_op= FALSE; - int res; - KEY* key_info= table->key_info + 
active_index; - NDB_INDEX_TYPE cur_index_type= get_index_type(active_index); - ulong reclength= table_share->reclength; - NdbOperation* op; - Thd_ndb *thd_ndb= get_thd_ndb(current_thd); - DBUG_ENTER("ha_ndbcluster::read_multi_range_first"); - - /** - * blobs and unique hash index with NULL can't be batched currently - */ - if (uses_blob_value() || - (cur_index_type == UNIQUE_INDEX && - has_null_in_unique_index(active_index) && - null_value_index_search(ranges, ranges+range_count, buffer)) - || m_delete_cannot_batch || m_update_cannot_batch) - { - m_disable_multi_read= TRUE; - DBUG_RETURN(handler::read_multi_range_first(found_range_p, - ranges, - range_count, - sorted, - buffer)); - } - MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); - thd_ndb->query_state|= NDB_QUERY_MULTI_READ_RANGE; - m_disable_multi_read= FALSE; - - /* - * Copy arguments into member variables - */ - m_multi_ranges= ranges; - multi_range_curr= ranges; - multi_range_end= ranges+range_count; - multi_range_sorted= sorted; - multi_range_buffer= buffer; - - /* - * read multi range will read ranges as follows (if not ordered) - * - * input read order - * ====== ========== - * pk-op 1 pk-op 1 - * pk-op 2 pk-op 2 - * range 3 range (3,5) NOTE result rows will be intermixed - * pk-op 4 pk-op 4 - * range 5 - * pk-op 6 pk-ok 6 - */ - - /* - * Variables for loop - */ - uchar *curr= (uchar*)buffer->buffer; - uchar *end_of_buffer= (uchar*)buffer->buffer_end; - NdbOperation::LockMode lm= - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type); - bool need_pk = (lm == NdbOperation::LM_Read); - const NDBTAB *tab= m_table; - const NDBINDEX *unique_idx= m_index[active_index].unique_index; - const NDBINDEX *idx= m_index[active_index].index; - const NdbOperation* lastOp= m_active_trans->getLastDefinedOperation(); - NdbIndexScanOperation* scanOp= 0; - for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; - multi_range_curr++) - { - part_id_range part_spec; - if 
(m_use_partition_function) - { - get_partition_set(table, curr, active_index, - &multi_range_curr->start_key, - &part_spec); - DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u", - part_spec.start_part, part_spec.end_part)); - /* - If partition pruning has found no partition in set - we can skip this scan - */ - if (part_spec.start_part > part_spec.end_part) - { - /* - We can skip this partition since the key won't fit into any - partition - */ - curr += reclength; - multi_range_curr->range_flag |= SKIP_RANGE; - continue; - } - } - switch (cur_index_type) { - case PRIMARY_KEY_ORDERED_INDEX: - if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)) - goto range; - // else fall through - case PRIMARY_KEY_INDEX: - { - multi_range_curr->range_flag |= UNIQUE_RANGE; - if ((op= m_active_trans->getNdbOperation(tab)) && - !op->readTuple(lm) && - !set_primary_key(op, multi_range_curr->start_key.key) && - !define_read_attrs(curr, op) && - (!m_use_partition_function || - (op->setPartitionId(part_spec.start_part), TRUE))) - curr += reclength; - else - { - ERR_RETURN_PREPARE(res, - op ? op->getNdbError() : - m_active_trans->getNdbError()) - MYSQL_INDEX_READ_ROW_DONE(res); - DBUG_RETURN(res); - } - break; - } - break; - case UNIQUE_ORDERED_INDEX: - if (!(multi_range_curr->start_key.length == key_info->key_length && - multi_range_curr->start_key.flag == HA_READ_KEY_EXACT && - !check_null_in_key(key_info, multi_range_curr->start_key.key, - multi_range_curr->start_key.length))) - goto range; - // else fall through - case UNIQUE_INDEX: - { - multi_range_curr->range_flag |= UNIQUE_RANGE; - if ((op= m_active_trans->getNdbIndexOperation(unique_idx, tab)) && - !op->readTuple(lm) && - !set_index_key(op, key_info, multi_range_curr->start_key.key) && - !define_read_attrs(curr, op)) - curr += reclength; - else - { - ERR_RETURN_PREPARE(res, - op ? 
op->getNdbError() : - m_active_trans->getNdbError()); - MYSQL_INDEX_READ_ROW_DONE(res); - DBUG_RETURN(res); - } - break; - } - case ORDERED_INDEX: { - range: - multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE; - if (scanOp == 0) - { - if (m_multi_cursor) - { - scanOp= m_multi_cursor; - DBUG_ASSERT(scanOp->getSorted() == sorted); - DBUG_ASSERT(scanOp->getLockMode() == - (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type)); - if (scanOp->reset_bounds(m_force_send)) - { - res= ndb_err(m_active_trans); - MYSQL_INDEX_READ_ROW_DONE(res); - DBUG_RETURN(res); - } - - end_of_buffer -= reclength; - } - else if ((scanOp= m_active_trans->getNdbIndexScanOperation(idx, tab)) - &&!scanOp->readTuples(lm, 0, parallelism, sorted, - FALSE, TRUE, need_pk, TRUE) - &&!(m_cond && m_cond->generate_scan_filter(scanOp)) - &&!define_read_attrs(end_of_buffer-reclength, scanOp)) - { - m_multi_cursor= scanOp; - m_multi_range_cursor_result_ptr= end_of_buffer-reclength; - } - else - { - ERR_RETURN_PREPARE(res, - scanOp ? scanOp->getNdbError() : - m_active_trans->getNdbError()); - MYSQL_INDEX_READ_ROW_DONE(res); - DBUG_RETURN(res); - } - } - - const key_range *keys[2]= { &multi_range_curr->start_key, - &multi_range_curr->end_key }; - if ((res= set_bounds(scanOp, active_index, FALSE, keys, - multi_range_curr-ranges))) - { - MYSQL_INDEX_READ_ROW_DONE(res); - DBUG_RETURN(res); - } - break; - } - case UNDEFINED_INDEX: - DBUG_ASSERT(FALSE); - MYSQL_INDEX_READ_ROW_DONE(1); - DBUG_RETURN(1); - break; - } - } - - if (multi_range_curr != multi_range_end) - { - /* - * Mark that we're using entire buffer (even if might not) as - * we haven't read all ranges for some reason - * This as we don't want mysqld to reuse the buffer when we read - * the remaining ranges - */ - buffer->end_of_used_area= (uchar*)buffer->buffer_end; - } - else - { - buffer->end_of_used_area= curr; - } - - /* - * Set first operation in multi range - */ - m_current_multi_operation= - lastOp ? 
lastOp->next() : m_active_trans->getFirstDefinedOperation(); - if (!(res= execute_no_commit_ie(this, m_active_trans,true))) - { - m_multi_range_defined= multi_range_curr; - multi_range_curr= ranges; - m_multi_range_result_ptr= (uchar*)buffer->buffer; - res= loc_read_multi_range_next(found_range_p); - MYSQL_INDEX_READ_ROW_DONE(res); - DBUG_RETURN(res); - } - ERR_RETURN_PREPARE(res, m_active_trans->getNdbError()); - MYSQL_INDEX_READ_ROW_DONE(res); - DBUG_RETURN(res); -} - -#if 0 -#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x)); -#else -#define DBUG_MULTI_RANGE(x) -#endif - -int -ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) -{ - int rc; - DBUG_ENTER("ha_ndbcluster::read_multi_range_next"); - if (m_disable_multi_read) - { - DBUG_MULTI_RANGE(11); - DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p)); - } - MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); - rc= loc_read_multi_range_next(multi_range_found_p); - MYSQL_INDEX_READ_ROW_DONE(rc); - DBUG_RETURN(rc); -} - -int ha_ndbcluster::loc_read_multi_range_next( - KEY_MULTI_RANGE **multi_range_found_p) -{ - int res; - int range_no; - ulong reclength= table_share->reclength; - const NdbOperation* op= m_current_multi_operation; - DBUG_ENTER("ha_ndbcluster::loc_read_multi_range_next"); - - for (;multi_range_curr < m_multi_range_defined; multi_range_curr++) - { - DBUG_MULTI_RANGE(12); - if (multi_range_curr->range_flag & SKIP_RANGE) - continue; - if (multi_range_curr->range_flag & UNIQUE_RANGE) - { - if (op->getNdbError().code == 0) - { - DBUG_MULTI_RANGE(13); - goto found_next; - } - - op= m_active_trans->getNextCompletedOperation(op); - m_multi_range_result_ptr += reclength; - continue; - } - else if (m_multi_cursor && !multi_range_sorted) - { - DBUG_MULTI_RANGE(1); - if ((res= fetch_next(m_multi_cursor)) == 0) - { - DBUG_MULTI_RANGE(2); - range_no= m_multi_cursor->get_range_no(); - goto found; - } - else - { - 
DBUG_MULTI_RANGE(14); - goto close_scan; - } - } - else if (m_multi_cursor && multi_range_sorted) - { - if (m_active_cursor && (res= fetch_next(m_multi_cursor))) - { - DBUG_MULTI_RANGE(3); - goto close_scan; - } - - range_no= m_multi_cursor->get_range_no(); - uint current_range_no= multi_range_curr - m_multi_ranges; - if ((uint) range_no == current_range_no) - { - DBUG_MULTI_RANGE(4); - // return current row - goto found; - } - else if (range_no > (int)current_range_no) - { - DBUG_MULTI_RANGE(5); - // wait with current row - m_active_cursor= 0; - continue; - } - else - { - DBUG_MULTI_RANGE(6); - // First fetch from cursor - DBUG_ASSERT(range_no == -1); - if ((res= m_multi_cursor->nextResult(TRUE))) - { - DBUG_MULTI_RANGE(15); - goto close_scan; - } - multi_range_curr--; // Will be increased in for-loop - continue; - } - } - else /* m_multi_cursor == 0 */ - { - DBUG_MULTI_RANGE(7); - /* - * Corresponds to range 5 in example in read_multi_range_first - */ - (void)1; - continue; - } - - DBUG_ASSERT(FALSE); // Should only get here via goto's -close_scan: - if (res == 1) - { - m_multi_cursor->close(FALSE, TRUE); - m_active_cursor= m_multi_cursor= 0; - DBUG_MULTI_RANGE(8); - continue; - } - else - { - DBUG_MULTI_RANGE(9); - DBUG_RETURN(ndb_err(m_active_trans)); - } - } - - if (multi_range_curr == multi_range_end) - { - DBUG_MULTI_RANGE(16); - Thd_ndb *thd_ndb= get_thd_ndb(current_thd); - thd_ndb->query_state&= NDB_QUERY_NORMAL; - DBUG_RETURN(HA_ERR_END_OF_FILE); - } - - /* - * Read remaining ranges - */ - MYSQL_INDEX_READ_ROW_DONE(1); - DBUG_RETURN(read_multi_range_first(multi_range_found_p, - multi_range_curr, - multi_range_end - multi_range_curr, - multi_range_sorted, - multi_range_buffer)); - -found: - /* - * Found a record belonging to a scan - */ - m_active_cursor= m_multi_cursor; - * multi_range_found_p= m_multi_ranges + range_no; - memcpy(table->record[0], m_multi_range_cursor_result_ptr, reclength); - setup_recattr(m_active_cursor->getFirstRecAttr()); - 
unpack_record(table->record[0]); - table->status= 0; - DBUG_RETURN(0); - -found_next: - /* - * Found a record belonging to a pk/index op, - * copy result and move to next to prepare for next call - */ - * multi_range_found_p= multi_range_curr; - memcpy(table->record[0], m_multi_range_result_ptr, reclength); - setup_recattr(op->getFirstRecAttr()); - unpack_record(table->record[0]); - table->status= 0; - - multi_range_curr++; - m_current_multi_operation= m_active_trans->getNextCompletedOperation(op); - m_multi_range_result_ptr += reclength; - DBUG_RETURN(0); -} -#endif - -int -ha_ndbcluster::setup_recattr(const NdbRecAttr* curr) -{ - DBUG_ENTER("setup_recattr"); - - Field **field, **end; - NdbValue *value= m_value; - - end= table->field + table_share->fields; - - for (field= table->field; field < end; field++, value++) - { - if ((* value).ptr) - { - DBUG_ASSERT(curr != 0); - NdbValue* val= m_value + curr->getColumn()->getColumnNo(); - DBUG_ASSERT(val->ptr); - val->rec= curr; - curr= curr->next(); - } - } - - DBUG_RETURN(0); -} - -/** - @param[in] comment table comment defined by user - - @return - table comment + additional -*/ -char* -ha_ndbcluster::update_table_comment( - /* out: table comment + additional */ - const char* comment)/* in: table comment defined by user */ -{ - uint length= strlen(comment); - if (length > 64000 - 3) - { - return((char*)comment); /* string too long */ - } - - Ndb* ndb; - if (!(ndb= get_ndb())) - { - return((char*)comment); - } - - if (ndb->setDatabaseName(m_dbname)) - { - return((char*)comment); - } - const NDBTAB* tab= m_table; - DBUG_ASSERT(tab != NULL); - - char *str; - const char *fmt="%s%snumber_of_replicas: %d"; - const unsigned fmt_len_plus_extra= length + strlen(fmt); - if ((str= (char*) my_malloc(fmt_len_plus_extra, MYF(0))) == NULL) - { - sql_print_error("ha_ndbcluster::update_table_comment: " - "my_malloc(%u) failed", (unsigned int)fmt_len_plus_extra); - return (char*)comment; - } - - 
my_snprintf(str,fmt_len_plus_extra,fmt,comment, - length > 0 ? " ":"", - tab->getReplicaCount()); - return str; -} - - -/** - Utility thread main loop. -*/ -pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused))) -{ - THD *thd; /* needs to be first for thread_stack */ - struct timespec abstime; - Thd_ndb *thd_ndb; - uint share_list_size= 0; - NDB_SHARE **share_list= NULL; - - my_thread_init(); - DBUG_ENTER("ndb_util_thread"); - DBUG_PRINT("enter", ("cache_check_time: %lu", opt_ndb_cache_check_time)); - - mysql_mutex_lock(&LOCK_ndb_util_thread); - - thd= new THD; /* note that contructor of THD uses DBUG_ */ - if (thd == NULL) - { - my_errno= HA_ERR_OUT_OF_MEM; - DBUG_RETURN(NULL); - } - THD_CHECK_SENTRY(thd); - pthread_detach_this_thread(); - ndb_util_thread= pthread_self(); - - thd->thread_stack= (char*)&thd; /* remember where our stack is */ - if (thd->store_globals()) - goto ndb_util_thread_fail; - thd->init_for_queries(); - thd->main_security_ctx.host_or_ip= ""; - thd->client_capabilities = 0; - my_net_init(&thd->net, 0, MYF(MY_THREAD_SPECIFIC)); - thd->main_security_ctx.master_access= ~0; - thd->main_security_ctx.priv_user[0] = 0; - /* Do not use user-supplied timeout value for system threads. 
*/ - thd->variables.lock_wait_timeout= LONG_TIMEOUT; - - CHARSET_INFO *charset_connection; - charset_connection= get_charset_by_csname("utf8", - MY_CS_PRIMARY, MYF(MY_WME)); - thd->variables.character_set_client= charset_connection; - thd->variables.character_set_results= charset_connection; - thd->variables.collation_connection= charset_connection; - thd->update_charset(); - - /* Signal successful initialization */ - ndb_util_thread_running= 1; - mysql_cond_signal(&COND_ndb_util_ready); - mysql_mutex_unlock(&LOCK_ndb_util_thread); - - /* - wait for mysql server to start - */ - mysql_mutex_lock(&LOCK_server_started); - while (!mysqld_server_started) - { - set_timespec(abstime, 1); - mysql_cond_timedwait(&COND_server_started, &LOCK_server_started, - &abstime); - if (ndbcluster_terminating) - { - mysql_mutex_unlock(&LOCK_server_started); - mysql_mutex_lock(&LOCK_ndb_util_thread); - goto ndb_util_thread_end; - } - } - mysql_mutex_unlock(&LOCK_server_started); - - /* - Wait for cluster to start - */ - mysql_mutex_lock(&LOCK_ndb_util_thread); - while (!ndb_cluster_node_id && (ndbcluster_hton->slot != ~(uint)0)) - { - /* ndb not connected yet */ - mysql_cond_wait(&COND_ndb_util_thread, &LOCK_ndb_util_thread); - if (ndbcluster_terminating) - goto ndb_util_thread_end; - } - mysql_mutex_unlock(&LOCK_ndb_util_thread); - - /* Get thd_ndb for this thread */ - if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb())) - { - sql_print_error("Could not allocate Thd_ndb object"); - mysql_mutex_lock(&LOCK_ndb_util_thread); - goto ndb_util_thread_end; - } - set_thd_ndb(thd, thd_ndb); - thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP; - -#ifdef HAVE_NDB_BINLOG - if (opt_ndb_extra_logging && ndb_binlog_running) - sql_print_information("NDB Binlog: Ndb tables initially read only."); - /* create tables needed by the replication */ - ndbcluster_setup_binlog_table_shares(thd); -#else - /* - Get all table definitions from the storage node - */ - ndbcluster_find_all_files(thd); -#endif - - 
set_timespec(abstime, 0); - for (;;) - { - mysql_mutex_lock(&LOCK_ndb_util_thread); - if (!ndbcluster_terminating) - mysql_cond_timedwait(&COND_ndb_util_thread, - &LOCK_ndb_util_thread, - &abstime); - if (ndbcluster_terminating) /* Shutting down server */ - goto ndb_util_thread_end; - mysql_mutex_unlock(&LOCK_ndb_util_thread); -#ifdef NDB_EXTRA_DEBUG_UTIL_THREAD - DBUG_PRINT("ndb_util_thread", ("Started, opt_ndb_cache_check_time: %lu", - opt_ndb_cache_check_time)); -#endif - -#ifdef HAVE_NDB_BINLOG - /* - Check that the ndb_apply_status_share and ndb_schema_share - have been created. - If not try to create it - */ - if (!ndb_binlog_tables_inited) - ndbcluster_setup_binlog_table_shares(thd); -#endif - - if (opt_ndb_cache_check_time == 0) - { - /* Wake up in 1 second to check if value has changed */ - set_timespec(abstime, 1); - continue; - } - - /* Lock mutex and fill list with pointers to all open tables */ - NDB_SHARE *share; - mysql_mutex_lock(&ndbcluster_mutex); - uint i, open_count, record_count= ndbcluster_open_tables.records; - if (share_list_size < record_count) - { - NDB_SHARE ** new_share_list= new NDB_SHARE * [record_count]; - if (!new_share_list) - { - sql_print_warning("ndb util thread: malloc failure, " - "query cache not maintained properly"); - mysql_mutex_unlock(&ndbcluster_mutex); - goto next; // At least do not crash - } - delete [] share_list; - share_list_size= record_count; - share_list= new_share_list; - } - for (i= 0, open_count= 0; i < record_count; i++) - { - share= (NDB_SHARE *)my_hash_element(&ndbcluster_open_tables, i); -#ifdef HAVE_NDB_BINLOG - if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0)) - <= 0) - continue; // injector thread is the only user, skip statistics - share->util_lock= current_thd; // Mark that util thread has lock -#endif /* HAVE_NDB_BINLOG */ - /* ndb_share reference temporary, free below */ - share->use_count++; /* Make sure the table can't be closed */ - DBUG_PRINT("NDB_SHARE", ("%s temporary 
use_count: %u", - share->key, share->use_count)); - DBUG_PRINT("ndb_util_thread", - ("Found open table[%d]: %s, use_count: %d", - i, share->table_name, share->use_count)); - - /* Store pointer to table */ - share_list[open_count++]= share; - } - mysql_mutex_unlock(&ndbcluster_mutex); - - /* Iterate through the open files list */ - for (i= 0; i < open_count; i++) - { - share= share_list[i]; -#ifdef HAVE_NDB_BINLOG - if ((share->use_count - (int) (share->op != 0) - (int) (share->op != 0)) - <= 1) - { - /* - Util thread and injector thread is the only user, skip statistics - */ - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - continue; - } -#endif /* HAVE_NDB_BINLOG */ - DBUG_PRINT("ndb_util_thread", - ("Fetching commit count for: %s", share->key)); - - struct Ndb_statistics stat; - uint lock; - mysql_mutex_lock(&share->mutex); - lock= share->commit_count_lock; - mysql_mutex_unlock(&share->mutex); - { - /* Contact NDB to get commit count for table */ - Ndb* ndb= thd_ndb->ndb; - if (ndb->setDatabaseName(share->db)) - { - goto loop_next; - } - Ndb_table_guard ndbtab_g(ndb->getDictionary(), share->table_name); - if (ndbtab_g.get_table() && - ndb_get_table_statistics(NULL, FALSE, ndb, - ndbtab_g.get_table(), &stat) == 0) - { -#ifndef DBUG_OFF - char buff[22], buff2[22]; -#endif - DBUG_PRINT("info", - ("Table: %s commit_count: %s rows: %s", - share->key, - llstr(stat.commit_count, buff), - llstr(stat.row_count, buff2))); - } - else - { - DBUG_PRINT("ndb_util_thread", - ("Error: Could not get commit count for table %s", - share->key)); - stat.commit_count= 0; - } - } - loop_next: - mysql_mutex_lock(&share->mutex); - if (share->commit_count_lock == lock) - share->commit_count= stat.commit_count; - mysql_mutex_unlock(&share->mutex); - - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, 
share->use_count)); - free_share(&share); - } -next: - /* Calculate new time to wake up */ - int secs= 0; - int msecs= opt_ndb_cache_check_time; - - struct timeval tick_time; - gettimeofday(&tick_time, 0); - abstime.tv_sec= tick_time.tv_sec; - abstime.tv_nsec= tick_time.tv_usec * 1000; - - if (msecs >= 1000){ - secs= msecs / 1000; - msecs= msecs % 1000; - } - - abstime.tv_sec+= secs; - abstime.tv_nsec+= msecs * 1000000; - if (abstime.tv_nsec >= 1000000000) { - abstime.tv_sec+= 1; - abstime.tv_nsec-= 1000000000; - } - } - - mysql_mutex_lock(&LOCK_ndb_util_thread); - -ndb_util_thread_end: -ndb_util_thread_fail: - if (share_list) - delete [] share_list; - delete thd; - - /* signal termination */ - ndb_util_thread_running= 0; - mysql_cond_signal(&COND_ndb_util_ready); - mysql_mutex_unlock(&LOCK_ndb_util_thread); - DBUG_PRINT("exit", ("ndb_util_thread")); - - DBUG_LEAVE; // Must match DBUG_ENTER() - my_thread_end(); - pthread_exit(0); - return NULL; // Avoid compiler warnings -} - -/* - Condition pushdown -*/ -/** - Push a condition to ndbcluster storage engine for evaluation - during table and index scans. The conditions will be stored on a stack - for possibly storing several conditions. The stack can be popped - by calling cond_pop, handler::extra(HA_EXTRA_RESET) (handler::reset()) - will clear the stack. - The current implementation supports arbitrary AND/OR nested conditions - with comparisons between columns and constants (including constant - expressions and function calls) and the following comparison operators: - =, !=, >, >=, <, <=, "is null", and "is not null". 
- - @retval - NULL The condition was supported and will be evaluated for each - row found during the scan - @retval - cond The condition was not supported and all rows will be returned from - the scan for evaluation (and thus not saved on stack) -*/ -const -COND* -ha_ndbcluster::cond_push(const COND *cond) -{ - DBUG_ENTER("cond_push"); - if (!m_cond) - m_cond= new ha_ndbcluster_cond; - if (!m_cond) - { - my_errno= HA_ERR_OUT_OF_MEM; - DBUG_RETURN(NULL); - } - DBUG_EXECUTE("where",print_where((COND *)cond, m_tabname, QT_ORDINARY);); - DBUG_RETURN(m_cond->cond_push(cond, table, (NDBTAB *)m_table)); -} - -/** - Pop the top condition from the condition stack of the handler instance. -*/ -void -ha_ndbcluster::cond_pop() -{ - if (m_cond) - m_cond->cond_pop(); -} - - -/* - get table space info for SHOW CREATE TABLE -*/ -char* ha_ndbcluster::get_tablespace_name(THD *thd, char* name, uint name_len) -{ - Ndb *ndb= check_ndb_in_thd(thd); - NDBDICT *ndbdict= ndb->getDictionary(); - NdbError ndberr; - Uint32 id; - ndb->setDatabaseName(m_dbname); - const NDBTAB *ndbtab= m_table; - DBUG_ASSERT(ndbtab != NULL); - if (!ndbtab->getTablespace(&id)) - { - return 0; - } - { - NdbDictionary::Tablespace ts= ndbdict->getTablespace(id); - ndberr= ndbdict->getNdbError(); - if(ndberr.classification != NdbError::NoError) - goto err; - DBUG_PRINT("info", ("Found tablespace '%s'", ts.getName())); - if (name) - { - strxnmov(name, name_len, ts.getName(), NullS); - return name; - } - else - return (my_strdup(ts.getName(), MYF(0))); - } -err: - if (ndberr.status == NdbError::TemporaryError) - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_TEMPORARY_ERRMSG, ER(ER_GET_TEMPORARY_ERRMSG), - ndberr.code, ndberr.message, "NDB"); - else - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - ndberr.code, ndberr.message, "NDB"); - return 0; -} - -/* - Implements the SHOW NDB STATUS command. 
-*/ -bool -ndbcluster_show_status(handlerton *hton, THD* thd, stat_print_fn *stat_print, - enum ha_stat_type stat_type) -{ - char buf[IO_SIZE]; - uint buflen; - DBUG_ENTER("ndbcluster_show_status"); - - if (stat_type != HA_ENGINE_STATUS) - { - DBUG_RETURN(FALSE); - } - - update_status_variables(g_ndb_cluster_connection); - buflen= - my_snprintf(buf, sizeof(buf), - "cluster_node_id=%ld, " - "connected_host=%s, " - "connected_port=%ld, " - "number_of_data_nodes=%ld, " - "number_of_ready_data_nodes=%ld, " - "connect_count=%ld", - ndb_cluster_node_id, - ndb_connected_host, - ndb_connected_port, - ndb_number_of_data_nodes, - ndb_number_of_ready_data_nodes, - ndb_connect_count); - if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length, - STRING_WITH_LEN("connection"), buf, buflen)) - DBUG_RETURN(TRUE); - - if (get_thd_ndb(thd) && get_thd_ndb(thd)->ndb) - { - Ndb* ndb= (get_thd_ndb(thd))->ndb; - Ndb::Free_list_usage tmp; - tmp.m_name= 0; - while (ndb->get_free_list_usage(&tmp)) - { - buflen= - my_snprintf(buf, sizeof(buf), - "created=%u, free=%u, sizeof=%u", - tmp.m_created, tmp.m_free, tmp.m_sizeof); - if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length, - tmp.m_name, strlen(tmp.m_name), buf, buflen)) - DBUG_RETURN(TRUE); - } - } -#ifdef HAVE_NDB_BINLOG - ndbcluster_show_status_binlog(thd, stat_print, stat_type); -#endif - - DBUG_RETURN(FALSE); -} - - -/* - Create a table in NDB Cluster - */ -static uint get_no_fragments(ulonglong max_rows) -{ -#if MYSQL_VERSION_ID >= 50000 - uint acc_row_size= 25 + /*safety margin*/ 2; -#else - uint acc_row_size= pk_length*4; - /* add acc overhead */ - if (pk_length <= 8) /* main page will set the limit */ - acc_row_size+= 25 + /*safety margin*/ 2; - else /* overflow page will set the limit */ - acc_row_size+= 4 + /*safety margin*/ 4; -#endif - ulonglong acc_fragment_size= 512*1024*1024; -#if MYSQL_VERSION_ID >= 50100 - return (max_rows*acc_row_size)/acc_fragment_size+1; -#else - return 
((max_rows*acc_row_size)/acc_fragment_size+1 - +1/*correct rounding*/)/2; -#endif -} - - -/* - Routine to adjust default number of partitions to always be a multiple - of number of nodes and never more than 4 times the number of nodes. - -*/ -static bool adjusted_frag_count(uint no_fragments, uint no_nodes, - uint &reported_frags) -{ - uint i= 0; - reported_frags= no_nodes; - while (reported_frags < no_fragments && ++i < 4 && - (reported_frags + no_nodes) < MAX_PARTITIONS) - reported_frags+= no_nodes; - return (reported_frags < no_fragments); -} - -int ha_ndbcluster::get_default_no_partitions(HA_CREATE_INFO *create_info) -{ - ha_rows max_rows, min_rows; - if (create_info) - { - max_rows= create_info->max_rows; - min_rows= create_info->min_rows; - } - else - { - max_rows= table_share->max_rows; - min_rows= table_share->min_rows; - } - uint reported_frags; - uint no_fragments= - get_no_fragments(max_rows >= min_rows ? max_rows : min_rows); - uint no_nodes= g_ndb_cluster_connection->no_db_nodes(); - if (adjusted_frag_count(no_fragments, no_nodes, reported_frags)) - { - push_warning(current_thd, - Sql_condition::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR, - "Ndb might have problems storing the max amount of rows specified"); - } - return (int)reported_frags; -} - - -/* - Set-up auto-partitioning for NDB Cluster - - SYNOPSIS - set_auto_partitions() - part_info Partition info struct to set-up - - RETURN VALUE - NONE - - DESCRIPTION - Set-up auto partitioning scheme for tables that didn't define any - partitioning. We'll use PARTITION BY KEY() in this case which - translates into partition by primary key if a primary key exists - and partition by hidden key otherwise. 
-*/ - - -enum ndb_distribution_enum { ND_KEYHASH= 0, ND_LINHASH= 1 }; -static const char* distribution_names[]= { "KEYHASH", "LINHASH", NullS }; -static ulong default_ndb_distribution= ND_KEYHASH; -static TYPELIB distribution_typelib= { - array_elements(distribution_names) - 1, - "", - distribution_names, - NULL -}; -static MYSQL_SYSVAR_ENUM( - distribution, /* name */ - default_ndb_distribution, /* var */ - PLUGIN_VAR_RQCMDARG, - "Default distribution for new tables in ndb", - NULL, /* check func. */ - NULL, /* update func. */ - ND_KEYHASH, /* default */ - &distribution_typelib /* typelib */ -); - -void ha_ndbcluster::set_auto_partitions(partition_info *part_info) -{ - DBUG_ENTER("ha_ndbcluster::set_auto_partitions"); - part_info->list_of_part_fields= TRUE; - part_info->part_type= HASH_PARTITION; - switch (default_ndb_distribution) - { - case ND_KEYHASH: - part_info->linear_hash_ind= FALSE; - break; - case ND_LINHASH: - part_info->linear_hash_ind= TRUE; - break; - } - DBUG_VOID_RETURN; -} - - -int ha_ndbcluster::set_range_data(void *tab_ref, partition_info *part_info) -{ - NDBTAB *tab= (NDBTAB*)tab_ref; - int32 *range_data= (int32*)my_malloc(part_info->num_parts*sizeof(int32), - MYF(0)); - uint i; - int error= 0; - bool unsigned_flag= part_info->part_expr->unsigned_flag; - DBUG_ENTER("set_range_data"); - - if (!range_data) - { - mem_alloc_error(part_info->num_parts*sizeof(int32)); - DBUG_RETURN(1); - } - for (i= 0; i < part_info->num_parts; i++) - { - longlong range_val= part_info->range_int_array[i]; - if (unsigned_flag) - range_val-= 0x8000000000000000ULL; - if (range_val < INT_MIN32 || range_val >= INT_MAX32) - { - if ((i != part_info->num_parts - 1) || - (range_val != LONGLONG_MAX)) - { - my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB"); - error= 1; - goto error; - } - range_val= INT_MAX32; - } - range_data[i]= (int32)range_val; - } - tab->setRangeListData(range_data, sizeof(int32)*part_info->num_parts); -error: - my_free(range_data); - DBUG_RETURN(error); -} - 
-int ha_ndbcluster::set_list_data(void *tab_ref, partition_info *part_info) -{ - NDBTAB *tab= (NDBTAB*)tab_ref; - int32 *list_data= (int32*)my_malloc(part_info->num_list_values * 2 - * sizeof(int32), MYF(0)); - uint32 *part_id, i; - int error= 0; - bool unsigned_flag= part_info->part_expr->unsigned_flag; - DBUG_ENTER("set_list_data"); - - if (!list_data) - { - mem_alloc_error(part_info->num_list_values*2*sizeof(int32)); - DBUG_RETURN(1); - } - for (i= 0; i < part_info->num_list_values; i++) - { - LIST_PART_ENTRY *list_entry= &part_info->list_array[i]; - longlong list_val= list_entry->list_value; - if (unsigned_flag) - list_val-= 0x8000000000000000ULL; - if (list_val < INT_MIN32 || list_val > INT_MAX32) - { - my_error(ER_LIMITED_PART_RANGE, MYF(0), "NDB"); - error= 1; - goto error; - } - list_data[2*i]= (int32)list_val; - part_id= (uint32*)&list_data[2*i+1]; - *part_id= list_entry->partition_id; - } - tab->setRangeListData(list_data, 2*sizeof(int32)*part_info->num_list_values); -error: - my_free(list_data); - DBUG_RETURN(error); -} - -/* - User defined partitioning set-up. We need to check how many fragments the - user wants defined and which node groups to put those into. Later we also - want to attach those partitions to a tablespace. - - All the functionality of the partition function, partition limits and so - forth are entirely handled by the MySQL Server. There is one exception to - this rule for PARTITION BY KEY where NDB handles the hash function and - this type can thus be handled transparently also by NDB API program. - For RANGE, HASH and LIST and subpartitioning the NDB API programs must - implement the function to map to a partition. 
-*/ - -uint ha_ndbcluster::set_up_partition_info(partition_info *part_info, - TABLE *table, - void *tab_par) -{ - uint16 frag_data[MAX_PARTITIONS]; - char *ts_names[MAX_PARTITIONS]; - ulong fd_index= 0, i, j; - NDBTAB *tab= (NDBTAB*)tab_par; - NDBTAB::FragmentType ftype= NDBTAB::UserDefined; - partition_element *part_elem; - bool first= TRUE; - uint tot_ts_name_len; - List_iterator<partition_element> part_it(part_info->partitions); - int error; - DBUG_ENTER("ha_ndbcluster::set_up_partition_info"); - - if (part_info->part_type == HASH_PARTITION && - part_info->list_of_part_fields == TRUE) - { - Field **fields= part_info->part_field_array; - - if (part_info->linear_hash_ind) - ftype= NDBTAB::DistrKeyLin; - else - ftype= NDBTAB::DistrKeyHash; - - for (i= 0; i < part_info->part_field_list.elements; i++) - { - NDBCOL *col= tab->getColumn(fields[i]->field_index); - DBUG_PRINT("info",("setting dist key on %s", col->getName())); - col->setPartitionKey(TRUE); - } - } - else - { - if (!current_thd->variables.new_mode) - { - push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - ER(ER_ILLEGAL_HA_CREATE_OPTION), - ndbcluster_hton_name, - "LIST, RANGE and HASH partition disabled by default," - " use --new option to enable"); - DBUG_RETURN(HA_ERR_UNSUPPORTED); - } - /* - Create a shadow field for those tables that have user defined - partitioning. This field stores the value of the partition - function such that NDB can handle reorganisations of the data - even when the MySQL Server isn't available to assist with - calculation of the partition function value. 
- */ - NDBCOL col; - DBUG_PRINT("info", ("Generating partition func value field")); - col.setName("$PART_FUNC_VALUE"); - col.setType(NdbDictionary::Column::Int); - col.setLength(1); - col.setNullable(FALSE); - col.setPrimaryKey(FALSE); - col.setAutoIncrement(FALSE); - tab->addColumn(col); - if (part_info->part_type == RANGE_PARTITION) - { - if ((error= set_range_data((void*)tab, part_info))) - { - DBUG_RETURN(error); - } - } - else if (part_info->part_type == LIST_PARTITION) - { - if ((error= set_list_data((void*)tab, part_info))) - { - DBUG_RETURN(error); - } - } - } - tab->setFragmentType(ftype); - i= 0; - tot_ts_name_len= 0; - do - { - uint ng; - part_elem= part_it++; - if (!part_info->is_sub_partitioned()) - { - ng= part_elem->nodegroup_id; - if (first && ng == UNDEF_NODEGROUP) - ng= 0; - ts_names[fd_index]= part_elem->tablespace_name; - frag_data[fd_index++]= ng; - } - else - { - List_iterator<partition_element> sub_it(part_elem->subpartitions); - j= 0; - do - { - part_elem= sub_it++; - ng= part_elem->nodegroup_id; - if (first && ng == UNDEF_NODEGROUP) - ng= 0; - ts_names[fd_index]= part_elem->tablespace_name; - frag_data[fd_index++]= ng; - } while (++j < part_info->num_subparts); - } - first= FALSE; - } while (++i < part_info->num_parts); - tab->setDefaultNoPartitionsFlag(part_info->use_default_num_partitions); - tab->setLinearFlag(part_info->linear_hash_ind); - { - ha_rows max_rows= table_share->max_rows; - ha_rows min_rows= table_share->min_rows; - if (max_rows < min_rows) - max_rows= min_rows; - if (max_rows != (ha_rows)0) /* default setting, don't set fragmentation */ - { - tab->setMaxRows(max_rows); - tab->setMinRows(min_rows); - } - } - tab->setTablespaceNames(ts_names, fd_index*sizeof(char*)); - tab->setFragmentCount(fd_index); - tab->setFragmentData(&frag_data, fd_index*2); - DBUG_RETURN(0); -} - - -bool ha_ndbcluster::check_if_incompatible_data(HA_CREATE_INFO *create_info, - uint table_changes) -{ - 
DBUG_ENTER("ha_ndbcluster::check_if_incompatible_data"); - uint i; - const NDBTAB *tab= (const NDBTAB *) m_table; - - if (THDVAR(current_thd, use_copying_alter_table)) - { - DBUG_PRINT("info", ("On-line alter table disabled")); - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - - int pk= 0; - int ai= 0; - - if (create_info->tablespace) - create_info->storage_media = HA_SM_DISK; - else - create_info->storage_media = HA_SM_MEMORY; - - for (i= 0; i < table->s->fields; i++) - { - Field *field= table->field[i]; - const NDBCOL *col= tab->getColumn(i); - if ((col->getStorageType() == NDB_STORAGETYPE_MEMORY && create_info->storage_media != HA_SM_MEMORY) || - (col->getStorageType() == NDB_STORAGETYPE_DISK && create_info->storage_media != HA_SM_DISK)) - { - DBUG_PRINT("info", ("Column storage media is changed")); - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - - if (field->flags & FIELD_IS_RENAMED) - { - DBUG_PRINT("info", ("Field has been renamed, copy table")); - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - if ((field->flags & FIELD_IN_ADD_INDEX) && - col->getStorageType() == NdbDictionary::Column::StorageTypeDisk) - { - DBUG_PRINT("info", ("add/drop index not supported for disk stored column")); - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - - if (field->flags & PRI_KEY_FLAG) - pk=1; - if (field->flags & FIELD_IN_ADD_INDEX) - ai=1; - } - - char tablespace_name[FN_LEN + 1]; - if (get_tablespace_name(current_thd, tablespace_name, FN_LEN)) - { - if (create_info->tablespace) - { - if (strcmp(create_info->tablespace, tablespace_name)) - { - DBUG_PRINT("info", ("storage media is changed, old tablespace=%s, new tablespace=%s", - tablespace_name, create_info->tablespace)); - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - } - else - { - DBUG_PRINT("info", ("storage media is changed, old is DISK and tablespace=%s, new is MEM", - tablespace_name)); - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - } - else - { - if (create_info->storage_media != HA_SM_MEMORY) - { - DBUG_PRINT("info", ("storage media is changed, old is MEM, new 
is DISK and tablespace=%s", - create_info->tablespace)); - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - } - - if (table_changes != IS_EQUAL_YES) - DBUG_RETURN(COMPATIBLE_DATA_NO); - - /* Check that auto_increment value was not changed */ - if ((create_info->used_fields & HA_CREATE_USED_AUTO) && - create_info->auto_increment_value != 0) - { - DBUG_PRINT("info", ("auto_increment value changed")); - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - - /* Check that row format didn't change */ - if ((create_info->used_fields & HA_CREATE_USED_AUTO) && - get_row_type() != create_info->row_type) - { - DBUG_PRINT("info", ("row format changed")); - DBUG_RETURN(COMPATIBLE_DATA_NO); - } - - DBUG_PRINT("info", ("new table seems compatible")); - DBUG_RETURN(COMPATIBLE_DATA_YES); -} - -bool set_up_tablespace(st_alter_tablespace *alter_info, - NdbDictionary::Tablespace *ndb_ts) -{ - ndb_ts->setName(alter_info->tablespace_name); - ndb_ts->setExtentSize(alter_info->extent_size); - ndb_ts->setDefaultLogfileGroup(alter_info->logfile_group_name); - return FALSE; -} - -bool set_up_datafile(st_alter_tablespace *alter_info, - NdbDictionary::Datafile *ndb_df) -{ - if (alter_info->max_size > 0) - { - my_error(ER_TABLESPACE_AUTO_EXTEND_ERROR, MYF(0)); - return TRUE; - } - ndb_df->setPath(alter_info->data_file_name); - ndb_df->setSize(alter_info->initial_size); - ndb_df->setTablespace(alter_info->tablespace_name); - return FALSE; -} - -bool set_up_logfile_group(st_alter_tablespace *alter_info, - NdbDictionary::LogfileGroup *ndb_lg) -{ - ndb_lg->setName(alter_info->logfile_group_name); - ndb_lg->setUndoBufferSize(alter_info->undo_buffer_size); - return FALSE; -} - -bool set_up_undofile(st_alter_tablespace *alter_info, - NdbDictionary::Undofile *ndb_uf) -{ - ndb_uf->setPath(alter_info->undo_file_name); - ndb_uf->setSize(alter_info->initial_size); - ndb_uf->setLogfileGroup(alter_info->logfile_group_name); - return FALSE; -} - -int ndbcluster_alter_tablespace(handlerton *hton, - THD* thd, st_alter_tablespace 
*alter_info) -{ - int is_tablespace= 0; - NdbError err; - NDBDICT *dict; - int error; - const char *errmsg; - Ndb *ndb; - DBUG_ENTER("ha_ndbcluster::alter_tablespace"); - LINT_INIT(errmsg); - - ndb= check_ndb_in_thd(thd); - if (ndb == NULL) - { - DBUG_RETURN(HA_ERR_NO_CONNECTION); - } - dict= ndb->getDictionary(); - - switch (alter_info->ts_cmd_type){ - case (CREATE_TABLESPACE): - { - error= ER_CREATE_FILEGROUP_FAILED; - - NdbDictionary::Tablespace ndb_ts; - NdbDictionary::Datafile ndb_df; - NdbDictionary::ObjectId objid; - if (set_up_tablespace(alter_info, &ndb_ts)) - { - DBUG_RETURN(1); - } - if (set_up_datafile(alter_info, &ndb_df)) - { - DBUG_RETURN(1); - } - errmsg= "TABLESPACE"; - if (dict->createTablespace(ndb_ts, &objid)) - { - DBUG_PRINT("error", ("createTablespace returned %d", error)); - goto ndberror; - } - DBUG_PRINT("alter_info", ("Successfully created Tablespace")); - errmsg= "DATAFILE"; - if (dict->createDatafile(ndb_df)) - { - err= dict->getNdbError(); - NdbDictionary::Tablespace tmp= dict->getTablespace(ndb_ts.getName()); - if (dict->getNdbError().code == 0 && - tmp.getObjectId() == objid.getObjectId() && - tmp.getObjectVersion() == objid.getObjectVersion()) - { - dict->dropTablespace(tmp); - } - - DBUG_PRINT("error", ("createDatafile returned %d", error)); - goto ndberror2; - } - is_tablespace= 1; - break; - } - case (ALTER_TABLESPACE): - { - error= ER_ALTER_FILEGROUP_FAILED; - if (alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_ADD_FILE) - { - NdbDictionary::Datafile ndb_df; - if (set_up_datafile(alter_info, &ndb_df)) - { - DBUG_RETURN(1); - } - errmsg= " CREATE DATAFILE"; - if (dict->createDatafile(ndb_df)) - { - goto ndberror; - } - } - else if(alter_info->ts_alter_tablespace_type == ALTER_TABLESPACE_DROP_FILE) - { - NdbDictionary::Tablespace ts= dict->getTablespace(alter_info->tablespace_name); - NdbDictionary::Datafile df= dict->getDatafile(0, alter_info->data_file_name); - NdbDictionary::ObjectId objid; - 
df.getTablespaceId(&objid); - if (ts.getObjectId() == objid.getObjectId() && - strcmp(df.getPath(), alter_info->data_file_name) == 0) - { - errmsg= " DROP DATAFILE"; - if (dict->dropDatafile(df)) - { - goto ndberror; - } - } - else - { - DBUG_PRINT("error", ("No such datafile")); - my_error(ER_ALTER_FILEGROUP_FAILED, MYF(0), " NO SUCH FILE"); - DBUG_RETURN(1); - } - } - else - { - DBUG_PRINT("error", ("Unsupported alter tablespace: %d", - alter_info->ts_alter_tablespace_type)); - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); - } - is_tablespace= 1; - break; - } - case (CREATE_LOGFILE_GROUP): - { - error= ER_CREATE_FILEGROUP_FAILED; - NdbDictionary::LogfileGroup ndb_lg; - NdbDictionary::Undofile ndb_uf; - NdbDictionary::ObjectId objid; - if (alter_info->undo_file_name == NULL) - { - /* - REDO files in LOGFILE GROUP not supported yet - */ - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); - } - if (set_up_logfile_group(alter_info, &ndb_lg)) - { - DBUG_RETURN(1); - } - errmsg= "LOGFILE GROUP"; - if (dict->createLogfileGroup(ndb_lg, &objid)) - { - goto ndberror; - } - DBUG_PRINT("alter_info", ("Successfully created Logfile Group")); - if (set_up_undofile(alter_info, &ndb_uf)) - { - DBUG_RETURN(1); - } - errmsg= "UNDOFILE"; - if (dict->createUndofile(ndb_uf)) - { - err= dict->getNdbError(); - NdbDictionary::LogfileGroup tmp= dict->getLogfileGroup(ndb_lg.getName()); - if (dict->getNdbError().code == 0 && - tmp.getObjectId() == objid.getObjectId() && - tmp.getObjectVersion() == objid.getObjectVersion()) - { - dict->dropLogfileGroup(tmp); - } - goto ndberror2; - } - break; - } - case (ALTER_LOGFILE_GROUP): - { - error= ER_ALTER_FILEGROUP_FAILED; - if (alter_info->undo_file_name == NULL) - { - /* - REDO files in LOGFILE GROUP not supported yet - */ - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); - } - NdbDictionary::Undofile ndb_uf; - if (set_up_undofile(alter_info, &ndb_uf)) - { - DBUG_RETURN(1); - } - errmsg= "CREATE UNDOFILE"; - if (dict->createUndofile(ndb_uf)) - { - goto ndberror; - } - 
break; - } - case (DROP_TABLESPACE): - { - error= ER_DROP_FILEGROUP_FAILED; - errmsg= "TABLESPACE"; - if (dict->dropTablespace(dict->getTablespace(alter_info->tablespace_name))) - { - goto ndberror; - } - is_tablespace= 1; - break; - } - case (DROP_LOGFILE_GROUP): - { - error= ER_DROP_FILEGROUP_FAILED; - errmsg= "LOGFILE GROUP"; - if (dict->dropLogfileGroup(dict->getLogfileGroup(alter_info->logfile_group_name))) - { - goto ndberror; - } - break; - } - case (CHANGE_FILE_TABLESPACE): - { - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); - } - case (ALTER_ACCESS_MODE_TABLESPACE): - { - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); - } - default: - { - DBUG_RETURN(HA_ADMIN_NOT_IMPLEMENTED); - } - } -#ifdef HAVE_NDB_BINLOG - if (is_tablespace) - ndbcluster_log_schema_op(thd, 0, - thd->query(), thd->query_length(), - "", alter_info->tablespace_name, - 0, 0, - SOT_TABLESPACE, 0, 0); - else - ndbcluster_log_schema_op(thd, 0, - thd->query(), thd->query_length(), - "", alter_info->logfile_group_name, - 0, 0, - SOT_LOGFILE_GROUP, 0, 0); -#endif - DBUG_RETURN(FALSE); - -ndberror: - err= dict->getNdbError(); -ndberror2: - set_ndb_err(thd, err); - ndb_to_mysql_error(&err); - - my_error(error, MYF(0), errmsg); - DBUG_RETURN(1); -} - - -bool ha_ndbcluster::get_no_parts(const char *name, uint *num_parts) -{ - Ndb *ndb; - NDBDICT *dict; - int err; - DBUG_ENTER("ha_ndbcluster::get_no_parts"); - LINT_INIT(err); - - set_dbname(name); - set_tabname(name); - for (;;) - { - if (check_ndb_connection()) - { - err= HA_ERR_NO_CONNECTION; - break; - } - ndb= get_ndb(); - ndb->setDatabaseName(m_dbname); - Ndb_table_guard ndbtab_g(dict= ndb->getDictionary(), m_tabname); - if (!ndbtab_g.get_table()) - ERR_BREAK(dict->getNdbError(), err); - *num_parts= ndbtab_g.get_table()->getFragmentCount(); - DBUG_RETURN(FALSE); - } - - print_error(err, MYF(0)); - DBUG_RETURN(TRUE); -} - -static int ndbcluster_fill_files_table(handlerton *hton, - THD *thd, - TABLE_LIST *tables, - COND *cond) -{ - TABLE* table= tables->table; - 
Ndb *ndb= check_ndb_in_thd(thd); - NdbDictionary::Dictionary* dict= ndb->getDictionary(); - NdbDictionary::Dictionary::List dflist; - NdbError ndberr; - uint i; - DBUG_ENTER("ndbcluster_fill_files_table"); - - dict->listObjects(dflist, NdbDictionary::Object::Datafile); - ndberr= dict->getNdbError(); - if (ndberr.classification != NdbError::NoError) - ERR_RETURN(ndberr); - - for (i= 0; i < dflist.count; i++) - { - NdbDictionary::Dictionary::List::Element& elt = dflist.elements[i]; - Ndb_cluster_connection_node_iter iter; - uint id; - - g_ndb_cluster_connection->init_get_next_node(iter); - - while ((id= g_ndb_cluster_connection->get_next_node(iter))) - { - init_fill_schema_files_row(table); - NdbDictionary::Datafile df= dict->getDatafile(id, elt.name); - ndberr= dict->getNdbError(); - if(ndberr.classification != NdbError::NoError) - { - if (ndberr.classification == NdbError::SchemaError) - continue; - - if (ndberr.classification == NdbError::UnknownResultError) - continue; - - ERR_RETURN(ndberr); - } - NdbDictionary::Tablespace ts= dict->getTablespace(df.getTablespace()); - ndberr= dict->getNdbError(); - if (ndberr.classification != NdbError::NoError) - { - if (ndberr.classification == NdbError::SchemaError) - continue; - ERR_RETURN(ndberr); - } - table->field[IS_FILES_TABLE_CATALOG]->store(STRING_WITH_LEN("def"), - system_charset_info); - table->field[IS_FILES_FILE_NAME]->set_notnull(); - table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name), - system_charset_info); - table->field[IS_FILES_FILE_TYPE]->set_notnull(); - table->field[IS_FILES_FILE_TYPE]->store("DATAFILE",8, - system_charset_info); - table->field[IS_FILES_TABLESPACE_NAME]->set_notnull(); - table->field[IS_FILES_TABLESPACE_NAME]->store(df.getTablespace(), - strlen(df.getTablespace()), - system_charset_info); - table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull(); - table->field[IS_FILES_LOGFILE_GROUP_NAME]-> - store(ts.getDefaultLogfileGroup(), - strlen(ts.getDefaultLogfileGroup()), - 
system_charset_info); - table->field[IS_FILES_ENGINE]->set_notnull(); - table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name, - ndbcluster_hton_name_length, - system_charset_info); - - table->field[IS_FILES_FREE_EXTENTS]->set_notnull(); - table->field[IS_FILES_FREE_EXTENTS]->store(df.getFree() - / ts.getExtentSize()); - table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull(); - table->field[IS_FILES_TOTAL_EXTENTS]->store(df.getSize() - / ts.getExtentSize()); - table->field[IS_FILES_EXTENT_SIZE]->set_notnull(); - table->field[IS_FILES_EXTENT_SIZE]->store(ts.getExtentSize()); - table->field[IS_FILES_INITIAL_SIZE]->set_notnull(); - table->field[IS_FILES_INITIAL_SIZE]->store(df.getSize()); - table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull(); - table->field[IS_FILES_MAXIMUM_SIZE]->store(df.getSize()); - table->field[IS_FILES_VERSION]->set_notnull(); - table->field[IS_FILES_VERSION]->store(df.getObjectVersion()); - - table->field[IS_FILES_ROW_FORMAT]->set_notnull(); - table->field[IS_FILES_ROW_FORMAT]->store("FIXED", 5, system_charset_info); - - char extra[30]; - int len= my_snprintf(extra, sizeof(extra), "CLUSTER_NODE=%u", id); - table->field[IS_FILES_EXTRA]->set_notnull(); - table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info); - schema_table_store_record(thd, table); - } - } - - NdbDictionary::Dictionary::List uflist; - dict->listObjects(uflist, NdbDictionary::Object::Undofile); - ndberr= dict->getNdbError(); - if (ndberr.classification != NdbError::NoError) - ERR_RETURN(ndberr); - - for (i= 0; i < uflist.count; i++) - { - NdbDictionary::Dictionary::List::Element& elt= uflist.elements[i]; - Ndb_cluster_connection_node_iter iter; - unsigned id; - - g_ndb_cluster_connection->init_get_next_node(iter); - - while ((id= g_ndb_cluster_connection->get_next_node(iter))) - { - NdbDictionary::Undofile uf= dict->getUndofile(id, elt.name); - ndberr= dict->getNdbError(); - if (ndberr.classification != NdbError::NoError) - { - if (ndberr.classification == 
NdbError::SchemaError) - continue; - if (ndberr.classification == NdbError::UnknownResultError) - continue; - ERR_RETURN(ndberr); - } - NdbDictionary::LogfileGroup lfg= - dict->getLogfileGroup(uf.getLogfileGroup()); - ndberr= dict->getNdbError(); - if (ndberr.classification != NdbError::NoError) - { - if (ndberr.classification == NdbError::SchemaError) - continue; - ERR_RETURN(ndberr); - } - - init_fill_schema_files_row(table); - table->field[IS_FILES_FILE_NAME]->set_notnull(); - table->field[IS_FILES_FILE_NAME]->store(elt.name, strlen(elt.name), - system_charset_info); - table->field[IS_FILES_FILE_TYPE]->set_notnull(); - table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8, - system_charset_info); - NdbDictionary::ObjectId objid; - uf.getLogfileGroupId(&objid); - table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull(); - table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(uf.getLogfileGroup(), - strlen(uf.getLogfileGroup()), - system_charset_info); - table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull(); - table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(objid.getObjectId()); - table->field[IS_FILES_ENGINE]->set_notnull(); - table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name, - ndbcluster_hton_name_length, - system_charset_info); - - table->field[IS_FILES_TOTAL_EXTENTS]->set_notnull(); - table->field[IS_FILES_TOTAL_EXTENTS]->store(uf.getSize()/4); - table->field[IS_FILES_EXTENT_SIZE]->set_notnull(); - table->field[IS_FILES_EXTENT_SIZE]->store(4); - - table->field[IS_FILES_INITIAL_SIZE]->set_notnull(); - table->field[IS_FILES_INITIAL_SIZE]->store(uf.getSize()); - table->field[IS_FILES_MAXIMUM_SIZE]->set_notnull(); - table->field[IS_FILES_MAXIMUM_SIZE]->store(uf.getSize()); - - table->field[IS_FILES_VERSION]->set_notnull(); - table->field[IS_FILES_VERSION]->store(uf.getObjectVersion()); - - char extra[100]; - int len= my_snprintf(extra,sizeof(extra),"CLUSTER_NODE=%u;UNDO_BUFFER_SIZE=%lu", - id, (ulong) lfg.getUndoBufferSize()); - 
table->field[IS_FILES_EXTRA]->set_notnull(); - table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info); - schema_table_store_record(thd, table); - } - } - - // now for LFGs - NdbDictionary::Dictionary::List lfglist; - dict->listObjects(lfglist, NdbDictionary::Object::LogfileGroup); - ndberr= dict->getNdbError(); - if (ndberr.classification != NdbError::NoError) - ERR_RETURN(ndberr); - - for (i= 0; i < lfglist.count; i++) - { - NdbDictionary::Dictionary::List::Element& elt= lfglist.elements[i]; - - NdbDictionary::LogfileGroup lfg= dict->getLogfileGroup(elt.name); - ndberr= dict->getNdbError(); - if (ndberr.classification != NdbError::NoError) - { - if (ndberr.classification == NdbError::SchemaError) - continue; - ERR_RETURN(ndberr); - } - - init_fill_schema_files_row(table); - table->field[IS_FILES_FILE_TYPE]->set_notnull(); - table->field[IS_FILES_FILE_TYPE]->store("UNDO LOG", 8, - system_charset_info); - - table->field[IS_FILES_LOGFILE_GROUP_NAME]->set_notnull(); - table->field[IS_FILES_LOGFILE_GROUP_NAME]->store(elt.name, - strlen(elt.name), - system_charset_info); - table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->set_notnull(); - table->field[IS_FILES_LOGFILE_GROUP_NUMBER]->store(lfg.getObjectId()); - table->field[IS_FILES_ENGINE]->set_notnull(); - table->field[IS_FILES_ENGINE]->store(ndbcluster_hton_name, - ndbcluster_hton_name_length, - system_charset_info); - - table->field[IS_FILES_FREE_EXTENTS]->set_notnull(); - table->field[IS_FILES_FREE_EXTENTS]->store(lfg.getUndoFreeWords()); - table->field[IS_FILES_EXTENT_SIZE]->set_notnull(); - table->field[IS_FILES_EXTENT_SIZE]->store(4); - - table->field[IS_FILES_VERSION]->set_notnull(); - table->field[IS_FILES_VERSION]->store(lfg.getObjectVersion()); - - char extra[100]; - int len= my_snprintf(extra,sizeof(extra), - "UNDO_BUFFER_SIZE=%lu", - (ulong) lfg.getUndoBufferSize()); - table->field[IS_FILES_EXTRA]->set_notnull(); - table->field[IS_FILES_EXTRA]->store(extra, len, system_charset_info); - 
schema_table_store_record(thd, table); - } - DBUG_RETURN(0); -} - -SHOW_VAR ndb_status_variables_export[]= { - {"Ndb", (char*) &ndb_status_variables, SHOW_ARRAY}, - {NullS, NullS, SHOW_LONG} -}; - -static MYSQL_SYSVAR_ULONG( - cache_check_time, /* name */ - opt_ndb_cache_check_time, /* var */ - PLUGIN_VAR_RQCMDARG, - "A dedicated thread is created to, at the given " - "millisecond interval, invalidate the query cache " - "if another MySQL server in the cluster has changed " - "the data in the database.", - NULL, /* check func. */ - NULL, /* update func. */ - 0, /* default */ - 0, /* min */ - ONE_YEAR_IN_SECONDS, /* max */ - 0 /* block */ -); - - -static MYSQL_SYSVAR_ULONG( - extra_logging, /* name */ - opt_ndb_extra_logging, /* var */ - PLUGIN_VAR_OPCMDARG, - "Turn on more logging in the error log.", - NULL, /* check func. */ - NULL, /* update func. */ - 1, /* default */ - 0, /* min */ - 0, /* max */ - 0 /* block */ -); - - -ulong opt_ndb_report_thresh_binlog_epoch_slip; -static MYSQL_SYSVAR_ULONG( - report_thresh_binlog_epoch_slip, /* name */ - opt_ndb_report_thresh_binlog_epoch_slip,/* var */ - PLUGIN_VAR_RQCMDARG, - "Threshold on number of epochs to be behind before reporting binlog " - "status. E.g. 3 means that if the difference between what epoch has " - "been received from the storage nodes and what has been applied to " - "the binlog is 3 or more, a status message will be sent to the cluster " - "log.", - NULL, /* check func. */ - NULL, /* update func. */ - 3, /* default */ - 0, /* min */ - 256, /* max */ - 0 /* block */ -); - - -ulong opt_ndb_report_thresh_binlog_mem_usage; -static MYSQL_SYSVAR_ULONG( - report_thresh_binlog_mem_usage, /* name */ - opt_ndb_report_thresh_binlog_mem_usage,/* var */ - PLUGIN_VAR_RQCMDARG, - "Threshold on percentage of free memory before reporting binlog " - "status. E.g. 
10 means that if amount of available memory for " - "receiving binlog data from the storage nodes goes below 10%, " - "a status message will be sent to the cluster log.", - NULL, /* check func. */ - NULL, /* update func. */ - 10, /* default */ - 0, /* min */ - 100, /* max */ - 0 /* block */ -); - - -static MYSQL_SYSVAR_STR( - connectstring, /* name */ - opt_ndb_connectstring, /* var */ - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Connect string for ndbcluster.", - NULL, /* check func. */ - NULL, /* update func. */ - NULL /* default */ -); - - -static MYSQL_SYSVAR_STR( - mgmd_host, /* name */ - opt_ndb_mgmd_host, /* var */ - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Set host and port for ndb_mgmd. Syntax: hostname[:port]", - NULL, /* check func. */ - NULL, /* update func. */ - NULL /* default */ -); - - -static MYSQL_SYSVAR_UINT( - nodeid, /* name */ - opt_ndb_nodeid, /* var */ - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Nodeid for this mysqld in the cluster.", - NULL, /* check func. */ - NULL, /* update func. 
*/ - 0, /* default */ - 0, /* min */ - 255, /* max */ - 0 /* block */ -); - -static struct st_mysql_sys_var* system_variables[]= { - MYSQL_SYSVAR(cache_check_time), - MYSQL_SYSVAR(extra_logging), - MYSQL_SYSVAR(report_thresh_binlog_mem_usage), - MYSQL_SYSVAR(report_thresh_binlog_epoch_slip), - MYSQL_SYSVAR(distribution), - MYSQL_SYSVAR(autoincrement_prefetch_sz), - MYSQL_SYSVAR(force_send), - MYSQL_SYSVAR(use_exact_count), - MYSQL_SYSVAR(use_transactions), - MYSQL_SYSVAR(use_copying_alter_table), - MYSQL_SYSVAR(optimized_node_selection), - MYSQL_SYSVAR(index_stat_enable), - MYSQL_SYSVAR(index_stat_cache_entries), - MYSQL_SYSVAR(index_stat_update_freq), - MYSQL_SYSVAR(connectstring), - MYSQL_SYSVAR(mgmd_host), - MYSQL_SYSVAR(nodeid), - - NULL -}; - - -struct st_mysql_storage_engine ndbcluster_storage_engine= -{ MYSQL_HANDLERTON_INTERFACE_VERSION }; - -mysql_declare_plugin(ndbcluster) -{ - MYSQL_STORAGE_ENGINE_PLUGIN, - &ndbcluster_storage_engine, - ndbcluster_hton_name, - "MySQL AB", - "Clustered, fault-tolerant tables", - PLUGIN_LICENSE_GPL, - ndbcluster_init, /* Plugin Init */ - NULL, /* Plugin Deinit */ - 0x0100 /* 1.0 */, - ndb_status_variables_export,/* status variables */ - system_variables, /* system variables */ - NULL, /* config options */ - 0, /* flags */ -} -mysql_declare_plugin_end; -maria_declare_plugin(ndbcluster) -{ - MYSQL_STORAGE_ENGINE_PLUGIN, - &ndbcluster_storage_engine, - ndbcluster_hton_name, - "MySQL AB", - "Clustered, fault-tolerant tables", - PLUGIN_LICENSE_GPL, - ndbcluster_init, /* Plugin Init */ - NULL, /* Plugin Deinit */ - 0x0100 /* 1.0 */, - ndb_status_variables_export,/* status variables */ - NULL, /* system variables */ - "1.0", /* string version */ - MariaDB_PLUGIN_MATURITY_GAMMA /* maturity */ -} -maria_declare_plugin_end; - -#else -int Sun_ar_require_a_symbol_here= 0; -#endif diff --git a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h deleted file mode 100644 index 70e1e9dc7cf..00000000000 --- a/sql/ha_ndbcluster.h +++ /dev/null @@ 
-1,599 +0,0 @@ -#ifndef HA_NDBCLUSTER_INCLUDED -#define HA_NDBCLUSTER_INCLUDED - -/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - -/* - This file defines the NDB Cluster handler: the interface between MySQL and - NDB Cluster -*/ - -/* The class defining a handle to an NDB Cluster table */ - -#ifdef USE_PRAGMA_INTERFACE -#pragma interface /* gcc class implementation */ -#endif - -/* Blob tables and events are internal to NDB and must never be accessed */ -#define IS_NDB_BLOB_PREFIX(A) is_prefix(A, "NDB$BLOB") - -#include <NdbApi.hpp> -#include <ndbapi_limits.h> - -#define NDB_HIDDEN_PRIMARY_KEY_LENGTH 8 - -#ifdef HAVE_PSI_INTERFACE -extern PSI_file_key key_file_ndb; -#endif /* HAVE_PSI_INTERFACE */ - - -class Ndb; // Forward declaration -class NdbOperation; // Forward declaration -class NdbTransaction; // Forward declaration -class NdbRecAttr; // Forward declaration -class NdbScanOperation; -class NdbIndexScanOperation; -class NdbBlob; -class NdbIndexStat; -class NdbEventOperation; -class ha_ndbcluster_cond; - -#include "sql_partition.h" /* part_id_range */ - -// connectstring to cluster if given by mysqld -extern const char *ndbcluster_connectstring; - -typedef enum ndb_index_type { - UNDEFINED_INDEX = 0, - PRIMARY_KEY_INDEX = 1, - PRIMARY_KEY_ORDERED_INDEX = 2, - UNIQUE_INDEX = 3, - 
UNIQUE_ORDERED_INDEX = 4, - ORDERED_INDEX = 5 -} NDB_INDEX_TYPE; - -typedef enum ndb_index_status { - UNDEFINED = 0, - ACTIVE = 1, - TO_BE_DROPPED = 2 -} NDB_INDEX_STATUS; - -typedef struct ndb_index_data { - NDB_INDEX_TYPE type; - NDB_INDEX_STATUS status; - const NdbDictionary::Index *index; - const NdbDictionary::Index *unique_index; - unsigned char *unique_index_attrid_map; - bool null_in_unique_index; - // In this version stats are not shared between threads - NdbIndexStat* index_stat; - uint index_stat_cache_entries; - // Simple counter mechanism to decide when to connect to db - uint index_stat_update_freq; - uint index_stat_query_count; -} NDB_INDEX_DATA; - -typedef enum ndb_write_op { - NDB_INSERT = 0, - NDB_UPDATE = 1, - NDB_PK_UPDATE = 2 -} NDB_WRITE_OP; - -typedef union { const NdbRecAttr *rec; NdbBlob *blob; void *ptr; } NdbValue; - -int get_ndb_blobs_value(TABLE* table, NdbValue* value_array, - uchar*& buffer, uint& buffer_size, - my_ptrdiff_t ptrdiff); - -typedef enum { - NSS_INITIAL= 0, - NSS_DROPPED, - NSS_ALTERED -} NDB_SHARE_STATE; - -typedef struct st_ndbcluster_share { - NDB_SHARE_STATE state; - MEM_ROOT mem_root; - THR_LOCK lock; - mysql_mutex_t mutex; - char *key; - uint key_length; - THD *util_lock; - uint use_count; - uint commit_count_lock; - ulonglong commit_count; - char *db; - char *table_name; - Ndb::TupleIdRange tuple_id_range; -#ifdef HAVE_NDB_BINLOG - uint32 connect_count; - uint32 flags; - NdbEventOperation *op; - NdbEventOperation *op_old; // for rename table - char *old_names; // for rename table - TABLE_SHARE *table_share; - TABLE *table; - uchar *record[2]; // pointer to allocated records for receiving data - NdbValue *ndb_value[2]; - MY_BITMAP *subscriber_bitmap; -#endif -} NDB_SHARE; - -inline -NDB_SHARE_STATE -get_ndb_share_state(NDB_SHARE *share) -{ - NDB_SHARE_STATE state; - mysql_mutex_lock(&share->mutex); - state= share->state; - mysql_mutex_unlock(&share->mutex); - return state; -} - -inline -void 
-set_ndb_share_state(NDB_SHARE *share, NDB_SHARE_STATE state) -{ - mysql_mutex_lock(&share->mutex); - share->state= state; - mysql_mutex_unlock(&share->mutex); -} - -struct Ndb_tuple_id_range_guard { - Ndb_tuple_id_range_guard(NDB_SHARE* _share) : - share(_share), - range(share->tuple_id_range) { - mysql_mutex_lock(&share->mutex); - } - ~Ndb_tuple_id_range_guard() { - mysql_mutex_unlock(&share->mutex); - } - NDB_SHARE* share; - Ndb::TupleIdRange& range; -}; - -#ifdef HAVE_NDB_BINLOG -/* NDB_SHARE.flags */ -#define NSF_HIDDEN_PK 1 /* table has hidden primary key */ -#define NSF_BLOB_FLAG 2 /* table has blob attributes */ -#define NSF_NO_BINLOG 4 /* table should not be binlogged */ -#endif - -typedef enum ndb_query_state_bits { - NDB_QUERY_NORMAL = 0, - NDB_QUERY_MULTI_READ_RANGE = 1 -} NDB_QUERY_STATE_BITS; - -/* - Place holder for ha_ndbcluster thread specific data -*/ - -enum THD_NDB_OPTIONS -{ - TNO_NO_LOG_SCHEMA_OP= 1 << 0 -}; - -enum THD_NDB_TRANS_OPTIONS -{ - TNTO_INJECTED_APPLY_STATUS= 1 << 0 - ,TNTO_NO_LOGGING= 1 << 1 -}; - -struct Ndb_local_table_statistics { - int no_uncommitted_rows_count; - ulong last_count; - ha_rows records; -}; - -class Thd_ndb -{ - public: - Thd_ndb(); - ~Thd_ndb(); - - void init_open_tables(); - - Ndb *ndb; - ulong count; - uint lock_count; - uint start_stmt_count; - NdbTransaction *trans; - bool m_error; - bool m_slow_path; - int m_error_code; - uint32 m_query_id; /* query id whn m_error_code was set */ - uint32 options; - uint32 trans_options; - List<NDB_SHARE> changed_tables; - uint query_state; - HASH open_tables; -}; - -class ha_ndbcluster: public handler -{ - public: - ha_ndbcluster(handlerton *hton, TABLE_SHARE *table); - ~ha_ndbcluster(); - - int ha_initialise(); - int open(const char *name, int mode, uint test_if_locked); - int close(void); - - int write_row(uchar *buf); - int update_row(const uchar *old_data, uchar *new_data); - int delete_row(const uchar *buf); - int index_init(uint index, bool sorted); - int index_end(); 
- int index_read(uchar *buf, const uchar *key, uint key_len, - enum ha_rkey_function find_flag); - int index_next(uchar *buf); - int index_prev(uchar *buf); - int index_first(uchar *buf); - int index_last(uchar *buf); - int index_read_last(uchar * buf, const uchar * key, uint key_len); - int rnd_init(bool scan); - int rnd_end(); - int rnd_next(uchar *buf); - int rnd_pos(uchar *buf, uchar *pos); - void position(const uchar *record); - int read_range_first(const key_range *start_key, - const key_range *end_key, - bool eq_range, bool sorted); - int read_range_first_to_buf(const key_range *start_key, - const key_range *end_key, - bool eq_range, bool sorted, - uchar* buf); - int read_range_next(); - int alter_tablespace(st_alter_tablespace *info); - - /** - * Multi range stuff - */ -#if 0 - /* - MRR/NDB is disabled in MariaDB. This is because in MariaDB, we've - backported - - the latest version of MRR interface (BKA needs this) - - the latest version of DS-MRR implementation - but didn't backport the latest version MRR/NDB implementation. 
- - */ - int read_multi_range_first(KEY_MULTI_RANGE **found_range_p, - KEY_MULTI_RANGE*ranges, uint range_count, - bool sorted, HANDLER_BUFFER *buffer); - int read_multi_range_next(KEY_MULTI_RANGE **found_range_p); -#endif - bool null_value_index_search(KEY_MULTI_RANGE *ranges, - KEY_MULTI_RANGE *end_range, - HANDLER_BUFFER *buffer); - - bool get_error_message(int error, String *buf); - ha_rows records(); - ha_rows estimate_rows_upper_bound() - { return HA_POS_ERROR; } - int info(uint); - void get_dynamic_partition_info(PARTITION_STATS *stat_info, uint part_id); - int extra(enum ha_extra_function operation); - int extra_opt(enum ha_extra_function operation, ulong cache_size); - int reset(); - int external_lock(THD *thd, int lock_type); - void unlock_row(); - int start_stmt(THD *thd, thr_lock_type lock_type); - void print_error(int error, myf errflag); - const char * table_type() const; - const char ** bas_ext() const; - ulonglong table_flags(void) const; - void prepare_for_alter(); - int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys); - int prepare_drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys); - int final_drop_index(TABLE *table_arg); - void set_part_info(partition_info *part_info); - ulong index_flags(uint idx, uint part, bool all_parts) const; - uint max_supported_record_length() const; - uint max_supported_keys() const; - uint max_supported_key_parts() const; - uint max_supported_key_length() const; - uint max_supported_key_part_length() const; - - int rename_table(const char *from, const char *to); - int delete_table(const char *name); - int create(const char *name, TABLE *form, HA_CREATE_INFO *info); - int create_handler_files(const char *file, const char *old_name, - int action_flag, HA_CREATE_INFO *info); - int get_default_no_partitions(HA_CREATE_INFO *info); - bool get_no_parts(const char *name, uint *no_parts); - void set_auto_partitions(partition_info *part_info); - virtual bool is_fatal_error(int error, uint flags) - { - if 
(!handler::is_fatal_error(error, flags) || - error == HA_ERR_NO_PARTITION_FOUND) - return FALSE; - return TRUE; - } - - THR_LOCK_DATA **store_lock(THD *thd, - THR_LOCK_DATA **to, - enum thr_lock_type lock_type); - - bool low_byte_first() const; - - const char* index_type(uint key_number); - - double scan_time(); - ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); - void start_bulk_insert(ha_rows rows); - int end_bulk_insert(); - - static Thd_ndb* seize_thd_ndb(); - static void release_thd_ndb(Thd_ndb* thd_ndb); - -static void set_dbname(const char *pathname, char *dbname); -static void set_tabname(const char *pathname, char *tabname); - - /* - Condition pushdown - */ - - /* - Push condition down to the table handler. - SYNOPSIS - cond_push() - cond Condition to be pushed. The condition tree must not be - modified by the by the caller. - RETURN - The 'remainder' condition that caller must use to filter out records. - NULL means the handler will not return rows that do not match the - passed condition. - NOTES - The pushed conditions form a stack (from which one can remove the - last pushed condition using cond_pop). - The table handler filters out rows using (pushed_cond1 AND pushed_cond2 - AND ... AND pushed_condN) - or less restrictive condition, depending on handler's capabilities. - - handler->reset() call empties the condition stack. - Calls to rnd_init/rnd_end, index_init/index_end etc do not affect the - condition stack. - The current implementation supports arbitrary AND/OR nested conditions - with comparisons between columns and constants (including constant - expressions and function calls) and the following comparison operators: - =, !=, >, >=, <, <=, like, "not like", "is null", and "is not null". - Negated conditions are supported by NOT which generate NAND/NOR groups. - */ - const COND *cond_push(const COND *cond); - /* - Pop the top condition from the condition stack of the handler instance. 
- SYNOPSIS - cond_pop() - Pops the top if condition stack, if stack is not empty - */ - void cond_pop(); - - uint8 table_cache_type(); - - /* - * Internal to ha_ndbcluster, used by C functions - */ - int ndb_err(NdbTransaction*); - - my_bool register_query_cache_table(THD *thd, char *table_key, - uint key_length, - qc_engine_callback *engine_callback, - ulonglong *engine_data); - - bool check_if_incompatible_data(HA_CREATE_INFO *info, - uint table_changes); - -private: - int loc_read_multi_range_next(KEY_MULTI_RANGE **found_range_p); - friend int ndbcluster_drop_database_impl(const char *path); - friend int ndb_handle_schema_change(THD *thd, - Ndb *ndb, NdbEventOperation *pOp, - NDB_SHARE *share); - - static int delete_table(ha_ndbcluster *h, Ndb *ndb, - const char *path, - const char *db, - const char *table_name); - int create_ndb_index(const char *name, KEY *key_info, bool unique); - int create_ordered_index(const char *name, KEY *key_info); - int create_unique_index(const char *name, KEY *key_info); - int create_index(const char *name, KEY *key_info, - NDB_INDEX_TYPE idx_type, uint idx_no); -// Index list management - int create_indexes(Ndb *ndb, TABLE *tab); - int open_indexes(Ndb *ndb, TABLE *tab, bool ignore_error); - void renumber_indexes(Ndb *ndb, TABLE *tab); - int drop_indexes(Ndb *ndb, TABLE *tab); - int add_index_handle(THD *thd, NdbDictionary::Dictionary *dict, - KEY *key_info, const char *index_name, uint index_no); - int get_metadata(const char* path); - void release_metadata(THD *thd, Ndb *ndb); - NDB_INDEX_TYPE get_index_type(uint idx_no) const; - NDB_INDEX_TYPE get_index_type_from_table(uint index_no) const; - NDB_INDEX_TYPE get_index_type_from_key(uint index_no, KEY *key_info, - bool primary) const; - bool has_null_in_unique_index(uint idx_no) const; - bool check_index_fields_not_null(KEY *key_info); - - uint set_up_partition_info(partition_info *part_info, - TABLE *table, - void *tab); - char* get_tablespace_name(THD *thd, char *name, uint 
name_len); - int set_range_data(void *tab, partition_info* part_info); - int set_list_data(void *tab, partition_info* part_info); - int complemented_read(const uchar *old_data, uchar *new_data, - uint32 old_part_id); - int pk_read(const uchar *key, uint key_len, uchar *buf, uint32 part_id); - int ordered_index_scan(const key_range *start_key, - const key_range *end_key, - bool sorted, bool descending, uchar* buf, - part_id_range *part_spec); - int unique_index_read(const uchar *key, uint key_len, - uchar *buf); - int unique_index_scan(const KEY* key_info, - const uchar *key, - uint key_len, - uchar *buf); - int full_table_scan(uchar * buf); - - bool check_all_operations_for_error(NdbTransaction *trans, - const NdbOperation *first, - const NdbOperation *last, - uint errcode); - int peek_indexed_rows(const uchar *record, NDB_WRITE_OP write_op); - int fetch_next(NdbScanOperation* op); - int set_auto_inc(Field *field); - int next_result(uchar *buf); - int define_read_attrs(uchar* buf, NdbOperation* op); - int filtered_scan(const uchar *key, uint key_len, - uchar *buf, - enum ha_rkey_function find_flag); - int close_scan(); - void unpack_record(uchar *buf); - int get_ndb_lock_type(enum thr_lock_type type); - - void set_dbname(const char *pathname); - void set_tabname(const char *pathname); - - bool set_hidden_key(NdbOperation*, - uint fieldnr, const uchar* field_ptr); - int set_ndb_key(NdbOperation*, Field *field, - uint fieldnr, const uchar* field_ptr); - int set_ndb_value(NdbOperation*, Field *field, uint fieldnr, - int row_offset= 0, bool *set_blob_value= 0); - int get_ndb_value(NdbOperation*, Field *field, uint fieldnr, uchar*); - int get_ndb_partition_id(NdbOperation *); - friend int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg); - int set_primary_key(NdbOperation *op, const uchar *key); - int set_primary_key_from_record(NdbOperation *op, const uchar *record); - bool check_index_fields_in_write_set(uint keyno); - int set_index_key_from_record(NdbOperation 
*op, const uchar *record, - uint keyno); - int set_bounds(NdbIndexScanOperation*, uint inx, bool rir, - const key_range *keys[2], uint= 0); - int key_cmp(uint keynr, const uchar * old_row, const uchar * new_row); - int set_index_key(NdbOperation *, const KEY *key_info, const uchar *key_ptr); - void print_results(); - - virtual void get_auto_increment(ulonglong offset, ulonglong increment, - ulonglong nb_desired_values, - ulonglong *first_value, - ulonglong *nb_reserved_values); - bool uses_blob_value(); - - char *update_table_comment(const char * comment); - - int write_ndb_file(const char *name); - - int check_ndb_connection(THD* thd= current_thd); - - void set_rec_per_key(); - int records_update(); - void no_uncommitted_rows_execute_failure(); - void no_uncommitted_rows_update(int); - void no_uncommitted_rows_reset(THD *); - - void release_completed_operations(NdbTransaction*, bool); - - friend int execute_commit(ha_ndbcluster*, NdbTransaction*); - friend int execute_no_commit_ignore_no_key(ha_ndbcluster*, NdbTransaction*); - friend int execute_no_commit(ha_ndbcluster*, NdbTransaction*, bool); - friend int execute_no_commit_ie(ha_ndbcluster*, NdbTransaction*, bool); - - void transaction_checks(THD *thd); - int start_statement(THD *thd, Thd_ndb *thd_ndb, Ndb* ndb); - int init_handler_for_statement(THD *thd, Thd_ndb *thd_ndb); - - NdbTransaction *m_active_trans; - NdbScanOperation *m_active_cursor; - const NdbDictionary::Table *m_table; - struct Ndb_local_table_statistics *m_table_info; - struct Ndb_local_table_statistics m_table_info_instance; - char m_dbname[FN_HEADLEN]; - //char m_schemaname[FN_HEADLEN]; - char m_tabname[FN_HEADLEN]; - ulonglong m_table_flags; - THR_LOCK_DATA m_lock; - bool m_lock_tuple; - NDB_SHARE *m_share; - NDB_INDEX_DATA m_index[MAX_KEY]; - // NdbRecAttr has no reference to blob - NdbValue m_value[NDB_MAX_ATTRIBUTES_IN_TABLE]; - uchar m_ref[NDB_HIDDEN_PRIMARY_KEY_LENGTH]; - partition_info *m_part_info; - uint32 m_part_id; - uchar *m_rec0; - 
Field **m_part_field_array; - bool m_use_partition_function; - bool m_sorted; - bool m_use_write; - bool m_ignore_dup_key; - bool m_has_unique_index; - bool m_primary_key_update; - bool m_write_op; - bool m_ignore_no_key; - ha_rows m_rows_to_insert; // TODO: merge it with handler::estimation_rows_to_insert? - ha_rows m_rows_inserted; - ha_rows m_bulk_insert_rows; - ha_rows m_rows_changed; - bool m_bulk_insert_not_flushed; - bool m_delete_cannot_batch; - bool m_update_cannot_batch; - ha_rows m_ops_pending; - bool m_skip_auto_increment; - bool m_blobs_pending; - bool m_slow_path; - my_ptrdiff_t m_blobs_offset; - // memory for blobs in one tuple - uchar *m_blobs_buffer; - uint32 m_blobs_buffer_size; - uint m_dupkey; - // set from thread variables at external lock - bool m_ha_not_exact_count; - bool m_force_send; - ha_rows m_autoincrement_prefetch; - bool m_transaction_on; - - ha_ndbcluster_cond *m_cond; - bool m_disable_multi_read; - uchar *m_multi_range_result_ptr; - KEY_MULTI_RANGE *m_multi_ranges; - KEY_MULTI_RANGE *m_multi_range_defined; - const NdbOperation *m_current_multi_operation; - NdbIndexScanOperation *m_multi_cursor; - uchar *m_multi_range_cursor_result_ptr; - int setup_recattr(const NdbRecAttr*); - Ndb *get_ndb(); -}; - -extern SHOW_VAR ndb_status_variables[]; - -int ndbcluster_discover(THD* thd, const char* dbname, const char* name, - const void** frmblob, uint* frmlen); -int ndbcluster_find_files(THD *thd,const char *db,const char *path, - const char *wild, bool dir, List<LEX_STRING> *files); -int ndbcluster_table_exists_in_engine(THD* thd, - const char *db, const char *name); -void ndbcluster_print_error(int error, const NdbOperation *error_op); - -static const char ndbcluster_hton_name[]= "ndbcluster"; -static const int ndbcluster_hton_name_length=sizeof(ndbcluster_hton_name)-1; -extern int ndbcluster_terminating; -extern int ndb_util_thread_running; -extern mysql_cond_t COND_ndb_util_ready; - -#endif /* HA_NDBCLUSTER_INCLUDED */ diff --git 
a/sql/ha_ndbcluster_binlog.cc b/sql/ha_ndbcluster_binlog.cc deleted file mode 100644 index 531211eb175..00000000000 --- a/sql/ha_ndbcluster_binlog.cc +++ /dev/null @@ -1,4425 +0,0 @@ -/* Copyright (c) 2006, 2013, Oracle and/or its affiliates. - Copyright (c) 2012, 2013, Monty Proram Ab. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include "sql_priv.h" -#include "unireg.h" // REQUIRED: for other includes -#include "sql_show.h" -#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE -#include "ha_ndbcluster.h" - -#ifdef HAVE_NDB_BINLOG -#include "rpl_injector.h" -#include "rpl_filter.h" -#include "slave.h" -#include "ha_ndbcluster_binlog.h" -#include "NdbDictionary.hpp" -#include "ndb_cluster_connection.hpp" -#include <util/NdbAutoPtr.hpp> - -#include "sql_base.h" // close_thread_tables -#include "sql_table.h" // build_table_filename -#include "table.h" // open_table_from_share -#include "discover.h" // readfrm, writefrm -#include "lock.h" // MYSQL_LOCK_IGNORE_FLUSH, - // mysql_unlock_tables -#include "sql_parse.h" // mysql_parse -#include "transaction.h" - -#ifdef ndb_dynamite -#undef assert -#define assert(x) do { if(x) break; ::printf("%s %d: assert failed: %s\n", __FILE__, __LINE__, #x); ::fflush(stdout); ::signal(SIGABRT,SIG_DFL); ::abort(); ::kill(::getpid(),6); ::kill(::getpid(),9); } while (0) -#endif - -extern my_bool opt_ndb_log_binlog_index; -extern ulong 
opt_ndb_extra_logging; -/* - defines for cluster replication table names -*/ -#include "ha_ndbcluster_tables.h" -#define NDB_APPLY_TABLE_FILE "./" NDB_REP_DB "/" NDB_APPLY_TABLE -#define NDB_SCHEMA_TABLE_FILE "./" NDB_REP_DB "/" NDB_SCHEMA_TABLE - -/* - Timeout for syncing schema events between - mysql servers, and between mysql server and the binlog -*/ -static const int DEFAULT_SYNC_TIMEOUT= 120; - - -/* - Flag showing if the ndb injector thread is running, if so == 1 - -1 if it was started but later stopped for some reason - 0 if never started -*/ -static int ndb_binlog_thread_running= 0; - -/* - Flag showing if the ndb binlog should be created, if so == TRUE - FALSE if not -*/ -my_bool ndb_binlog_running= FALSE; -my_bool ndb_binlog_tables_inited= FALSE; - -/* - Global reference to the ndb injector thread THD oject - - Has one sole purpose, for setting the in_use table member variable - in get_share(...) -*/ -THD *injector_thd= 0; - -/* - Global reference to ndb injector thd object. - - Used mainly by the binlog index thread, but exposed to the client sql - thread for one reason; to setup the events operations for a table - to enable ndb injector thread receiving events. - - Must therefore always be used with a surrounding - mysql_mutex_lock(&injector_mutex), when doing create/dropEventOperation -*/ -static Ndb *injector_ndb= 0; -static Ndb *schema_ndb= 0; - -static int ndbcluster_binlog_inited= 0; -/* - Flag "ndbcluster_binlog_terminating" set when shutting down mysqld. - Server main loop should call handlerton function: - - ndbcluster_hton->binlog_func == - ndbcluster_binlog_func(...,BFN_BINLOG_END,...) == - ndbcluster_binlog_end - - at shutdown, which sets the flag. And then server needs to wait for it - to complete. Otherwise binlog will not be complete. 
- - ndbcluster_hton->panic == ndbcluster_end() will not return until - ndb binlog is completed -*/ -static int ndbcluster_binlog_terminating= 0; - -/* - Mutex and condition used for interacting between client sql thread - and injector thread -*/ -pthread_t ndb_binlog_thread; -mysql_mutex_t injector_mutex; -mysql_cond_t injector_cond; - -/* NDB Injector thread (used for binlog creation) */ -static ulonglong ndb_latest_applied_binlog_epoch= 0; -static ulonglong ndb_latest_handled_binlog_epoch= 0; -static ulonglong ndb_latest_received_binlog_epoch= 0; - -NDB_SHARE *ndb_apply_status_share= 0; -NDB_SHARE *ndb_schema_share= 0; -mysql_mutex_t ndb_schema_share_mutex; - -extern my_bool opt_log_slave_updates; -static my_bool g_ndb_log_slave_updates; - -/* Schema object distribution handling */ -HASH ndb_schema_objects; -typedef struct st_ndb_schema_object { - mysql_mutex_t mutex; - char *key; - uint key_length; - uint use_count; - MY_BITMAP slock_bitmap; - uint32 slock[256/32]; // 256 bits for lock status of table -} NDB_SCHEMA_OBJECT; -static NDB_SCHEMA_OBJECT *ndb_get_schema_object(const char *key, - my_bool create_if_not_exists, - my_bool have_lock); -static void ndb_free_schema_object(NDB_SCHEMA_OBJECT **ndb_schema_object, - bool have_lock); - -static Uint64 *p_latest_trans_gci= 0; - -/* - Global variables for holding the ndb_binlog_index table reference -*/ -static TABLE *ndb_binlog_index= 0; -static TABLE_LIST binlog_tables; - -/* - Helper functions -*/ - -#ifndef DBUG_OFF -/* purecov: begin deadcode */ -static void print_records(TABLE *table, const uchar *record) -{ - for (uint j= 0; j < table->s->fields; j++) - { - char buf[40]; - int pos= 0; - Field *field= table->field[j]; - const uchar* field_ptr= field->ptr - table->record[0] + record; - int pack_len= field->pack_length(); - int n= pack_len < 10 ? 
pack_len : 10; - - for (int i= 0; i < n && pos < 20; i++) - { - pos+= sprintf(&buf[pos]," %x", (int) (uchar) field_ptr[i]); - } - buf[pos]= 0; - DBUG_PRINT("info",("[%u]field_ptr[0->%d]: %s", j, n, buf)); - } -} -/* purecov: end */ -#else -#define print_records(a,b) -#endif - - -#ifndef DBUG_OFF -static void dbug_print_table(const char *info, TABLE *table) -{ - if (table == 0) - { - DBUG_PRINT("info",("%s: (null)", info)); - return; - } - DBUG_PRINT("info", - ("%s: %s.%s s->fields: %d " - "reclength: %lu rec_buff_length: %u record[0]: 0x%lx " - "record[1]: 0x%lx", - info, - table->s->db.str, - table->s->table_name.str, - table->s->fields, - table->s->reclength, - table->s->rec_buff_length, - (long) table->record[0], - (long) table->record[1])); - - for (unsigned int i= 0; i < table->s->fields; i++) - { - Field *f= table->field[i]; - DBUG_PRINT("info", - ("[%d] \"%s\"(0x%lx:%s%s%s%s%s%s) type: %d pack_length: %d " - "ptr: 0x%lx[+%d] null_bit: %u null_ptr: 0x%lx[+%d]", - i, - f->field_name, - (long) f->flags, - (f->flags & PRI_KEY_FLAG) ? "pri" : "attr", - (f->flags & NOT_NULL_FLAG) ? "" : ",nullable", - (f->flags & UNSIGNED_FLAG) ? ",unsigned" : ",signed", - (f->flags & ZEROFILL_FLAG) ? ",zerofill" : "", - (f->flags & BLOB_FLAG) ? ",blob" : "", - (f->flags & BINARY_FLAG) ? 
",binary" : "", - f->real_type(), - f->pack_length(), - (long) f->ptr, (int) (f->ptr - table->record[0]), - f->null_bit, - (long) f->null_ptr, - (int) ((uchar*) f->null_ptr - table->record[0]))); - if (f->type() == MYSQL_TYPE_BIT) - { - Field_bit *g= (Field_bit*) f; - DBUG_PRINT("MYSQL_TYPE_BIT",("field_length: %d bit_ptr: 0x%lx[+%d] " - "bit_ofs: %d bit_len: %u", - g->field_length, (long) g->bit_ptr, - (int) ((uchar*) g->bit_ptr - - table->record[0]), - g->bit_ofs, g->bit_len)); - } - } -} -#else -#define dbug_print_table(a,b) -#endif - - -/* - Run a query through mysql_parse - - Used to: - - purging the ndb_binlog_index - - creating the ndb_apply_status table -*/ -static void run_query(THD *thd, char *buf, char *end, - const int *no_print_error, my_bool disable_binlog) -{ - ulong save_thd_query_length= thd->query_length(); - char *save_thd_query= thd->query(); - ulong save_thread_id= thd->variables.pseudo_thread_id; - struct system_status_var save_thd_status_var= thd->status_var; - THD_TRANS save_thd_transaction_all= thd->transaction.all; - THD_TRANS save_thd_transaction_stmt= thd->transaction.stmt; - ulonglong save_thd_options= thd->variables.option_bits; - DBUG_ASSERT(sizeof(save_thd_options) == sizeof(thd->variables.option_bits)); - NET save_thd_net= thd->net; - - bzero((char*) &thd->net, sizeof(NET)); - thd->set_query(buf, (uint) (end - buf)); - thd->variables.pseudo_thread_id= thread_id; - thd->transaction.stmt.modified_non_trans_table= FALSE; - if (disable_binlog) - thd->variables.option_bits&= ~OPTION_BIN_LOG; - - DBUG_PRINT("query", ("%s", thd->query())); - - DBUG_ASSERT(!thd->in_sub_stmt); - DBUG_ASSERT(!thd->locked_tables_mode); - - { - Parser_state parser_state; - if (!parser_state.init(thd, thd->query(), thd->query_length())) - mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); - } - - if (no_print_error && thd->is_slave_error) - { - int i; - Thd_ndb *thd_ndb= get_thd_ndb(thd); - for (i= 0; no_print_error[i]; i++) - if 
((thd_ndb->m_error_code == no_print_error[i]) || - (thd->get_stmt_da()->sql_errno() == (unsigned) no_print_error[i])) - break; - if (!no_print_error[i]) - sql_print_error("NDB: %s: error %s %d(ndb: %d) %d %d", - buf, - thd->get_stmt_da()->message(), - thd->get_stmt_da()->sql_errno(), - thd_ndb->m_error_code, - (int) thd->is_error(), thd->is_slave_error); - } - /* - XXX: this code is broken. mysql_parse()/mysql_reset_thd_for_next_command() - can not be called from within a statement, and - run_query() can be called from anywhere, including from within - a sub-statement. - This particular reset is a temporary hack to avoid an assert - for double assignment of the diagnostics area when run_query() - is called from ndbcluster_reset_logs(), which is called from - mysql_flush(). - */ - thd->get_stmt_da()->reset_diagnostics_area(); - - thd->variables.option_bits= save_thd_options; - thd->set_query(save_thd_query, save_thd_query_length); - thd->variables.pseudo_thread_id= save_thread_id; - thd->status_var= save_thd_status_var; - thd->transaction.all= save_thd_transaction_all; - thd->transaction.stmt= save_thd_transaction_stmt; - thd->net= save_thd_net; - thd->set_current_stmt_binlog_format_row(); - - if (thd == injector_thd) - { - /* - running the query will close all tables, including the ndb_binlog_index - used in injector_thd - */ - ndb_binlog_index= 0; - } -} - -static void -ndbcluster_binlog_close_table(THD *thd, NDB_SHARE *share) -{ - DBUG_ENTER("ndbcluster_binlog_close_table"); - if (share->table_share) - { - closefrm(share->table, 1); - share->table_share= 0; - share->table= 0; - } - DBUG_ASSERT(share->table == 0); - DBUG_VOID_RETURN; -} - - -/* - Creates a TABLE object for the ndb cluster table - - NOTES - This does not open the underlying table -*/ - -static int -ndbcluster_binlog_open_table(THD *thd, NDB_SHARE *share, - TABLE_SHARE *table_share, TABLE *table, - int reopen) -{ - int error; - DBUG_ENTER("ndbcluster_binlog_open_table"); - - 
init_tmp_table_share(thd, table_share, share->db, 0, share->table_name, - share->key); - if ((error= open_table_def(thd, table_share, 0))) - { - DBUG_PRINT("error", ("open_table_def failed: %d my_errno: %d", error, my_errno)); - free_table_share(table_share); - DBUG_RETURN(error); - } - if ((error= open_table_from_share(thd, table_share, "", 0 /* fon't allocate buffers */, - (uint) READ_ALL, 0, table, FALSE))) - { - DBUG_PRINT("error", ("open_table_from_share failed %d my_errno: %d", error, my_errno)); - free_table_share(table_share); - DBUG_RETURN(error); - } - tdc_assign_new_table_id(table_share); - - if (!reopen) - { - // allocate memory on ndb share so it can be reused after online alter table - (void)multi_alloc_root(&share->mem_root, - &(share->record[0]), table->s->rec_buff_length, - &(share->record[1]), table->s->rec_buff_length, - NULL); - } - { - my_ptrdiff_t row_offset= share->record[0] - table->record[0]; - Field **p_field; - for (p_field= table->field; *p_field; p_field++) - (*p_field)->move_field_offset(row_offset); - table->record[0]= share->record[0]; - table->record[1]= share->record[1]; - } - - table->in_use= injector_thd; - - table->s->db.str= share->db; - table->s->db.length= strlen(share->db); - table->s->table_name.str= share->table_name; - table->s->table_name.length= strlen(share->table_name); - - DBUG_ASSERT(share->table_share == 0); - share->table_share= table_share; - DBUG_ASSERT(share->table == 0); - share->table= table; - /* We can't use 'use_all_columns()' as the file object is not setup yet */ - table->column_bitmaps_set_no_signal(&table->s->all_set, &table->s->all_set); -#ifndef DBUG_OFF - dbug_print_table("table", table); -#endif - DBUG_RETURN(0); -} - - -/* - Initialize the binlog part of the NDB_SHARE -*/ -int ndbcluster_binlog_init_share(NDB_SHARE *share, TABLE *_table) -{ - THD *thd= current_thd; - MEM_ROOT *mem_root= &share->mem_root; - int do_event_op= ndb_binlog_running; - int error= 0; - 
DBUG_ENTER("ndbcluster_binlog_init_share"); - - share->connect_count= g_ndb_cluster_connection->get_connect_count(); - - share->op= 0; - share->table= 0; - - if (!ndb_schema_share && - strcmp(share->db, NDB_REP_DB) == 0 && - strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0) - do_event_op= 1; - else if (!ndb_apply_status_share && - strcmp(share->db, NDB_REP_DB) == 0 && - strcmp(share->table_name, NDB_APPLY_TABLE) == 0) - do_event_op= 1; - - { - int i, no_nodes= g_ndb_cluster_connection->no_db_nodes(); - share->subscriber_bitmap= (MY_BITMAP*) - alloc_root(mem_root, no_nodes * sizeof(MY_BITMAP)); - for (i= 0; i < no_nodes; i++) - { - my_bitmap_init(&share->subscriber_bitmap[i], - (Uint32*)alloc_root(mem_root, max_ndb_nodes/8), - max_ndb_nodes, FALSE); - bitmap_clear_all(&share->subscriber_bitmap[i]); - } - } - - if (!do_event_op) - { - if (_table) - { - if (_table->s->primary_key == MAX_KEY) - share->flags|= NSF_HIDDEN_PK; - if (_table->s->blob_fields != 0) - share->flags|= NSF_BLOB_FLAG; - } - else - { - share->flags|= NSF_NO_BINLOG; - } - DBUG_RETURN(error); - } - while (1) - { - int error; - TABLE_SHARE *table_share= (TABLE_SHARE *) alloc_root(mem_root, sizeof(*table_share)); - TABLE *table= (TABLE*) alloc_root(mem_root, sizeof(*table)); - if ((error= ndbcluster_binlog_open_table(thd, share, table_share, table, 0))) - break; - /* - ! 
do not touch the contents of the table - it may be in use by the injector thread - */ - MEM_ROOT *mem_root= &share->mem_root; - share->ndb_value[0]= (NdbValue*) - alloc_root(mem_root, sizeof(NdbValue) * - (table->s->fields + 2 /*extra for hidden key and part key*/)); - share->ndb_value[1]= (NdbValue*) - alloc_root(mem_root, sizeof(NdbValue) * - (table->s->fields + 2 /*extra for hidden key and part key*/)); - - if (table->s->primary_key == MAX_KEY) - share->flags|= NSF_HIDDEN_PK; - if (table->s->blob_fields != 0) - share->flags|= NSF_BLOB_FLAG; - break; - } - DBUG_RETURN(error); -} - -/***************************************************************** - functions called from master sql client threads -****************************************************************/ - -/* - called in mysql_show_binlog_events and reset_logs to make sure we wait for - all events originating from this mysql server to arrive in the binlog - - Wait for the last epoch in which the last transaction is a part of. - - Wait a maximum of 30 seconds. -*/ -static void ndbcluster_binlog_wait(THD *thd) -{ - if (ndb_binlog_running) - { - DBUG_ENTER("ndbcluster_binlog_wait"); - const char *save_info= thd ? thd->proc_info : 0; - ulonglong wait_epoch= *p_latest_trans_gci; - int count= 30; - if (thd) - thd->proc_info= "Waiting for ndbcluster binlog update to " - "reach current position"; - while (count && ndb_binlog_running && - ndb_latest_handled_binlog_epoch < wait_epoch) - { - count--; - sleep(1); - } - if (thd) - thd->proc_info= save_info; - DBUG_VOID_RETURN; - } -} - -/* - Called from MYSQL_BIN_LOG::reset_logs in log.cc when binlog is emptied -*/ -static int ndbcluster_reset_logs(THD *thd) -{ - if (!ndb_binlog_running) - return 0; - - DBUG_ENTER("ndbcluster_reset_logs"); - - /* - Wait for all events orifinating from this mysql server has - reached the binlog before continuing to reset - */ - ndbcluster_binlog_wait(thd); - - char buf[1024]; - char *end= strmov(buf, "DELETE FROM " NDB_REP_DB "." 
NDB_REP_TABLE); - - run_query(thd, buf, end, NULL, TRUE); - - DBUG_RETURN(0); -} - -/* - Called from MYSQL_BIN_LOG::purge_logs in log.cc when the binlog "file" - is removed -*/ - -static int -ndbcluster_binlog_index_purge_file(THD *thd, const char *file) -{ - if (!ndb_binlog_running || thd->slave_thread) - return 0; - - DBUG_ENTER("ndbcluster_binlog_index_purge_file"); - DBUG_PRINT("enter", ("file: %s", file)); - - char buf[1024]; - char *end= strmov(strmov(strmov(buf, - "DELETE FROM " - NDB_REP_DB "." NDB_REP_TABLE - " WHERE File='"), file), "'"); - - run_query(thd, buf, end, NULL, TRUE); - - DBUG_RETURN(0); -} - -static void -ndbcluster_binlog_log_query(handlerton *hton, THD *thd, enum_binlog_command binlog_command, - const char *query, uint query_length, - const char *db, const char *table_name) -{ - DBUG_ENTER("ndbcluster_binlog_log_query"); - DBUG_PRINT("enter", ("db: %s table_name: %s query: %s", - db, table_name, query)); - enum SCHEMA_OP_TYPE type; - int log= 0; - switch (binlog_command) - { - case LOGCOM_CREATE_TABLE: - type= SOT_CREATE_TABLE; - DBUG_ASSERT(FALSE); - break; - case LOGCOM_ALTER_TABLE: - type= SOT_ALTER_TABLE; - log= 1; - break; - case LOGCOM_RENAME_TABLE: - type= SOT_RENAME_TABLE; - DBUG_ASSERT(FALSE); - break; - case LOGCOM_DROP_TABLE: - type= SOT_DROP_TABLE; - DBUG_ASSERT(FALSE); - break; - case LOGCOM_CREATE_DB: - type= SOT_CREATE_DB; - log= 1; - break; - case LOGCOM_ALTER_DB: - type= SOT_ALTER_DB; - log= 1; - break; - case LOGCOM_DROP_DB: - type= SOT_DROP_DB; - DBUG_ASSERT(FALSE); - break; - } - if (log) - { - ndbcluster_log_schema_op(thd, 0, query, query_length, - db, table_name, 0, 0, type, - 0, 0); - } - DBUG_VOID_RETURN; -} - - -/* - End use of the NDB Cluster binlog - - wait for binlog thread to shutdown -*/ - -static int ndbcluster_binlog_end(THD *thd) -{ - DBUG_ENTER("ndbcluster_binlog_end"); - - if (!ndbcluster_binlog_inited) - DBUG_RETURN(0); - ndbcluster_binlog_inited= 0; - -#ifdef HAVE_NDB_BINLOG - if (ndb_util_thread_running 
> 0) - { - /* - Wait for util thread to die (as this uses the injector mutex) - There is a very small change that ndb_util_thread dies and the - following mutex is freed before it's accessed. This shouldn't - however be a likely case as the ndbcluster_binlog_end is supposed to - be called before ndb_cluster_end(). - */ - mysql_mutex_lock(&LOCK_ndb_util_thread); - /* Ensure mutex are not freed if ndb_cluster_end is running at same time */ - ndb_util_thread_running++; - ndbcluster_terminating= 1; - mysql_cond_signal(&COND_ndb_util_thread); - while (ndb_util_thread_running > 1) - mysql_cond_wait(&COND_ndb_util_ready, &LOCK_ndb_util_thread); - ndb_util_thread_running--; - mysql_mutex_unlock(&LOCK_ndb_util_thread); - } - - /* wait for injector thread to finish */ - ndbcluster_binlog_terminating= 1; - mysql_mutex_lock(&injector_mutex); - mysql_cond_signal(&injector_cond); - while (ndb_binlog_thread_running > 0) - mysql_cond_wait(&injector_cond, &injector_mutex); - mysql_mutex_unlock(&injector_mutex); - - mysql_mutex_destroy(&injector_mutex); - mysql_cond_destroy(&injector_cond); - mysql_mutex_destroy(&ndb_schema_share_mutex); -#endif - - DBUG_RETURN(0); -} - -/***************************************************************** - functions called from slave sql client threads -****************************************************************/ -static void ndbcluster_reset_slave(THD *thd) -{ - if (!ndb_binlog_running) - return; - - DBUG_ENTER("ndbcluster_reset_slave"); - char buf[1024]; - char *end= strmov(buf, "DELETE FROM " NDB_REP_DB "." NDB_APPLY_TABLE); - run_query(thd, buf, end, NULL, TRUE); - DBUG_VOID_RETURN; -} - -/* - Initialize the binlog part of the ndb handlerton -*/ - -/** - Upon the sql command flush logs, we need to ensure that all outstanding - ndb data to be logged has made it to the binary log to get a deterministic - behavior on the rotation of the log. 
- */ -static bool ndbcluster_flush_logs(handlerton *hton) -{ - ndbcluster_binlog_wait(current_thd); - return FALSE; -} - -static int ndbcluster_binlog_func(handlerton *hton, THD *thd, - enum_binlog_func fn, - void *arg) -{ - switch(fn) - { - case BFN_RESET_LOGS: - ndbcluster_reset_logs(thd); - break; - case BFN_RESET_SLAVE: - ndbcluster_reset_slave(thd); - break; - case BFN_BINLOG_WAIT: - ndbcluster_binlog_wait(thd); - break; - case BFN_BINLOG_END: - ndbcluster_binlog_end(thd); - break; - case BFN_BINLOG_PURGE_FILE: - ndbcluster_binlog_index_purge_file(thd, (const char *)arg); - break; - } - return 0; -} - -void ndbcluster_binlog_init_handlerton() -{ - handlerton *h= ndbcluster_hton; - h->flush_logs= ndbcluster_flush_logs; - h->binlog_func= ndbcluster_binlog_func; - h->binlog_log_query= ndbcluster_binlog_log_query; -} - - - - - -/* - check the availability af the ndb_apply_status share - - return share, but do not increase refcount - - return 0 if there is no share -*/ -static NDB_SHARE *ndbcluster_check_ndb_apply_status_share() -{ - mysql_mutex_lock(&ndbcluster_mutex); - - void *share= my_hash_search(&ndbcluster_open_tables, - (uchar*) NDB_APPLY_TABLE_FILE, - sizeof(NDB_APPLY_TABLE_FILE) - 1); - DBUG_PRINT("info",("ndbcluster_check_ndb_apply_status_share %s 0x%lx", - NDB_APPLY_TABLE_FILE, (long) share)); - mysql_mutex_unlock(&ndbcluster_mutex); - return (NDB_SHARE*) share; -} - -/* - check the availability af the schema share - - return share, but do not increase refcount - - return 0 if there is no share -*/ -static NDB_SHARE *ndbcluster_check_ndb_schema_share() -{ - mysql_mutex_lock(&ndbcluster_mutex); - - void *share= my_hash_search(&ndbcluster_open_tables, - (uchar*) NDB_SCHEMA_TABLE_FILE, - sizeof(NDB_SCHEMA_TABLE_FILE) - 1); - DBUG_PRINT("info",("ndbcluster_check_ndb_schema_share %s 0x%lx", - NDB_SCHEMA_TABLE_FILE, (long) share)); - mysql_mutex_unlock(&ndbcluster_mutex); - return (NDB_SHARE*) share; -} - -/* - Create the ndb_apply_status table -*/ -static 
int ndbcluster_create_ndb_apply_status_table(THD *thd) -{ - DBUG_ENTER("ndbcluster_create_ndb_apply_status_table"); - - /* - Check if we already have the apply status table. - If so it should have been discovered at startup - and thus have a share - */ - - if (ndbcluster_check_ndb_apply_status_share()) - DBUG_RETURN(0); - - if (g_ndb_cluster_connection->get_no_ready() <= 0) - DBUG_RETURN(0); - - char buf[1024 + 1], *end; - - if (opt_ndb_extra_logging) - sql_print_information("NDB: Creating " NDB_REP_DB "." NDB_APPLY_TABLE); - - /* - Check if apply status table exists in MySQL "dictionary" - if so, remove it since there is none in Ndb - */ - { - build_table_filename(buf, sizeof(buf) - 1, - NDB_REP_DB, NDB_APPLY_TABLE, reg_ext, 0); - mysql_file_delete(key_file_frm, buf, MYF(0)); - } - - /* - Note, updating this table schema must be reflected in ndb_restore - */ - end= strmov(buf, "CREATE TABLE IF NOT EXISTS " - NDB_REP_DB "." NDB_APPLY_TABLE - " ( server_id INT UNSIGNED NOT NULL," - " epoch BIGINT UNSIGNED NOT NULL, " - " log_name VARCHAR(255) BINARY NOT NULL, " - " start_pos BIGINT UNSIGNED NOT NULL, " - " end_pos BIGINT UNSIGNED NOT NULL, " - " PRIMARY KEY USING HASH (server_id) ) ENGINE=NDB CHARACTER SET latin1"); - - const int no_print_error[6]= {ER_TABLE_EXISTS_ERROR, - 701, - 702, - 721, // Table already exist - 4009, - 0}; // do not print error 701 etc - run_query(thd, buf, end, no_print_error, TRUE); - - DBUG_RETURN(0); -} - - -/* - Create the schema table -*/ -static int ndbcluster_create_schema_table(THD *thd) -{ - DBUG_ENTER("ndbcluster_create_schema_table"); - - /* - Check if we already have the schema table. - If so it should have been discovered at startup - and thus have a share - */ - - if (ndbcluster_check_ndb_schema_share()) - DBUG_RETURN(0); - - if (g_ndb_cluster_connection->get_no_ready() <= 0) - DBUG_RETURN(0); - - char buf[1024 + 1], *end; - - if (opt_ndb_extra_logging) - sql_print_information("NDB: Creating " NDB_REP_DB "." 
NDB_SCHEMA_TABLE); - - /* - Check if schema table exists in MySQL "dictionary" - if so, remove it since there is none in Ndb - */ - { - build_table_filename(buf, sizeof(buf) - 1, - NDB_REP_DB, NDB_SCHEMA_TABLE, reg_ext, 0); - mysql_file_delete(key_file_frm, buf, MYF(0)); - } - - /* - Update the defines below to reflect the table schema - */ - end= strmov(buf, "CREATE TABLE IF NOT EXISTS " - NDB_REP_DB "." NDB_SCHEMA_TABLE - " ( db VARBINARY(63) NOT NULL," - " name VARBINARY(63) NOT NULL," - " slock BINARY(32) NOT NULL," - " query BLOB NOT NULL," - " node_id INT UNSIGNED NOT NULL," - " epoch BIGINT UNSIGNED NOT NULL," - " id INT UNSIGNED NOT NULL," - " version INT UNSIGNED NOT NULL," - " type INT UNSIGNED NOT NULL," - " PRIMARY KEY USING HASH (db,name) ) ENGINE=NDB CHARACTER SET latin1"); - - const int no_print_error[6]= {ER_TABLE_EXISTS_ERROR, - 701, - 702, - 721, // Table already exist - 4009, - 0}; // do not print error 701 etc - run_query(thd, buf, end, no_print_error, TRUE); - - DBUG_RETURN(0); -} - -int ndbcluster_setup_binlog_table_shares(THD *thd) -{ - if (!ndb_schema_share && - ndbcluster_check_ndb_schema_share() == 0) - { - ndb_create_table_from_engine(thd, NDB_REP_DB, NDB_SCHEMA_TABLE); - if (!ndb_schema_share) - { - ndbcluster_create_schema_table(thd); - // always make sure we create the 'schema' first - if (!ndb_schema_share) - return 1; - } - } - if (!ndb_apply_status_share && - ndbcluster_check_ndb_apply_status_share() == 0) - { - ndb_create_table_from_engine(thd, NDB_REP_DB, NDB_APPLY_TABLE); - if (!ndb_apply_status_share) - { - ndbcluster_create_ndb_apply_status_table(thd); - if (!ndb_apply_status_share) - return 1; - } - } - if (!ndbcluster_find_all_files(thd)) - { - ndb_binlog_tables_inited= TRUE; - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: ndb tables writable"); - close_cached_tables(NULL, NULL, FALSE, LONG_TIMEOUT); - /* Signal injector thread that all is setup */ - mysql_cond_signal(&injector_cond); - } - return 0; -} - 
-/* - Defines and struct for schema table. - Should reflect table definition above. -*/ -#define SCHEMA_DB_I 0u -#define SCHEMA_NAME_I 1u -#define SCHEMA_SLOCK_I 2u -#define SCHEMA_QUERY_I 3u -#define SCHEMA_NODE_ID_I 4u -#define SCHEMA_EPOCH_I 5u -#define SCHEMA_ID_I 6u -#define SCHEMA_VERSION_I 7u -#define SCHEMA_TYPE_I 8u -#define SCHEMA_SIZE 9u -#define SCHEMA_SLOCK_SIZE 32u - -struct Cluster_schema -{ - uchar db_length; - char db[64]; - uchar name_length; - char name[64]; - uchar slock_length; - uint32 slock[SCHEMA_SLOCK_SIZE/4]; - unsigned short query_length; - char *query; - Uint64 epoch; - uint32 node_id; - uint32 id; - uint32 version; - uint32 type; - uint32 any_value; -}; - -static void print_could_not_discover_error(THD *thd, - const Cluster_schema *schema) -{ - sql_print_error("NDB Binlog: Could not discover table '%s.%s' from " - "binlog schema event '%s' from node %d. " - "my_errno: %d", - schema->db, schema->name, schema->query, - schema->node_id, my_errno); - List_iterator_fast<Sql_condition> it(thd->warning_info->warn_list()); - Sql_condition *err; - while ((err= it++)) - sql_print_warning("NDB Binlog: (%d)%s", err->get_sql_errno(), - err->get_message_text()); -} - -/* - Transfer schema table data into corresponding struct -*/ -static void ndbcluster_get_schema(NDB_SHARE *share, - Cluster_schema *s) -{ - TABLE *table= share->table; - Field **field; - /* unpack blob values */ - uchar* blobs_buffer= 0; - uint blobs_buffer_size= 0; - my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); - { - ptrdiff_t ptrdiff= 0; - int ret= get_ndb_blobs_value(table, share->ndb_value[0], - blobs_buffer, blobs_buffer_size, - ptrdiff); - if (ret != 0) - { - my_free(blobs_buffer); - DBUG_PRINT("info", ("blob read error")); - DBUG_ASSERT(FALSE); - } - } - /* db varchar 1 length uchar */ - field= table->field; - s->db_length= *(uint8*)(*field)->ptr; - DBUG_ASSERT(s->db_length <= (*field)->field_length); - DBUG_ASSERT((*field)->field_length + 1 == 
sizeof(s->db)); - memcpy(s->db, (*field)->ptr + 1, s->db_length); - s->db[s->db_length]= 0; - /* name varchar 1 length uchar */ - field++; - s->name_length= *(uint8*)(*field)->ptr; - DBUG_ASSERT(s->name_length <= (*field)->field_length); - DBUG_ASSERT((*field)->field_length + 1 == sizeof(s->name)); - memcpy(s->name, (*field)->ptr + 1, s->name_length); - s->name[s->name_length]= 0; - /* slock fixed length */ - field++; - s->slock_length= (*field)->field_length; - DBUG_ASSERT((*field)->field_length == sizeof(s->slock)); - memcpy(s->slock, (*field)->ptr, s->slock_length); - /* query blob */ - field++; - { - Field_blob *field_blob= (Field_blob*)(*field); - uint blob_len= field_blob->get_length((*field)->ptr); - uchar *blob_ptr= 0; - field_blob->get_ptr(&blob_ptr); - DBUG_ASSERT(blob_len == 0 || blob_ptr != 0); - s->query_length= blob_len; - s->query= sql_strmake((char*) blob_ptr, blob_len); - } - /* node_id */ - field++; - s->node_id= ((Field_long *)*field)->val_int(); - /* epoch */ - field++; - s->epoch= ((Field_long *)*field)->val_int(); - /* id */ - field++; - s->id= ((Field_long *)*field)->val_int(); - /* version */ - field++; - s->version= ((Field_long *)*field)->val_int(); - /* type */ - field++; - s->type= ((Field_long *)*field)->val_int(); - /* free blobs buffer */ - my_free(blobs_buffer); - dbug_tmp_restore_column_map(table->read_set, old_map); -} - -/* - helper function to pack a ndb varchar -*/ -char *ndb_pack_varchar(const NDBCOL *col, char *buf, - const char *str, int sz) -{ - switch (col->getArrayType()) - { - case NDBCOL::ArrayTypeFixed: - memcpy(buf, str, sz); - break; - case NDBCOL::ArrayTypeShortVar: - *(uchar*)buf= (uchar)sz; - memcpy(buf + 1, str, sz); - break; - case NDBCOL::ArrayTypeMediumVar: - int2store(buf, sz); - memcpy(buf + 2, str, sz); - break; - } - return buf; -} - -/* - acknowledge handling of schema operation -*/ -static int -ndbcluster_update_slock(THD *thd, - const char *db, - const char *table_name) -{ - 
DBUG_ENTER("ndbcluster_update_slock"); - if (!ndb_schema_share) - { - DBUG_RETURN(0); - } - - const NdbError *ndb_error= 0; - uint32 node_id= g_ndb_cluster_connection->node_id(); - Ndb *ndb= check_ndb_in_thd(thd); - char save_db[FN_HEADLEN]; - strcpy(save_db, ndb->getDatabaseName()); - - char tmp_buf[FN_REFLEN]; - NDBDICT *dict= ndb->getDictionary(); - ndb->setDatabaseName(NDB_REP_DB); - Ndb_table_guard ndbtab_g(dict, NDB_SCHEMA_TABLE); - const NDBTAB *ndbtab= ndbtab_g.get_table(); - NdbTransaction *trans= 0; - int retries= 100; - int retry_sleep= 10; /* 10 milliseconds, transaction */ - const NDBCOL *col[SCHEMA_SIZE]; - unsigned sz[SCHEMA_SIZE]; - - MY_BITMAP slock; - uint32 bitbuf[SCHEMA_SLOCK_SIZE/4]; - my_bitmap_init(&slock, bitbuf, sizeof(bitbuf)*8, false); - - if (ndbtab == 0) - { - abort(); - DBUG_RETURN(0); - } - - { - uint i; - for (i= 0; i < SCHEMA_SIZE; i++) - { - col[i]= ndbtab->getColumn(i); - if (i != SCHEMA_QUERY_I) - { - sz[i]= col[i]->getLength(); - DBUG_ASSERT(sz[i] <= sizeof(tmp_buf)); - } - } - } - - while (1) - { - if ((trans= ndb->startTransaction()) == 0) - goto err; - { - NdbOperation *op= 0; - int r= 0; - - /* read the bitmap exlusive */ - r|= (op= trans->getNdbOperation(ndbtab)) == 0; - DBUG_ASSERT(r == 0); - r|= op->readTupleExclusive(); - DBUG_ASSERT(r == 0); - - /* db */ - ndb_pack_varchar(col[SCHEMA_DB_I], tmp_buf, db, strlen(db)); - r|= op->equal(SCHEMA_DB_I, tmp_buf); - DBUG_ASSERT(r == 0); - /* name */ - ndb_pack_varchar(col[SCHEMA_NAME_I], tmp_buf, table_name, - strlen(table_name)); - r|= op->equal(SCHEMA_NAME_I, tmp_buf); - DBUG_ASSERT(r == 0); - /* slock */ - r|= op->getValue(SCHEMA_SLOCK_I, (char*)slock.bitmap) == 0; - DBUG_ASSERT(r == 0); - } - if (trans->execute(NdbTransaction::NoCommit)) - goto err; - bitmap_clear_bit(&slock, node_id); - { - NdbOperation *op= 0; - int r= 0; - - /* now update the tuple */ - r|= (op= trans->getNdbOperation(ndbtab)) == 0; - DBUG_ASSERT(r == 0); - r|= op->updateTuple(); - DBUG_ASSERT(r == 0); - - 
/* db */ - ndb_pack_varchar(col[SCHEMA_DB_I], tmp_buf, db, strlen(db)); - r|= op->equal(SCHEMA_DB_I, tmp_buf); - DBUG_ASSERT(r == 0); - /* name */ - ndb_pack_varchar(col[SCHEMA_NAME_I], tmp_buf, table_name, - strlen(table_name)); - r|= op->equal(SCHEMA_NAME_I, tmp_buf); - DBUG_ASSERT(r == 0); - /* slock */ - r|= op->setValue(SCHEMA_SLOCK_I, (char*)slock.bitmap); - DBUG_ASSERT(r == 0); - /* node_id */ - r|= op->setValue(SCHEMA_NODE_ID_I, node_id); - DBUG_ASSERT(r == 0); - /* type */ - r|= op->setValue(SCHEMA_TYPE_I, (uint32)SOT_CLEAR_SLOCK); - DBUG_ASSERT(r == 0); - } - if (trans->execute(NdbTransaction::Commit) == 0) - { - dict->forceGCPWait(); - DBUG_PRINT("info", ("node %d cleared lock on '%s.%s'", - node_id, db, table_name)); - break; - } - err: - const NdbError *this_error= trans ? - &trans->getNdbError() : &ndb->getNdbError(); - if (this_error->status == NdbError::TemporaryError) - { - if (retries--) - { - if (trans) - ndb->closeTransaction(trans); - my_sleep(retry_sleep); - continue; // retry - } - } - ndb_error= this_error; - break; - } - - if (ndb_error) - { - char buf[1024]; - my_snprintf(buf, sizeof(buf), "Could not release lock on '%s.%s'", - db, table_name); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - ndb_error->code, ndb_error->message, buf); - } - if (trans) - ndb->closeTransaction(trans); - ndb->setDatabaseName(save_db); - DBUG_RETURN(0); -} - -/* - log query in schema table -*/ -static void ndb_report_waiting(const char *key, - int the_time, - const char *op, - const char *obj) -{ - ulonglong ndb_latest_epoch= 0; - const char *proc_info= "<no info>"; - mysql_mutex_lock(&injector_mutex); - if (injector_ndb) - ndb_latest_epoch= injector_ndb->getLatestGCI(); - if (injector_thd) - proc_info= injector_thd->proc_info; - mysql_mutex_unlock(&injector_mutex); - sql_print_information("NDB %s:" - " waiting max %u sec for %s %s." 
- " epochs: (%u,%u,%u)" - " injector proc_info: %s" - ,key, the_time, op, obj - ,(uint)ndb_latest_handled_binlog_epoch - ,(uint)ndb_latest_received_binlog_epoch - ,(uint)ndb_latest_epoch - ,proc_info - ); -} - -int ndbcluster_log_schema_op(THD *thd, NDB_SHARE *share, - const char *query, int query_length, - const char *db, const char *table_name, - uint32 ndb_table_id, - uint32 ndb_table_version, - enum SCHEMA_OP_TYPE type, - const char *new_db, const char *new_table_name) -{ - DBUG_ENTER("ndbcluster_log_schema_op"); - Thd_ndb *thd_ndb= get_thd_ndb(thd); - if (!thd_ndb) - { - if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb())) - { - sql_print_error("Could not allocate Thd_ndb object"); - DBUG_RETURN(1); - } - set_thd_ndb(thd, thd_ndb); - } - - DBUG_PRINT("enter", - ("query: %s db: %s table_name: %s thd_ndb->options: %d", - query, db, table_name, thd_ndb->options)); - if (!ndb_schema_share || thd_ndb->options & TNO_NO_LOG_SCHEMA_OP) - { - DBUG_RETURN(0); - } - - char tmp_buf2_mem[FN_REFLEN]; - String tmp_buf2(tmp_buf2_mem, sizeof(tmp_buf2_mem), system_charset_info); - tmp_buf2.length(0); - const char *type_str; - switch (type) - { - case SOT_DROP_TABLE: - /* drop database command, do not log at drop table */ - if (thd->lex->sql_command == SQLCOM_DROP_DB) - DBUG_RETURN(0); - /* redo the drop table query as is may contain several tables */ - tmp_buf2.append(STRING_WITH_LEN("drop table ")); - append_identifier(thd, &tmp_buf2, table_name, strlen(table_name)); - query= tmp_buf2.c_ptr_safe(); - query_length= tmp_buf2.length(); - type_str= "drop table"; - break; - case SOT_RENAME_TABLE: - /* redo the rename table query as is may contain several tables */ - tmp_buf2.append(STRING_WITH_LEN("rename table ")); - append_identifier(thd, &tmp_buf2, db, strlen(db)); - tmp_buf2.append(STRING_WITH_LEN(".")); - append_identifier(thd, &tmp_buf2, table_name, strlen(table_name)); - tmp_buf2.append(STRING_WITH_LEN(" to ")); - append_identifier(thd, &tmp_buf2, new_db, strlen(new_db)); - 
tmp_buf2.append(STRING_WITH_LEN(".")); - append_identifier(thd, &tmp_buf2, new_table_name, strlen(new_table_name)); - query= tmp_buf2.c_ptr_safe(); - query_length= tmp_buf2.length(); - type_str= "rename table"; - break; - case SOT_CREATE_TABLE: - type_str= "create table"; - break; - case SOT_ALTER_TABLE: - type_str= "alter table"; - break; - case SOT_DROP_DB: - type_str= "drop db"; - break; - case SOT_CREATE_DB: - type_str= "create db"; - break; - case SOT_ALTER_DB: - type_str= "alter db"; - break; - case SOT_TABLESPACE: - type_str= "tablespace"; - break; - case SOT_LOGFILE_GROUP: - type_str= "logfile group"; - break; - case SOT_TRUNCATE_TABLE: - type_str= "truncate table"; - break; - default: - abort(); /* should not happen, programming error */ - } - - NDB_SCHEMA_OBJECT *ndb_schema_object; - { - char key[FN_REFLEN + 1]; - build_table_filename(key, sizeof(key) - 1, db, table_name, "", 0); - ndb_schema_object= ndb_get_schema_object(key, TRUE, FALSE); - } - - const NdbError *ndb_error= 0; - uint32 node_id= g_ndb_cluster_connection->node_id(); - Uint64 epoch= 0; - MY_BITMAP schema_subscribers; - uint32 bitbuf[sizeof(ndb_schema_object->slock)/4]; - char bitbuf_e[sizeof(bitbuf)]; - bzero(bitbuf_e, sizeof(bitbuf_e)); - { - int i, updated= 0; - int no_storage_nodes= g_ndb_cluster_connection->no_db_nodes(); - my_bitmap_init(&schema_subscribers, bitbuf, sizeof(bitbuf)*8, FALSE); - bitmap_set_all(&schema_subscribers); - - /* begin protect ndb_schema_share */ - mysql_mutex_lock(&ndb_schema_share_mutex); - if (ndb_schema_share == 0) - { - mysql_mutex_unlock(&ndb_schema_share_mutex); - if (ndb_schema_object) - ndb_free_schema_object(&ndb_schema_object, FALSE); - DBUG_RETURN(0); - } - mysql_mutex_lock(&ndb_schema_share->mutex); - for (i= 0; i < no_storage_nodes; i++) - { - MY_BITMAP *table_subscribers= &ndb_schema_share->subscriber_bitmap[i]; - if (!bitmap_is_clear_all(table_subscribers)) - { - bitmap_intersect(&schema_subscribers, - table_subscribers); - updated= 1; - } - } - 
mysql_mutex_unlock(&ndb_schema_share->mutex); - mysql_mutex_unlock(&ndb_schema_share_mutex); - /* end protect ndb_schema_share */ - - if (updated) - { - bitmap_clear_bit(&schema_subscribers, node_id); - /* - if setting own acknowledge bit it is important that - no other mysqld's are registred, as subsequent code - will cause the original event to be hidden (by blob - merge event code) - */ - if (bitmap_is_clear_all(&schema_subscribers)) - bitmap_set_bit(&schema_subscribers, node_id); - } - else - bitmap_clear_all(&schema_subscribers); - - if (ndb_schema_object) - { - mysql_mutex_lock(&ndb_schema_object->mutex); - memcpy(ndb_schema_object->slock, schema_subscribers.bitmap, - sizeof(ndb_schema_object->slock)); - mysql_mutex_unlock(&ndb_schema_object->mutex); - } - - DBUG_DUMP("schema_subscribers", (uchar*)schema_subscribers.bitmap, - no_bytes_in_map(&schema_subscribers)); - DBUG_PRINT("info", ("bitmap_is_clear_all(&schema_subscribers): %d", - bitmap_is_clear_all(&schema_subscribers))); - } - - Ndb *ndb= thd_ndb->ndb; - char save_db[FN_REFLEN]; - strcpy(save_db, ndb->getDatabaseName()); - - char tmp_buf[FN_REFLEN]; - NDBDICT *dict= ndb->getDictionary(); - ndb->setDatabaseName(NDB_REP_DB); - Ndb_table_guard ndbtab_g(dict, NDB_SCHEMA_TABLE); - const NDBTAB *ndbtab= ndbtab_g.get_table(); - NdbTransaction *trans= 0; - int retries= 100; - int retry_sleep= 10; /* 10 milliseconds, transaction */ - const NDBCOL *col[SCHEMA_SIZE]; - unsigned sz[SCHEMA_SIZE]; - - if (ndbtab == 0) - { - if (strcmp(NDB_REP_DB, db) != 0 || - strcmp(NDB_SCHEMA_TABLE, table_name)) - { - ndb_error= &dict->getNdbError(); - } - goto end; - } - - { - uint i; - for (i= 0; i < SCHEMA_SIZE; i++) - { - col[i]= ndbtab->getColumn(i); - if (i != SCHEMA_QUERY_I) - { - sz[i]= col[i]->getLength(); - DBUG_ASSERT(sz[i] <= sizeof(tmp_buf)); - } - } - } - - while (1) - { - const char *log_db= db; - const char *log_tab= table_name; - const char *log_subscribers= (char*)schema_subscribers.bitmap; - uint32 log_type= 
(uint32)type; - if ((trans= ndb->startTransaction()) == 0) - goto err; - while (1) - { - NdbOperation *op= 0; - int r= 0; - r|= (op= trans->getNdbOperation(ndbtab)) == 0; - DBUG_ASSERT(r == 0); - r|= op->writeTuple(); - DBUG_ASSERT(r == 0); - - /* db */ - ndb_pack_varchar(col[SCHEMA_DB_I], tmp_buf, log_db, strlen(log_db)); - r|= op->equal(SCHEMA_DB_I, tmp_buf); - DBUG_ASSERT(r == 0); - /* name */ - ndb_pack_varchar(col[SCHEMA_NAME_I], tmp_buf, log_tab, - strlen(log_tab)); - r|= op->equal(SCHEMA_NAME_I, tmp_buf); - DBUG_ASSERT(r == 0); - /* slock */ - DBUG_ASSERT(sz[SCHEMA_SLOCK_I] == sizeof(bitbuf)); - r|= op->setValue(SCHEMA_SLOCK_I, log_subscribers); - DBUG_ASSERT(r == 0); - /* query */ - { - NdbBlob *ndb_blob= op->getBlobHandle(SCHEMA_QUERY_I); - DBUG_ASSERT(ndb_blob != 0); - uint blob_len= query_length; - const char* blob_ptr= query; - r|= ndb_blob->setValue(blob_ptr, blob_len); - DBUG_ASSERT(r == 0); - } - /* node_id */ - r|= op->setValue(SCHEMA_NODE_ID_I, node_id); - DBUG_ASSERT(r == 0); - /* epoch */ - r|= op->setValue(SCHEMA_EPOCH_I, epoch); - DBUG_ASSERT(r == 0); - /* id */ - r|= op->setValue(SCHEMA_ID_I, ndb_table_id); - DBUG_ASSERT(r == 0); - /* version */ - r|= op->setValue(SCHEMA_VERSION_I, ndb_table_version); - DBUG_ASSERT(r == 0); - /* type */ - r|= op->setValue(SCHEMA_TYPE_I, log_type); - DBUG_ASSERT(r == 0); - /* any value */ - if (!(thd->variables.option_bits & OPTION_BIN_LOG)) - r|= op->setAnyValue(NDB_ANYVALUE_FOR_NOLOGGING); - else - r|= op->setAnyValue(thd->server_id); - DBUG_ASSERT(r == 0); - if (log_db != new_db && new_db && new_table_name) - { - log_db= new_db; - log_tab= new_table_name; - log_subscribers= bitbuf_e; // no ack expected on this - log_type= (uint32)SOT_RENAME_TABLE_NEW; - continue; - } - break; - } - if (trans->execute(NdbTransaction::Commit) == 0) - { - DBUG_PRINT("info", ("logged: %s", query)); - break; - } -err: - const NdbError *this_error= trans ? 
- &trans->getNdbError() : &ndb->getNdbError(); - if (this_error->status == NdbError::TemporaryError) - { - if (retries--) - { - if (trans) - ndb->closeTransaction(trans); - my_sleep(retry_sleep); - continue; // retry - } - } - ndb_error= this_error; - break; - } -end: - if (ndb_error) - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - ndb_error->code, - ndb_error->message, - "Could not log query '%s' on other mysqld's"); - - if (trans) - ndb->closeTransaction(trans); - ndb->setDatabaseName(save_db); - - /* - Wait for other mysqld's to acknowledge the table operation - */ - if (ndb_error == 0 && - !bitmap_is_clear_all(&schema_subscribers)) - { - /* - if own nodeid is set we are a single mysqld registred - as an optimization we update the slock directly - */ - if (bitmap_is_set(&schema_subscribers, node_id)) - ndbcluster_update_slock(thd, db, table_name); - else - dict->forceGCPWait(); - - int max_timeout= DEFAULT_SYNC_TIMEOUT; - mysql_mutex_lock(&ndb_schema_object->mutex); - while (1) - { - struct timespec abstime; - int i; - int no_storage_nodes= g_ndb_cluster_connection->no_db_nodes(); - set_timespec(abstime, 1); - int ret= mysql_cond_timedwait(&injector_cond, - &ndb_schema_object->mutex, - &abstime); - if (thd->killed) - break; - - /* begin protect ndb_schema_share */ - mysql_mutex_lock(&ndb_schema_share_mutex); - if (ndb_schema_share == 0) - { - mysql_mutex_unlock(&ndb_schema_share_mutex); - break; - } - mysql_mutex_lock(&ndb_schema_share->mutex); - for (i= 0; i < no_storage_nodes; i++) - { - /* remove any unsubscribed from schema_subscribers */ - MY_BITMAP *tmp= &ndb_schema_share->subscriber_bitmap[i]; - if (!bitmap_is_clear_all(tmp)) - bitmap_intersect(&schema_subscribers, tmp); - } - mysql_mutex_unlock(&ndb_schema_share->mutex); - mysql_mutex_unlock(&ndb_schema_share_mutex); - /* end protect ndb_schema_share */ - - /* remove any unsubscribed from ndb_schema_object->slock */ - 
bitmap_intersect(&ndb_schema_object->slock_bitmap, &schema_subscribers); - - DBUG_DUMP("ndb_schema_object->slock_bitmap.bitmap", - (uchar*)ndb_schema_object->slock_bitmap.bitmap, - no_bytes_in_map(&ndb_schema_object->slock_bitmap)); - - if (bitmap_is_clear_all(&ndb_schema_object->slock_bitmap)) - break; - - if (ret) - { - max_timeout--; - if (max_timeout == 0) - { - sql_print_error("NDB %s: distributing %s timed out. Ignoring...", - type_str, ndb_schema_object->key); - break; - } - if (opt_ndb_extra_logging) - ndb_report_waiting(type_str, max_timeout, - "distributing", ndb_schema_object->key); - } - } - mysql_mutex_unlock(&ndb_schema_object->mutex); - } - - if (ndb_schema_object) - ndb_free_schema_object(&ndb_schema_object, FALSE); - - DBUG_RETURN(0); -} - -/* - Handle _non_ data events from the storage nodes -*/ -int -ndb_handle_schema_change(THD *thd, Ndb *ndb, NdbEventOperation *pOp, - NDB_SHARE *share) -{ - DBUG_ENTER("ndb_handle_schema_change"); - TABLE* table= share->table; - TABLE_SHARE *table_share= share->table_share; - const char *dbname= table_share->db.str; - const char *tabname= table_share->table_name.str; - bool do_close_cached_tables= FALSE; - bool is_online_alter_table= FALSE; - bool is_rename_table= FALSE; - bool is_remote_change= - (uint) pOp->getReqNodeId() != g_ndb_cluster_connection->node_id(); - - if (pOp->getEventType() == NDBEVENT::TE_ALTER) - { - if (pOp->tableFrmChanged()) - { - DBUG_PRINT("info", ("NDBEVENT::TE_ALTER: table frm changed")); - is_online_alter_table= TRUE; - } - else - { - DBUG_PRINT("info", ("NDBEVENT::TE_ALTER: name changed")); - DBUG_ASSERT(pOp->tableNameChanged()); - is_rename_table= TRUE; - } - } - - { - ndb->setDatabaseName(dbname); - Ndb_table_guard ndbtab_g(ndb->getDictionary(), tabname); - const NDBTAB *ev_tab= pOp->getTable(); - const NDBTAB *cache_tab= ndbtab_g.get_table(); - if (cache_tab && - cache_tab->getObjectId() == ev_tab->getObjectId() && - cache_tab->getObjectVersion() <= ev_tab->getObjectVersion()) - 
ndbtab_g.invalidate(); - } - - /* - Refresh local frm file and dictionary cache if - remote on-line alter table - */ - if (is_remote_change && is_online_alter_table) - { - const char *tabname= table_share->table_name.str; - char key[FN_REFLEN + 1]; - uchar *data= 0, *pack_data= 0; - size_t length, pack_length; - int error; - NDBDICT *dict= ndb->getDictionary(); - const NDBTAB *altered_table= pOp->getTable(); - - DBUG_PRINT("info", ("Detected frm change of table %s.%s", - dbname, tabname)); - build_table_filename(key, FN_LEN - 1, dbname, tabname, NullS, 0); - /* - If the there is no local table shadowing the altered table and - it has an frm that is different than the one on disk then - overwrite it with the new table definition - */ - if (!ndbcluster_check_if_local_table(dbname, tabname) && - readfrm(key, &data, &length) == 0 && - packfrm(data, length, &pack_data, &pack_length) == 0 && - cmp_frm(altered_table, pack_data, pack_length)) - { - DBUG_DUMP("frm", (uchar*) altered_table->getFrmData(), - altered_table->getFrmLength()); - Ndb_table_guard ndbtab_g(dict, tabname); - const NDBTAB *old= ndbtab_g.get_table(); - if (!old && - old->getObjectVersion() != altered_table->getObjectVersion()) - dict->putTable(altered_table); - - my_free(data); - data= NULL; - if ((error= unpackfrm(&data, &length, - (const uchar*) altered_table->getFrmData())) || - (error= writefrm(key, data, length))) - { - sql_print_information("NDB: Failed write frm for %s.%s, error %d", - dbname, tabname, error); - } - - // copy names as memory will be freed - NdbAutoPtr<char> a1((char *)(dbname= strdup(dbname))); - NdbAutoPtr<char> a2((char *)(tabname= strdup(tabname))); - ndbcluster_binlog_close_table(thd, share); - - TABLE_LIST table_list; - bzero((char*) &table_list,sizeof(table_list)); - table_list.db= (char *)dbname; - table_list.alias= table_list.table_name= (char *)tabname; - close_cached_tables(thd, &table_list, FALSE, LONG_TIMEOUT); - - if ((error= ndbcluster_binlog_open_table(thd, share, 
- table_share, table, 1))) - sql_print_information("NDB: Failed to re-open table %s.%s", - dbname, tabname); - - table= share->table; - table_share= share->table_share; - dbname= table_share->db.str; - tabname= table_share->table_name.str; - } - my_free(data); - my_free(pack_data); - } - - // If only frm was changed continue replicating - if (is_online_alter_table) - { - /* Signal ha_ndbcluster::alter_table that drop is done */ - mysql_cond_signal(&injector_cond); - DBUG_RETURN(0); - } - - mysql_mutex_lock(&share->mutex); - if (is_rename_table && !is_remote_change) - { - DBUG_PRINT("info", ("Detected name change of table %s.%s", - share->db, share->table_name)); - /* ToDo: remove printout */ - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: rename table %s%s/%s -> %s.", - share_prefix, share->table->s->db.str, - share->table->s->table_name.str, - share->key); - { - ndb->setDatabaseName(share->table->s->db.str); - Ndb_table_guard ndbtab_g(ndb->getDictionary(), - share->table->s->table_name.str); - const NDBTAB *ev_tab= pOp->getTable(); - const NDBTAB *cache_tab= ndbtab_g.get_table(); - if (cache_tab && - cache_tab->getObjectId() == ev_tab->getObjectId() && - cache_tab->getObjectVersion() <= ev_tab->getObjectVersion()) - ndbtab_g.invalidate(); - } - /* do the rename of the table in the share */ - share->table->s->db.str= share->db; - share->table->s->db.length= strlen(share->db); - share->table->s->table_name.str= share->table_name; - share->table->s->table_name.length= strlen(share->table_name); - } - DBUG_ASSERT(share->op == pOp || share->op_old == pOp); - if (share->op_old == pOp) - share->op_old= 0; - else - share->op= 0; - // either just us or drop table handling as well - - /* Signal ha_ndbcluster::delete/rename_table that drop is done */ - mysql_mutex_unlock(&share->mutex); - mysql_cond_signal(&injector_cond); - - mysql_mutex_lock(&ndbcluster_mutex); - /* ndb_share reference binlog free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog free use_count: 
%u", - share->key, share->use_count)); - free_share(&share, TRUE); - if (is_remote_change && share && share->state != NSS_DROPPED) - { - DBUG_PRINT("info", ("remote change")); - share->state= NSS_DROPPED; - if (share->use_count != 1) - { - /* open handler holding reference */ - /* wait with freeing create ndb_share to below */ - do_close_cached_tables= TRUE; - } - else - { - /* ndb_share reference create free */ - DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u", - share->key, share->use_count)); - free_share(&share, TRUE); - share= 0; - } - } - else - share= 0; - mysql_mutex_unlock(&ndbcluster_mutex); - - pOp->setCustomData(0); - - mysql_mutex_lock(&injector_mutex); - ndb->dropEventOperation(pOp); - pOp= 0; - mysql_mutex_unlock(&injector_mutex); - - if (do_close_cached_tables) - { - TABLE_LIST table_list; - bzero((char*) &table_list,sizeof(table_list)); - table_list.db= (char *)dbname; - table_list.alias= table_list.table_name= (char *)tabname; - close_cached_tables(thd, &table_list, FALSE, LONG_TIMEOUT); - /* ndb_share reference create free */ - DBUG_PRINT("NDB_SHARE", ("%s create free use_count: %u", - share->key, share->use_count)); - free_share(&share); - } - DBUG_RETURN(0); -} - -static void ndb_binlog_query(THD *thd, Cluster_schema *schema) -{ - if (schema->any_value & NDB_ANYVALUE_RESERVED) - { - if (schema->any_value != NDB_ANYVALUE_FOR_NOLOGGING) - sql_print_warning("NDB: unknown value for binlog signalling 0x%X, " - "query not logged", - schema->any_value); - return; - } - uint32 thd_server_id_save= thd->server_id; - DBUG_ASSERT(sizeof(thd_server_id_save) == sizeof(thd->server_id)); - char *thd_db_save= thd->db; - if (schema->any_value == 0) - thd->server_id= ::server_id; - else - thd->server_id= schema->any_value; - thd->db= schema->db; - int errcode = query_error_code(thd, thd->killed == NOT_KILLED); - thd->binlog_query(THD::STMT_QUERY_TYPE, schema->query, - schema->query_length, FALSE, TRUE, - schema->name[0] == 0 || thd->db[0] == 0, - 
errcode); - thd->server_id= thd_server_id_save; - thd->db= thd_db_save; -} - -static int -ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb, - NdbEventOperation *pOp, - List<Cluster_schema> - *post_epoch_log_list, - List<Cluster_schema> - *post_epoch_unlock_list, - MEM_ROOT *mem_root) -{ - DBUG_ENTER("ndb_binlog_thread_handle_schema_event"); - NDB_SHARE *tmp_share= (NDB_SHARE *)pOp->getCustomData(); - if (tmp_share && ndb_schema_share == tmp_share) - { - NDBEVENT::TableEvent ev_type= pOp->getEventType(); - DBUG_PRINT("enter", ("%s.%s ev_type: %d", - tmp_share->db, tmp_share->table_name, ev_type)); - if (ev_type == NDBEVENT::TE_UPDATE || - ev_type == NDBEVENT::TE_INSERT) - { - Cluster_schema *schema= (Cluster_schema *) - sql_alloc(sizeof(Cluster_schema)); - MY_BITMAP slock; - my_bitmap_init(&slock, schema->slock, 8*SCHEMA_SLOCK_SIZE, FALSE); - uint node_id= g_ndb_cluster_connection->node_id(); - { - ndbcluster_get_schema(tmp_share, schema); - schema->any_value= pOp->getAnyValue(); - } - enum SCHEMA_OP_TYPE schema_type= (enum SCHEMA_OP_TYPE)schema->type; - DBUG_PRINT("info", - ("%s.%s: log query_length: %d query: '%s' type: %d", - schema->db, schema->name, - schema->query_length, schema->query, - schema_type)); - if (schema_type == SOT_CLEAR_SLOCK) - { - /* - handle slock after epoch is completed to ensure that - schema events get inserted in the binlog after any data - events - */ - post_epoch_log_list->push_back(schema, mem_root); - DBUG_RETURN(0); - } - if (schema->node_id != node_id) - { - int log_query= 0, post_epoch_unlock= 0; - switch (schema_type) - { - case SOT_DROP_TABLE: - // fall through - case SOT_RENAME_TABLE: - // fall through - case SOT_RENAME_TABLE_NEW: - // fall through - case SOT_ALTER_TABLE: - post_epoch_log_list->push_back(schema, mem_root); - /* acknowledge this query _after_ epoch completion */ - post_epoch_unlock= 1; - break; - case SOT_TRUNCATE_TABLE: - { - char key[FN_REFLEN + 1]; - build_table_filename(key, sizeof(key) - 1, - 
schema->db, schema->name, "", 0); - /* ndb_share reference temporary, free below */ - NDB_SHARE *share= get_share(key, 0, FALSE, FALSE); - if (share) - { - DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", - share->key, share->use_count)); - } - // invalidation already handled by binlog thread - if (!share || !share->op) - { - { - injector_ndb->setDatabaseName(schema->db); - Ndb_table_guard ndbtab_g(injector_ndb->getDictionary(), - schema->name); - ndbtab_g.invalidate(); - } - TABLE_LIST table_list; - bzero((char*) &table_list,sizeof(table_list)); - table_list.db= schema->db; - table_list.alias= table_list.table_name= schema->name; - close_cached_tables(thd, &table_list, FALSE, LONG_TIMEOUT); - } - /* ndb_share reference temporary free */ - if (share) - { - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - } - } - // fall through - case SOT_CREATE_TABLE: - if (ndbcluster_check_if_local_table(schema->db, schema->name)) - { - DBUG_PRINT("info", ("NDB Binlog: Skipping locally defined table '%s.%s'", - schema->db, schema->name)); - sql_print_error("NDB Binlog: Skipping locally defined table '%s.%s' from " - "binlog schema event '%s' from node %d. ", - schema->db, schema->name, schema->query, - schema->node_id); - } - else if (ndb_create_table_from_engine(thd, schema->db, schema->name)) - { - print_could_not_discover_error(thd, schema); - } - log_query= 1; - break; - case SOT_DROP_DB: - /* Drop the database locally if it only contains ndb tables */ - if (! 
ndbcluster_check_if_local_tables_in_db(thd, schema->db)) - { - const int no_print_error[1]= {0}; - run_query(thd, schema->query, - schema->query + schema->query_length, - no_print_error, /* print error */ - TRUE); /* don't binlog the query */ - /* binlog dropping database after any table operations */ - post_epoch_log_list->push_back(schema, mem_root); - /* acknowledge this query _after_ epoch completion */ - post_epoch_unlock= 1; - } - else - { - /* Database contained local tables, leave it */ - sql_print_error("NDB Binlog: Skipping drop database '%s' since it contained local tables " - "binlog schema event '%s' from node %d. ", - schema->db, schema->query, - schema->node_id); - log_query= 1; - } - break; - case SOT_CREATE_DB: - /* fall through */ - case SOT_ALTER_DB: - { - const int no_print_error[1]= {0}; - run_query(thd, schema->query, - schema->query + schema->query_length, - no_print_error, /* print error */ - TRUE); /* don't binlog the query */ - log_query= 1; - break; - } - case SOT_TABLESPACE: - case SOT_LOGFILE_GROUP: - log_query= 1; - break; - case SOT_CLEAR_SLOCK: - abort(); - } - if (log_query && ndb_binlog_running) - ndb_binlog_query(thd, schema); - /* signal that schema operation has been handled */ - DBUG_DUMP("slock", (uchar*) schema->slock, schema->slock_length); - if (bitmap_is_set(&slock, node_id)) - { - if (post_epoch_unlock) - post_epoch_unlock_list->push_back(schema, mem_root); - else - ndbcluster_update_slock(thd, schema->db, schema->name); - } - } - DBUG_RETURN(0); - } - /* - the normal case of UPDATE/INSERT has already been handled - */ - switch (ev_type) - { - case NDBEVENT::TE_DELETE: - // skip - break; - case NDBEVENT::TE_CLUSTER_FAILURE: - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: cluster failure for %s at epoch %u.", - ndb_schema_share->key, (unsigned) pOp->getGCI()); - // fall through - case NDBEVENT::TE_DROP: - if (opt_ndb_extra_logging && - ndb_binlog_tables_inited && ndb_binlog_running) - 
sql_print_information("NDB Binlog: ndb tables initially " - "read only on reconnect."); - - /* begin protect ndb_schema_share */ - mysql_mutex_lock(&ndb_schema_share_mutex); - /* ndb_share reference binlog extra free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog extra free use_count: %u", - ndb_schema_share->key, - ndb_schema_share->use_count)); - free_share(&ndb_schema_share); - ndb_schema_share= 0; - ndb_binlog_tables_inited= 0; - mysql_mutex_unlock(&ndb_schema_share_mutex); - /* end protect ndb_schema_share */ - - close_cached_tables(NULL, NULL, FALSE, LONG_TIMEOUT); - // fall through - case NDBEVENT::TE_ALTER: - ndb_handle_schema_change(thd, ndb, pOp, tmp_share); - break; - case NDBEVENT::TE_NODE_FAILURE: - { - uint8 node_id= g_node_id_map[pOp->getNdbdNodeId()]; - DBUG_ASSERT(node_id != 0xFF); - mysql_mutex_lock(&tmp_share->mutex); - bitmap_clear_all(&tmp_share->subscriber_bitmap[node_id]); - DBUG_PRINT("info",("NODE_FAILURE UNSUBSCRIBE[%d]", node_id)); - if (opt_ndb_extra_logging) - { - sql_print_information("NDB Binlog: Node: %d, down," - " Subscriber bitmask %x%x", - pOp->getNdbdNodeId(), - tmp_share->subscriber_bitmap[node_id].bitmap[1], - tmp_share->subscriber_bitmap[node_id].bitmap[0]); - } - mysql_mutex_unlock(&tmp_share->mutex); - mysql_cond_signal(&injector_cond); - break; - } - case NDBEVENT::TE_SUBSCRIBE: - { - uint8 node_id= g_node_id_map[pOp->getNdbdNodeId()]; - uint8 req_id= pOp->getReqNodeId(); - DBUG_ASSERT(req_id != 0 && node_id != 0xFF); - mysql_mutex_lock(&tmp_share->mutex); - bitmap_set_bit(&tmp_share->subscriber_bitmap[node_id], req_id); - DBUG_PRINT("info",("SUBSCRIBE[%d] %d", node_id, req_id)); - if (opt_ndb_extra_logging) - { - sql_print_information("NDB Binlog: Node: %d, subscribe from node %d," - " Subscriber bitmask %x%x", - pOp->getNdbdNodeId(), - req_id, - tmp_share->subscriber_bitmap[node_id].bitmap[1], - tmp_share->subscriber_bitmap[node_id].bitmap[0]); - } - mysql_mutex_unlock(&tmp_share->mutex); - mysql_cond_signal(&injector_cond); - 
break; - } - case NDBEVENT::TE_UNSUBSCRIBE: - { - uint8 node_id= g_node_id_map[pOp->getNdbdNodeId()]; - uint8 req_id= pOp->getReqNodeId(); - DBUG_ASSERT(req_id != 0 && node_id != 0xFF); - mysql_mutex_lock(&tmp_share->mutex); - bitmap_clear_bit(&tmp_share->subscriber_bitmap[node_id], req_id); - DBUG_PRINT("info",("UNSUBSCRIBE[%d] %d", node_id, req_id)); - if (opt_ndb_extra_logging) - { - sql_print_information("NDB Binlog: Node: %d, unsubscribe from node %d," - " Subscriber bitmask %x%x", - pOp->getNdbdNodeId(), - req_id, - tmp_share->subscriber_bitmap[node_id].bitmap[1], - tmp_share->subscriber_bitmap[node_id].bitmap[0]); - } - mysql_mutex_unlock(&tmp_share->mutex); - mysql_cond_signal(&injector_cond); - break; - } - default: - sql_print_error("NDB Binlog: unknown non data event %d for %s. " - "Ignoring...", (unsigned) ev_type, tmp_share->key); - } - } - DBUG_RETURN(0); -} - -/* - process any operations that should be done after - the epoch is complete -*/ -static void -ndb_binlog_thread_handle_schema_event_post_epoch(THD *thd, - List<Cluster_schema> - *post_epoch_log_list, - List<Cluster_schema> - *post_epoch_unlock_list) -{ - if (post_epoch_log_list->elements == 0) - return; - DBUG_ENTER("ndb_binlog_thread_handle_schema_event_post_epoch"); - Cluster_schema *schema; - while ((schema= post_epoch_log_list->pop())) - { - DBUG_PRINT("info", - ("%s.%s: log query_length: %d query: '%s' type: %d", - schema->db, schema->name, - schema->query_length, schema->query, - schema->type)); - int log_query= 0; - { - enum SCHEMA_OP_TYPE schema_type= (enum SCHEMA_OP_TYPE)schema->type; - char key[FN_REFLEN + 1]; - build_table_filename(key, sizeof(key) - 1, schema->db, schema->name, "", 0); - if (schema_type == SOT_CLEAR_SLOCK) - { - mysql_mutex_lock(&ndbcluster_mutex); - NDB_SCHEMA_OBJECT *ndb_schema_object= - (NDB_SCHEMA_OBJECT*) my_hash_search(&ndb_schema_objects, - (uchar*) key, strlen(key)); - if (ndb_schema_object) - { - mysql_mutex_lock(&ndb_schema_object->mutex); - 
memcpy(ndb_schema_object->slock, schema->slock, - sizeof(ndb_schema_object->slock)); - DBUG_DUMP("ndb_schema_object->slock_bitmap.bitmap", - (uchar*)ndb_schema_object->slock_bitmap.bitmap, - no_bytes_in_map(&ndb_schema_object->slock_bitmap)); - mysql_mutex_unlock(&ndb_schema_object->mutex); - mysql_cond_signal(&injector_cond); - } - mysql_mutex_unlock(&ndbcluster_mutex); - continue; - } - /* ndb_share reference temporary, free below */ - NDB_SHARE *share= get_share(key, 0, FALSE, FALSE); - if (share) - { - DBUG_PRINT("NDB_SHARE", ("%s temporary use_count: %u", - share->key, share->use_count)); - } - switch (schema_type) - { - case SOT_DROP_DB: - log_query= 1; - break; - case SOT_DROP_TABLE: - log_query= 1; - // invalidation already handled by binlog thread - if (share && share->op) - { - break; - } - // fall through - case SOT_RENAME_TABLE: - // fall through - case SOT_ALTER_TABLE: - // invalidation already handled by binlog thread - if (!share || !share->op) - { - { - injector_ndb->setDatabaseName(schema->db); - Ndb_table_guard ndbtab_g(injector_ndb->getDictionary(), - schema->name); - ndbtab_g.invalidate(); - } - TABLE_LIST table_list; - bzero((char*) &table_list,sizeof(table_list)); - table_list.db= schema->db; - table_list.alias= table_list.table_name= schema->name; - close_cached_tables(thd, &table_list, FALSE, LONG_TIMEOUT); - } - if (schema_type != SOT_ALTER_TABLE) - break; - // fall through - case SOT_RENAME_TABLE_NEW: - log_query= 1; - if (ndb_binlog_running && (!share || !share->op)) - { - /* - we need to free any share here as command below - may need to call handle_trailing_share - */ - if (share) - { - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - share= 0; - } - if (ndbcluster_check_if_local_table(schema->db, schema->name)) - { - DBUG_PRINT("info", ("NDB Binlog: Skipping locally defined table '%s.%s'", - schema->db, schema->name)); - 
sql_print_error("NDB Binlog: Skipping locally defined table '%s.%s' from " - "binlog schema event '%s' from node %d. ", - schema->db, schema->name, schema->query, - schema->node_id); - } - else if (ndb_create_table_from_engine(thd, schema->db, schema->name)) - { - print_could_not_discover_error(thd, schema); - } - } - break; - default: - DBUG_ASSERT(FALSE); - } - if (share) - { - /* ndb_share reference temporary free */ - DBUG_PRINT("NDB_SHARE", ("%s temporary free use_count: %u", - share->key, share->use_count)); - free_share(&share); - share= 0; - } - } - if (ndb_binlog_running && log_query) - ndb_binlog_query(thd, schema); - } - while ((schema= post_epoch_unlock_list->pop())) - { - ndbcluster_update_slock(thd, schema->db, schema->name); - } - DBUG_VOID_RETURN; -} - -/* - Timer class for doing performance measurements -*/ - -/********************************************************************* - Internal helper functions for handeling of the cluster replication tables - - ndb_binlog_index - - ndb_apply_status -*********************************************************************/ - -/* - struct to hold the data to be inserted into the - ndb_binlog_index table -*/ -struct ndb_binlog_index_row { - ulonglong gci; - const char *master_log_file; - ulonglong master_log_pos; - ulonglong n_inserts; - ulonglong n_updates; - ulonglong n_deletes; - ulonglong n_schemaops; -}; - -/* - Open the ndb_binlog_index table -*/ -static int open_ndb_binlog_index(THD *thd, TABLE **ndb_binlog_index) -{ - static char repdb[]= NDB_REP_DB; - static char reptable[]= NDB_REP_TABLE; - const char *save_proc_info= thd->proc_info; - TABLE_LIST *tables= &binlog_tables; - - tables->init_one_table(repdb, strlen(repdb), reptable, strlen(reptable), - reptable, TL_WRITE); - thd->proc_info= "Opening " NDB_REP_DB "." 
NDB_REP_TABLE; - - tables->required_type= FRMTYPE_TABLE; - thd->clear_error(); - if (open_and_lock_tables(thd, tables, FALSE, 0)) - { - if (thd->killed) - sql_print_error("NDB Binlog: Opening ndb_binlog_index: killed"); - else - sql_print_error("NDB Binlog: Opening ndb_binlog_index: %d, '%s'", - thd->get_stmt_da()->sql_errno(), - thd->get_stmt_da()->message()); - thd->proc_info= save_proc_info; - return -1; - } - *ndb_binlog_index= tables->table; - thd->proc_info= save_proc_info; - (*ndb_binlog_index)->use_all_columns(); - return 0; -} - - -/* - Insert one row in the ndb_binlog_index -*/ - -int ndb_add_ndb_binlog_index(THD *thd, void *_row) -{ - ndb_binlog_index_row &row= *(ndb_binlog_index_row *) _row; - int error= 0; - /* - Turn of binlogging to prevent the table changes to be written to - the binary log. - */ - ulong saved_options= thd->variables.option_bits; - thd->variables.option_bits&= ~OPTION_BIN_LOG; - - if (!ndb_binlog_index && open_ndb_binlog_index(thd, &ndb_binlog_index)) - { - sql_print_error("NDB Binlog: Unable to lock table ndb_binlog_index"); - error= -1; - goto add_ndb_binlog_index_err; - } - - /* - Intialize ndb_binlog_index->record[0] - */ - empty_record(ndb_binlog_index); - - ndb_binlog_index->field[0]->store(row.master_log_pos); - ndb_binlog_index->field[1]->store(row.master_log_file, - strlen(row.master_log_file), - &my_charset_bin); - ndb_binlog_index->field[2]->store(row.gci); - ndb_binlog_index->field[3]->store(row.n_inserts); - ndb_binlog_index->field[4]->store(row.n_updates); - ndb_binlog_index->field[5]->store(row.n_deletes); - ndb_binlog_index->field[6]->store(row.n_schemaops); - - if ((error= ndb_binlog_index->file->ha_write_row(ndb_binlog_index->record[0]))) - { - sql_print_error("NDB Binlog: Writing row to ndb_binlog_index: %d", error); - error= -1; - goto add_ndb_binlog_index_err; - } - -add_ndb_binlog_index_err: - thd->get_stmt_da()->set_overwrite_status(true); - thd->is_error() ? 
trans_rollback_stmt(thd) : trans_commit_stmt(thd); - thd->get_stmt_da()->set_overwrite_status(false); - close_thread_tables(thd); - /* - There should be no need for rolling back transaction due to deadlock - (since ndb_binlog_index is non transactional). - */ - DBUG_ASSERT(! thd->transaction_rollback_request); - - thd->mdl_context.release_transactional_locks(); - ndb_binlog_index= 0; - thd->variables.option_bits= saved_options; - return error; -} - -/********************************************************************* - Functions for start, stop, wait for ndbcluster binlog thread -*********************************************************************/ - -enum Binlog_thread_state -{ - BCCC_running= 0, - BCCC_exit= 1, - BCCC_restart= 2 -}; - -static enum Binlog_thread_state do_ndbcluster_binlog_close_connection= BCCC_restart; - -int ndbcluster_binlog_start() -{ - DBUG_ENTER("ndbcluster_binlog_start"); - - if (::server_id == 0) - { - sql_print_warning("NDB: server id set to zero will cause any other mysqld " - "with bin log to log with wrong server id"); - } - else if (::server_id & 0x1 << 31) - { - sql_print_error("NDB: server id's with high bit set is reserved for internal " - "purposes"); - DBUG_RETURN(-1); - } - - mysql_mutex_init(key_injector_mutex, &injector_mutex, MY_MUTEX_INIT_FAST); - mysql_cond_init(key_injector_cond, &injector_cond, NULL); - mysql_mutex_init(key_ndb_schema_share_mutex, - &ndb_schema_share_mutex, MY_MUTEX_INIT_FAST); - - /* Create injector thread */ - if (mysql_thread_create(key_thread_ndb_binlog, - &ndb_binlog_thread, &connection_attrib, - ndb_binlog_thread_func, 0)) - { - DBUG_PRINT("error", ("Could not create ndb injector thread")); - mysql_cond_destroy(&injector_cond); - mysql_mutex_destroy(&injector_mutex); - DBUG_RETURN(-1); - } - - ndbcluster_binlog_inited= 1; - - /* Wait for the injector thread to start */ - mysql_mutex_lock(&injector_mutex); - while (!ndb_binlog_thread_running) - mysql_cond_wait(&injector_cond, &injector_mutex); - 
mysql_mutex_unlock(&injector_mutex); - - if (ndb_binlog_thread_running < 0) - DBUG_RETURN(-1); - - DBUG_RETURN(0); -} - - -/************************************************************** - Internal helper functions for creating/dropping ndb events - used by the client sql threads -**************************************************************/ -void -ndb_rep_event_name(String *event_name,const char *db, const char *tbl) -{ - event_name->set_ascii("REPL$", 5); - event_name->append(db); - if (tbl) - { - event_name->append('/'); - event_name->append(tbl); - } -} - -bool -ndbcluster_check_if_local_table(const char *dbname, const char *tabname) -{ - char key[FN_REFLEN + 1]; - char ndb_file[FN_REFLEN + 1]; - - DBUG_ENTER("ndbcluster_check_if_local_table"); - build_table_filename(key, FN_LEN-1, dbname, tabname, reg_ext, 0); - build_table_filename(ndb_file, FN_LEN-1, dbname, tabname, ha_ndb_ext, 0); - /* Check that any defined table is an ndb table */ - DBUG_PRINT("info", ("Looking for file %s and %s", key, ndb_file)); - if ((! 
my_access(key, F_OK)) && my_access(ndb_file, F_OK)) - { - DBUG_PRINT("info", ("table file %s not on disk, local table", ndb_file)); - - - DBUG_RETURN(true); - } - - DBUG_RETURN(false); -} - -bool -ndbcluster_check_if_local_tables_in_db(THD *thd, const char *dbname) -{ - DBUG_ENTER("ndbcluster_check_if_local_tables_in_db"); - DBUG_PRINT("info", ("Looking for files in directory %s", dbname)); - LEX_STRING *tabname; - List<LEX_STRING> files; - char path[FN_REFLEN + 1]; - - build_table_filename(path, sizeof(path) - 1, dbname, "", "", 0); - if (find_files(thd, &files, dbname, path, NullS, 0) != FIND_FILES_OK) - { - DBUG_PRINT("info", ("Failed to find files")); - DBUG_RETURN(true); - } - DBUG_PRINT("info",("found: %d files", files.elements)); - while ((tabname= files.pop())) - { - DBUG_PRINT("info", ("Found table %s", tabname->str)); - if (ndbcluster_check_if_local_table(dbname, tabname->str)) - DBUG_RETURN(true); - } - - DBUG_RETURN(false); -} - -/* - Common function for setting up everything for logging a table at - create/discover. -*/ -int ndbcluster_create_binlog_setup(Ndb *ndb, const char *key, - uint key_len, - const char *db, - const char *table_name, - my_bool share_may_exist) -{ - int do_event_op= ndb_binlog_running; - DBUG_ENTER("ndbcluster_create_binlog_setup"); - DBUG_PRINT("enter",("key: %s key_len: %d %s.%s share_may_exist: %d", - key, key_len, db, table_name, share_may_exist)); - DBUG_ASSERT(! 
IS_NDB_BLOB_PREFIX(table_name)); - DBUG_ASSERT(strlen(key) == key_len); - - mysql_mutex_lock(&ndbcluster_mutex); - - /* Handle any trailing share */ - NDB_SHARE *share= (NDB_SHARE*) my_hash_search(&ndbcluster_open_tables, - (uchar*) key, key_len); - - if (share && share_may_exist) - { - if (share->flags & NSF_NO_BINLOG || - share->op != 0 || - share->op_old != 0) - { - mysql_mutex_unlock(&ndbcluster_mutex); - DBUG_RETURN(0); // replication already setup, or should not - } - } - - if (share) - { - if (share->op || share->op_old) - { - my_errno= HA_ERR_TABLE_EXIST; - mysql_mutex_unlock(&ndbcluster_mutex); - DBUG_RETURN(1); - } - if (!share_may_exist || share->connect_count != - g_ndb_cluster_connection->get_connect_count()) - { - handle_trailing_share(share); - share= NULL; - } - } - - /* Create share which is needed to hold replication information */ - if (share) - { - /* ndb_share reference create */ - ++share->use_count; - DBUG_PRINT("NDB_SHARE", ("%s create use_count: %u", - share->key, share->use_count)); - } - /* ndb_share reference create */ - else if (!(share= get_share(key, 0, TRUE, TRUE))) - { - sql_print_error("NDB Binlog: " - "allocating table share for %s failed", key); - } - else - { - DBUG_PRINT("NDB_SHARE", ("%s create use_count: %u", - share->key, share->use_count)); - } - - if (!ndb_schema_share && - strcmp(share->db, NDB_REP_DB) == 0 && - strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0) - do_event_op= 1; - else if (!ndb_apply_status_share && - strcmp(share->db, NDB_REP_DB) == 0 && - strcmp(share->table_name, NDB_APPLY_TABLE) == 0) - do_event_op= 1; - - if (!do_event_op) - { - share->flags|= NSF_NO_BINLOG; - mysql_mutex_unlock(&ndbcluster_mutex); - DBUG_RETURN(0); - } - mysql_mutex_unlock(&ndbcluster_mutex); - - while (share && !IS_TMP_PREFIX(table_name)) - { - /* - ToDo make sanity check of share so that the table is actually the same - I.e. 
we need to do open file from frm in this case - Currently awaiting this to be fixed in the 4.1 tree in the general - case - */ - - /* Create the event in NDB */ - ndb->setDatabaseName(db); - - NDBDICT *dict= ndb->getDictionary(); - Ndb_table_guard ndbtab_g(dict, table_name); - const NDBTAB *ndbtab= ndbtab_g.get_table(); - if (ndbtab == 0) - { - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: Failed to get table %s from ndb: " - "%s, %d", key, dict->getNdbError().message, - dict->getNdbError().code); - break; // error - } - String event_name(INJECTOR_EVENT_LEN); - ndb_rep_event_name(&event_name, db, table_name); - /* - event should have been created by someone else, - but let's make sure, and create if it doesn't exist - */ - const NDBEVENT *ev= dict->getEvent(event_name.c_ptr()); - if (!ev) - { - if (ndbcluster_create_event(ndb, ndbtab, event_name.c_ptr(), share)) - { - sql_print_error("NDB Binlog: " - "FAILED CREATE (DISCOVER) TABLE Event: %s", - event_name.c_ptr()); - break; // error - } - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: " - "CREATE (DISCOVER) TABLE Event: %s", - event_name.c_ptr()); - } - else - { - delete ev; - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: DISCOVER TABLE Event: %s", - event_name.c_ptr()); - } - - /* - create the event operations for receiving logging events - */ - if (ndbcluster_create_event_ops(share, ndbtab, event_name.c_ptr())) - { - sql_print_error("NDB Binlog:" - "FAILED CREATE (DISCOVER) EVENT OPERATIONS Event: %s", - event_name.c_ptr()); - /* a warning has been issued to the client */ - DBUG_RETURN(0); - } - DBUG_RETURN(0); - } - DBUG_RETURN(-1); -} - -int -ndbcluster_create_event(Ndb *ndb, const NDBTAB *ndbtab, - const char *event_name, NDB_SHARE *share, - int push_warning) -{ - THD *thd= current_thd; - DBUG_ENTER("ndbcluster_create_event"); - DBUG_PRINT("info", ("table=%s version=%d event=%s share=%s", - ndbtab->getName(), ndbtab->getObjectVersion(), - event_name, share 
? share->key : "(nil)")); - DBUG_ASSERT(! IS_NDB_BLOB_PREFIX(ndbtab->getName())); - if (!share) - { - DBUG_PRINT("info", ("share == NULL")); - DBUG_RETURN(0); - } - if (share->flags & NSF_NO_BINLOG) - { - DBUG_PRINT("info", ("share->flags & NSF_NO_BINLOG, flags: %x %d", - share->flags, share->flags & NSF_NO_BINLOG)); - DBUG_RETURN(0); - } - - NDBDICT *dict= ndb->getDictionary(); - NDBEVENT my_event(event_name); - my_event.setTable(*ndbtab); - my_event.addTableEvent(NDBEVENT::TE_ALL); - if (share->flags & NSF_HIDDEN_PK) - { - if (share->flags & NSF_BLOB_FLAG) - { - sql_print_error("NDB Binlog: logging of table %s " - "with BLOB attribute and no PK is not supported", - share->key); - if (push_warning) - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - ER(ER_ILLEGAL_HA_CREATE_OPTION), - ndbcluster_hton_name, - "Binlog of table with BLOB attribute and no PK"); - - share->flags|= NSF_NO_BINLOG; - DBUG_RETURN(-1); - } - /* No primary key, subscribe for all attributes */ - my_event.setReport(NDBEVENT::ER_ALL); - DBUG_PRINT("info", ("subscription all")); - } - else - { - if (ndb_schema_share || strcmp(share->db, NDB_REP_DB) || - strcmp(share->table_name, NDB_SCHEMA_TABLE)) - { - my_event.setReport(NDBEVENT::ER_UPDATED); - DBUG_PRINT("info", ("subscription only updated")); - } - else - { - my_event.setReport((NDBEVENT::EventReport) - (NDBEVENT::ER_ALL | NDBEVENT::ER_SUBSCRIBE)); - DBUG_PRINT("info", ("subscription all and subscribe")); - } - } - if (share->flags & NSF_BLOB_FLAG) - my_event.mergeEvents(TRUE); - - /* add all columns to the event */ - int n_cols= ndbtab->getNoOfColumns(); - for(int a= 0; a < n_cols; a++) - my_event.addEventColumn(a); - - if (dict->createEvent(my_event)) // Add event to database - { - if (dict->getNdbError().classification != NdbError::SchemaObjectExists) - { - /* - failed, print a warning - */ - if (push_warning > 1) - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, 
ER(ER_GET_ERRMSG), - dict->getNdbError().code, - dict->getNdbError().message, "NDB"); - sql_print_error("NDB Binlog: Unable to create event in database. " - "Event: %s Error Code: %d Message: %s", event_name, - dict->getNdbError().code, dict->getNdbError().message); - DBUG_RETURN(-1); - } - - /* - try retrieving the event, if table version/id matches, we will get - a valid event. Otherwise we have a trailing event from before - */ - const NDBEVENT *ev; - if ((ev= dict->getEvent(event_name))) - { - delete ev; - DBUG_RETURN(0); - } - - /* - trailing event from before; an error, but try to correct it - */ - if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT && - dict->dropEvent(my_event.getName())) - { - if (push_warning > 1) - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - dict->getNdbError().code, - dict->getNdbError().message, "NDB"); - sql_print_error("NDB Binlog: Unable to create event in database. " - " Attempt to correct with drop failed. " - "Event: %s Error Code: %d Message: %s", - event_name, - dict->getNdbError().code, - dict->getNdbError().message); - DBUG_RETURN(-1); - } - - /* - try to add the event again - */ - if (dict->createEvent(my_event)) - { - if (push_warning > 1) - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - dict->getNdbError().code, - dict->getNdbError().message, "NDB"); - sql_print_error("NDB Binlog: Unable to create event in database. " - " Attempt to correct with drop ok, but create failed. 
" - "Event: %s Error Code: %d Message: %s", - event_name, - dict->getNdbError().code, - dict->getNdbError().message); - DBUG_RETURN(-1); - } -#ifdef NDB_BINLOG_EXTRA_WARNINGS - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - 0, "NDB Binlog: Removed trailing event", - "NDB"); -#endif - } - - DBUG_RETURN(0); -} - -inline int is_ndb_compatible_type(Field *field) -{ - return - !(field->flags & BLOB_FLAG) && - field->type() != MYSQL_TYPE_BIT && - field->pack_length() != 0; -} - -/* - - create eventOperations for receiving log events - - setup ndb recattrs for reception of log event data - - "start" the event operation - - used at create/discover of tables -*/ -int -ndbcluster_create_event_ops(NDB_SHARE *share, const NDBTAB *ndbtab, - const char *event_name) -{ - THD *thd= current_thd; - /* - we are in either create table or rename table so table should be - locked, hence we can work with the share without locks - */ - - DBUG_ENTER("ndbcluster_create_event_ops"); - DBUG_PRINT("enter", ("table: %s event: %s", ndbtab->getName(), event_name)); - DBUG_ASSERT(! 
IS_NDB_BLOB_PREFIX(ndbtab->getName())); - - DBUG_ASSERT(share != 0); - - if (share->flags & NSF_NO_BINLOG) - { - DBUG_PRINT("info", ("share->flags & NSF_NO_BINLOG, flags: %x", - share->flags)); - DBUG_RETURN(0); - } - - int do_ndb_schema_share= 0, do_ndb_apply_status_share= 0; - if (!ndb_schema_share && strcmp(share->db, NDB_REP_DB) == 0 && - strcmp(share->table_name, NDB_SCHEMA_TABLE) == 0) - do_ndb_schema_share= 1; - else if (!ndb_apply_status_share && strcmp(share->db, NDB_REP_DB) == 0 && - strcmp(share->table_name, NDB_APPLY_TABLE) == 0) - do_ndb_apply_status_share= 1; - else if (!binlog_filter->db_ok(share->db) || !ndb_binlog_running) - { - share->flags|= NSF_NO_BINLOG; - DBUG_RETURN(0); - } - - if (share->op) - { - assert(share->op->getCustomData() == (void *) share); - - DBUG_ASSERT(share->use_count > 1); - sql_print_error("NDB Binlog: discover reusing old ev op"); - /* ndb_share reference ToDo free */ - DBUG_PRINT("NDB_SHARE", ("%s ToDo free use_count: %u", - share->key, share->use_count)); - free_share(&share); // old event op already has reference - DBUG_RETURN(0); - } - - TABLE *table= share->table; - - int retries= 100; - /* - 100 milliseconds, temporary error on schema operation can - take some time to be resolved - */ - int retry_sleep= 100; - while (1) - { - mysql_mutex_lock(&injector_mutex); - Ndb *ndb= injector_ndb; - if (do_ndb_schema_share) - ndb= schema_ndb; - - if (ndb == 0) - { - mysql_mutex_unlock(&injector_mutex); - DBUG_RETURN(-1); - } - - NdbEventOperation* op; - if (do_ndb_schema_share) - op= ndb->createEventOperation(event_name); - else - { - // set injector_ndb database/schema from table internal name - int ret= ndb->setDatabaseAndSchemaName(ndbtab); - assert(ret == 0); - op= ndb->createEventOperation(event_name); - // reset to catch errors - ndb->setDatabaseName(""); - } - if (!op) - { - sql_print_error("NDB Binlog: Creating NdbEventOperation failed for" - " %s",event_name); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - 
ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - ndb->getNdbError().code, - ndb->getNdbError().message, - "NDB"); - mysql_mutex_unlock(&injector_mutex); - DBUG_RETURN(-1); - } - - if (share->flags & NSF_BLOB_FLAG) - op->mergeEvents(TRUE); // currently not inherited from event - - DBUG_PRINT("info", ("share->ndb_value[0]: 0x%lx share->ndb_value[1]: 0x%lx", - (long) share->ndb_value[0], - (long) share->ndb_value[1])); - int n_columns= ndbtab->getNoOfColumns(); - int n_fields= table ? table->s->fields : 0; // XXX ??? - for (int j= 0; j < n_columns; j++) - { - const char *col_name= ndbtab->getColumn(j)->getName(); - NdbValue attr0, attr1; - if (j < n_fields) - { - Field *f= share->table->field[j]; - if (is_ndb_compatible_type(f)) - { - DBUG_PRINT("info", ("%s compatible", col_name)); - attr0.rec= op->getValue(col_name, (char*) f->ptr); - attr1.rec= op->getPreValue(col_name, - (f->ptr - share->table->record[0]) + - (char*) share->table->record[1]); - } - else if (! (f->flags & BLOB_FLAG)) - { - DBUG_PRINT("info", ("%s non compatible", col_name)); - attr0.rec= op->getValue(col_name); - attr1.rec= op->getPreValue(col_name); - } - else - { - DBUG_PRINT("info", ("%s blob", col_name)); - DBUG_ASSERT(share->flags & NSF_BLOB_FLAG); - attr0.blob= op->getBlobHandle(col_name); - attr1.blob= op->getPreBlobHandle(col_name); - if (attr0.blob == NULL || attr1.blob == NULL) - { - sql_print_error("NDB Binlog: Creating NdbEventOperation" - " blob field %u handles failed (code=%d) for %s", - j, op->getNdbError().code, event_name); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - op->getNdbError().code, - op->getNdbError().message, - "NDB"); - ndb->dropEventOperation(op); - mysql_mutex_unlock(&injector_mutex); - DBUG_RETURN(-1); - } - } - } - else - { - DBUG_PRINT("info", ("%s hidden key", col_name)); - attr0.rec= op->getValue(col_name); - attr1.rec= op->getPreValue(col_name); - } - share->ndb_value[0][j].ptr= attr0.ptr; - share->ndb_value[1][j].ptr= 
attr1.ptr; - DBUG_PRINT("info", ("&share->ndb_value[0][%d]: 0x%lx " - "share->ndb_value[0][%d]: 0x%lx", - j, (long) &share->ndb_value[0][j], - j, (long) attr0.ptr)); - DBUG_PRINT("info", ("&share->ndb_value[1][%d]: 0x%lx " - "share->ndb_value[1][%d]: 0x%lx", - j, (long) &share->ndb_value[0][j], - j, (long) attr1.ptr)); - } - op->setCustomData((void *) share); // set before execute - share->op= op; // assign op in NDB_SHARE - if (op->execute()) - { - share->op= NULL; - retries--; - if (op->getNdbError().status != NdbError::TemporaryError && - op->getNdbError().code != 1407) - retries= 0; - if (retries == 0) - { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - op->getNdbError().code, op->getNdbError().message, - "NDB"); - sql_print_error("NDB Binlog: ndbevent->execute failed for %s; %d %s", - event_name, - op->getNdbError().code, op->getNdbError().message); - } - ndb->dropEventOperation(op); - mysql_mutex_unlock(&injector_mutex); - if (retries) - { - my_sleep(retry_sleep); - continue; - } - DBUG_RETURN(-1); - } - mysql_mutex_unlock(&injector_mutex); - break; - } - - /* ndb_share reference binlog */ - get_share(share); - DBUG_PRINT("NDB_SHARE", ("%s binlog use_count: %u", - share->key, share->use_count)); - if (do_ndb_apply_status_share) - { - /* ndb_share reference binlog extra */ - ndb_apply_status_share= get_share(share); - DBUG_PRINT("NDB_SHARE", ("%s binlog extra use_count: %u", - share->key, share->use_count)); - mysql_cond_signal(&injector_cond); - } - else if (do_ndb_schema_share) - { - /* ndb_share reference binlog extra */ - ndb_schema_share= get_share(share); - DBUG_PRINT("NDB_SHARE", ("%s binlog extra use_count: %u", - share->key, share->use_count)); - mysql_cond_signal(&injector_cond); - } - - DBUG_PRINT("info",("%s share->op: 0x%lx share->use_count: %u", - share->key, (long) share->op, share->use_count)); - - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: logging %s", share->key); - 
DBUG_RETURN(0); -} - -/* - when entering the calling thread should have a share lock id share != 0 - then the injector thread will have one as well, i.e. share->use_count == 0 - (unless it has already dropped... then share->op == 0) -*/ -int -ndbcluster_handle_drop_table(Ndb *ndb, const char *event_name, - NDB_SHARE *share, const char *type_str) -{ - DBUG_ENTER("ndbcluster_handle_drop_table"); - THD *thd= current_thd; - - NDBDICT *dict= ndb->getDictionary(); - if (event_name && dict->dropEvent(event_name)) - { - if (dict->getNdbError().code != 4710) - { - /* drop event failed for some reason, issue a warning */ - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_GET_ERRMSG, ER(ER_GET_ERRMSG), - dict->getNdbError().code, - dict->getNdbError().message, "NDB"); - /* error is not that the event did not exist */ - sql_print_error("NDB Binlog: Unable to drop event in database. " - "Event: %s Error Code: %d Message: %s", - event_name, - dict->getNdbError().code, - dict->getNdbError().message); - /* ToDo; handle error? */ - if (share && share->op && - share->op->getState() == NdbEventOperation::EO_EXECUTING && - dict->getNdbError().mysql_code != HA_ERR_NO_CONNECTION) - { - DBUG_ASSERT(FALSE); - DBUG_RETURN(-1); - } - } - } - - if (share == 0 || share->op == 0) - { - DBUG_RETURN(0); - } - -/* - Syncronized drop between client thread and injector thread is - neccessary in order to maintain ordering in the binlog, - such that the drop occurs _after_ any inserts/updates/deletes. - - The penalty for this is that the drop table becomes slow. - - This wait is however not strictly neccessary to produce a binlog - that is usable. However the slave does not currently handle - these out of order, thus we are keeping the SYNC_DROP_ defined - for now. 
-*/ - const char *save_proc_info= thd->proc_info; -#define SYNC_DROP_ -#ifdef SYNC_DROP_ - thd->proc_info= "Syncing ndb table schema operation and binlog"; - mysql_mutex_lock(&share->mutex); - int max_timeout= DEFAULT_SYNC_TIMEOUT; - while (share->op) - { - struct timespec abstime; - set_timespec(abstime, 1); - int ret= mysql_cond_timedwait(&injector_cond, - &share->mutex, - &abstime); - if (thd->killed || - share->op == 0) - break; - if (ret) - { - max_timeout--; - if (max_timeout == 0) - { - sql_print_error("NDB %s: %s timed out. Ignoring...", - type_str, share->key); - break; - } - if (opt_ndb_extra_logging) - ndb_report_waiting(type_str, max_timeout, - type_str, share->key); - } - } - mysql_mutex_unlock(&share->mutex); -#else - mysql_mutex_lock(&share->mutex); - share->op_old= share->op; - share->op= 0; - mysql_mutex_unlock(&share->mutex); -#endif - thd->proc_info= save_proc_info; - - DBUG_RETURN(0); -} - - -/******************************************************************** - Internal helper functions for differentd events from the stoarage nodes - used by the ndb injector thread -********************************************************************/ - -/* - Handle error states on events from the storage nodes -*/ -static int ndb_binlog_thread_handle_error(Ndb *ndb, NdbEventOperation *pOp, - ndb_binlog_index_row &row) -{ - NDB_SHARE *share= (NDB_SHARE *)pOp->getCustomData(); - DBUG_ENTER("ndb_binlog_thread_handle_error"); - - int overrun= pOp->isOverrun(); - if (overrun) - { - /* - ToDo: this error should rather clear the ndb_binlog_index... - and continue - */ - sql_print_error("NDB Binlog: Overrun in event buffer, " - "this means we have dropped events. Cannot " - "continue binlog for %s", share->key); - pOp->clearError(); - DBUG_RETURN(-1); - } - - if (!pOp->isConsistent()) - { - /* - ToDo: this error should rather clear the ndb_binlog_index... - and continue - */ - sql_print_error("NDB Binlog: Not Consistent. Cannot " - "continue binlog for %s. 
Error code: %d" - " Message: %s", share->key, - pOp->getNdbError().code, - pOp->getNdbError().message); - pOp->clearError(); - DBUG_RETURN(-1); - } - sql_print_error("NDB Binlog: unhandled error %d for table %s", - pOp->hasError(), share->key); - pOp->clearError(); - DBUG_RETURN(0); -} - -static int -ndb_binlog_thread_handle_non_data_event(THD *thd, Ndb *ndb, - NdbEventOperation *pOp, - ndb_binlog_index_row &row) -{ - NDB_SHARE *share= (NDB_SHARE *)pOp->getCustomData(); - NDBEVENT::TableEvent type= pOp->getEventType(); - - switch (type) - { - case NDBEVENT::TE_CLUSTER_FAILURE: - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: cluster failure for %s at epoch %u.", - share->key, (unsigned) pOp->getGCI()); - if (ndb_apply_status_share == share) - { - if (opt_ndb_extra_logging && - ndb_binlog_tables_inited && ndb_binlog_running) - sql_print_information("NDB Binlog: ndb tables initially " - "read only on reconnect."); - /* ndb_share reference binlog extra free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog extra free use_count: %u", - share->key, share->use_count)); - free_share(&ndb_apply_status_share); - ndb_apply_status_share= 0; - ndb_binlog_tables_inited= 0; - } - DBUG_PRINT("error", ("CLUSTER FAILURE EVENT: " - "%s received share: 0x%lx op: 0x%lx share op: 0x%lx " - "op_old: 0x%lx", - share->key, (long) share, (long) pOp, - (long) share->op, (long) share->op_old)); - break; - case NDBEVENT::TE_DROP: - if (ndb_apply_status_share == share) - { - if (opt_ndb_extra_logging && - ndb_binlog_tables_inited && ndb_binlog_running) - sql_print_information("NDB Binlog: ndb tables initially " - "read only on reconnect."); - /* ndb_share reference binlog extra free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog extra free use_count: %u", - share->key, share->use_count)); - free_share(&ndb_apply_status_share); - ndb_apply_status_share= 0; - ndb_binlog_tables_inited= 0; - } - /* ToDo: remove printout */ - if (opt_ndb_extra_logging) - sql_print_information("NDB Binlog: drop 
table %s.", share->key); - // fall through - case NDBEVENT::TE_ALTER: - row.n_schemaops++; - DBUG_PRINT("info", ("TABLE %s EVENT: %s received share: 0x%lx op: 0x%lx " - "share op: 0x%lx op_old: 0x%lx", - type == NDBEVENT::TE_DROP ? "DROP" : "ALTER", - share->key, (long) share, (long) pOp, - (long) share->op, (long) share->op_old)); - break; - case NDBEVENT::TE_NODE_FAILURE: - /* fall through */ - case NDBEVENT::TE_SUBSCRIBE: - /* fall through */ - case NDBEVENT::TE_UNSUBSCRIBE: - /* ignore */ - return 0; - default: - sql_print_error("NDB Binlog: unknown non data event %d for %s. " - "Ignoring...", (unsigned) type, share->key); - return 0; - } - - ndb_handle_schema_change(thd, ndb, pOp, share); - return 0; -} - -/* - Handle data events from the storage nodes -*/ -static int -ndb_binlog_thread_handle_data_event(Ndb *ndb, NdbEventOperation *pOp, - ndb_binlog_index_row &row, - injector::transaction &trans) -{ - NDB_SHARE *share= (NDB_SHARE*) pOp->getCustomData(); - if (share == ndb_apply_status_share) - return 0; - - uint32 originating_server_id= pOp->getAnyValue(); - if (originating_server_id == 0) - originating_server_id= ::server_id; - else if (originating_server_id & NDB_ANYVALUE_RESERVED) - { - if (originating_server_id != NDB_ANYVALUE_FOR_NOLOGGING) - sql_print_warning("NDB: unknown value for binlog signalling 0x%X, " - "event not logged", - originating_server_id); - return 0; - } - else if (!g_ndb_log_slave_updates) - { - /* - This event comes from a slave applier since it has an originating - server id set. Since option to log slave updates is not set, skip it. - */ - return 0; - } - - TABLE *table= share->table; - DBUG_ASSERT(trans.good()); - DBUG_ASSERT(table != 0); - - dbug_print_table("table", table); - - TABLE_SHARE *table_s= table->s; - uint n_fields= table_s->fields; - MY_BITMAP b; - /* Potential buffer for the bitmap */ - uint32 bitbuf[128 / (sizeof(uint32) * 8)]; - my_bitmap_init(&b, n_fields <= sizeof(bitbuf) * 8 ? 
bitbuf : NULL, - n_fields, FALSE); - bitmap_set_all(&b); - - /* - row data is already in table->record[0] - As we told the NdbEventOperation to do this - (saves moving data about many times) - */ - - /* - for now malloc/free blobs buffer each time - TODO if possible share single permanent buffer with handlers - */ - uchar* blobs_buffer[2] = { 0, 0 }; - uint blobs_buffer_size[2] = { 0, 0 }; - - switch(pOp->getEventType()) - { - case NDBEVENT::TE_INSERT: - row.n_inserts++; - DBUG_PRINT("info", ("INSERT INTO %s.%s", - table_s->db.str, table_s->table_name.str)); - { - if (share->flags & NSF_BLOB_FLAG) - { - my_ptrdiff_t ptrdiff= 0; - int ret __attribute__((unused))= get_ndb_blobs_value(table, share->ndb_value[0], - blobs_buffer[0], - blobs_buffer_size[0], - ptrdiff); - DBUG_ASSERT(ret == 0); - } - ndb_unpack_record(table, share->ndb_value[0], &b, table->record[0]); - int ret __attribute__((unused))= trans.write_row(originating_server_id, - injector::transaction::table(table, - TRUE), - &b, n_fields, table->record[0]); - DBUG_ASSERT(ret == 0); - } - break; - case NDBEVENT::TE_DELETE: - row.n_deletes++; - DBUG_PRINT("info",("DELETE FROM %s.%s", - table_s->db.str, table_s->table_name.str)); - { - /* - table->record[0] contains only the primary key in this case - since we do not have an after image - */ - int n; - if (table->s->primary_key != MAX_KEY) - n= 0; /* - use the primary key only as it save time and space and - it is the only thing needed to log the delete - */ - else - n= 1; /* - we use the before values since we don't have a primary key - since the mysql server does not handle the hidden primary - key - */ - - if (share->flags & NSF_BLOB_FLAG) - { - my_ptrdiff_t ptrdiff= table->record[n] - table->record[0]; - int ret __attribute__((unused))= get_ndb_blobs_value(table, share->ndb_value[n], - blobs_buffer[n], - blobs_buffer_size[n], - ptrdiff); - DBUG_ASSERT(ret == 0); - } - ndb_unpack_record(table, share->ndb_value[n], &b, table->record[n]); - 
DBUG_EXECUTE("info", print_records(table, table->record[n]);); - int ret __attribute__((unused))= trans.delete_row(originating_server_id, - injector::transaction::table(table, - TRUE), - &b, n_fields, table->record[n]); - DBUG_ASSERT(ret == 0); - } - break; - case NDBEVENT::TE_UPDATE: - row.n_updates++; - DBUG_PRINT("info", ("UPDATE %s.%s", - table_s->db.str, table_s->table_name.str)); - { - if (share->flags & NSF_BLOB_FLAG) - { - my_ptrdiff_t ptrdiff= 0; - int ret __attribute__((unused))= get_ndb_blobs_value(table, share->ndb_value[0], - blobs_buffer[0], - blobs_buffer_size[0], - ptrdiff); - DBUG_ASSERT(ret == 0); - } - ndb_unpack_record(table, share->ndb_value[0], - &b, table->record[0]); - DBUG_EXECUTE("info", print_records(table, table->record[0]);); - if (table->s->primary_key != MAX_KEY) - { - /* - since table has a primary key, we can do a write - using only after values - */ - trans.write_row(originating_server_id, - injector::transaction::table(table, TRUE), - &b, n_fields, table->record[0]);// after values - } - else - { - /* - mysql server cannot handle the ndb hidden key and - therefore needs the before image as well - */ - if (share->flags & NSF_BLOB_FLAG) - { - my_ptrdiff_t ptrdiff= table->record[1] - table->record[0]; - int ret __attribute__((unused))= get_ndb_blobs_value(table, share->ndb_value[1], - blobs_buffer[1], - blobs_buffer_size[1], - ptrdiff); - DBUG_ASSERT(ret == 0); - } - ndb_unpack_record(table, share->ndb_value[1], &b, table->record[1]); - DBUG_EXECUTE("info", print_records(table, table->record[1]);); - int ret __attribute__((unused))= trans.update_row(originating_server_id, - injector::transaction::table(table, - TRUE), - &b, n_fields, - table->record[1], // before values - table->record[0]);// after values - DBUG_ASSERT(ret == 0); - } - } - break; - default: - /* We should REALLY never get here. 
*/ - DBUG_PRINT("info", ("default - uh oh, a brain exploded.")); - break; - } - - if (share->flags & NSF_BLOB_FLAG) - { - my_free(blobs_buffer[0]); - my_free(blobs_buffer[1]); - } - - return 0; -} - -//#define RUN_NDB_BINLOG_TIMER -#ifdef RUN_NDB_BINLOG_TIMER -class Timer -{ -public: - Timer() { start(); } - void start() { gettimeofday(&m_start, 0); } - void stop() { gettimeofday(&m_stop, 0); } - ulong elapsed_ms() - { - return (ulong) - (((longlong) m_stop.tv_sec - (longlong) m_start.tv_sec) * 1000 + - ((longlong) m_stop.tv_usec - - (longlong) m_start.tv_usec + 999) / 1000); - } -private: - struct timeval m_start,m_stop; -}; -#endif - -/**************************************************************** - Injector thread main loop -****************************************************************/ - -static uchar * -ndb_schema_objects_get_key(NDB_SCHEMA_OBJECT *schema_object, - size_t *length, - my_bool not_used __attribute__((unused))) -{ - *length= schema_object->key_length; - return (uchar*) schema_object->key; -} - -static NDB_SCHEMA_OBJECT *ndb_get_schema_object(const char *key, - my_bool create_if_not_exists, - my_bool have_lock) -{ - NDB_SCHEMA_OBJECT *ndb_schema_object; - uint length= (uint) strlen(key); - DBUG_ENTER("ndb_get_schema_object"); - DBUG_PRINT("enter", ("key: '%s'", key)); - - if (!have_lock) - mysql_mutex_lock(&ndbcluster_mutex); - while (!(ndb_schema_object= - (NDB_SCHEMA_OBJECT*) my_hash_search(&ndb_schema_objects, - (uchar*) key, - length))) - { - if (!create_if_not_exists) - { - DBUG_PRINT("info", ("does not exist")); - break; - } - if (!(ndb_schema_object= - (NDB_SCHEMA_OBJECT*) my_malloc(sizeof(*ndb_schema_object) + length + 1, - MYF(MY_WME | MY_ZEROFILL)))) - { - DBUG_PRINT("info", ("malloc error")); - break; - } - ndb_schema_object->key= (char *)(ndb_schema_object+1); - memcpy(ndb_schema_object->key, key, length + 1); - ndb_schema_object->key_length= length; - if (my_hash_insert(&ndb_schema_objects, (uchar*) ndb_schema_object)) - { - 
my_free(ndb_schema_object); - break; - } - mysql_mutex_init(key_ndb_schema_object_mutex, &ndb_schema_object->mutex, MY_MUTEX_INIT_FAST); - my_bitmap_init(&ndb_schema_object->slock_bitmap, ndb_schema_object->slock, - sizeof(ndb_schema_object->slock)*8, FALSE); - bitmap_clear_all(&ndb_schema_object->slock_bitmap); - break; - } - if (ndb_schema_object) - { - ndb_schema_object->use_count++; - DBUG_PRINT("info", ("use_count: %d", ndb_schema_object->use_count)); - } - if (!have_lock) - mysql_mutex_unlock(&ndbcluster_mutex); - DBUG_RETURN(ndb_schema_object); -} - - -static void ndb_free_schema_object(NDB_SCHEMA_OBJECT **ndb_schema_object, - bool have_lock) -{ - DBUG_ENTER("ndb_free_schema_object"); - DBUG_PRINT("enter", ("key: '%s'", (*ndb_schema_object)->key)); - if (!have_lock) - mysql_mutex_lock(&ndbcluster_mutex); - if (!--(*ndb_schema_object)->use_count) - { - DBUG_PRINT("info", ("use_count: %d", (*ndb_schema_object)->use_count)); - my_hash_delete(&ndb_schema_objects, (uchar*) *ndb_schema_object); - mysql_mutex_destroy(&(*ndb_schema_object)->mutex); - my_free(*ndb_schema_object); - *ndb_schema_object= 0; - } - else - { - DBUG_PRINT("info", ("use_count: %d", (*ndb_schema_object)->use_count)); - } - if (!have_lock) - mysql_mutex_unlock(&ndbcluster_mutex); - DBUG_VOID_RETURN; -} - -extern ulong opt_ndb_report_thresh_binlog_epoch_slip; -extern ulong opt_ndb_report_thresh_binlog_mem_usage; - -pthread_handler_t ndb_binlog_thread_func(void *arg) -{ - THD *thd; /* needs to be first for thread_stack */ - Ndb *i_ndb= 0; - Ndb *s_ndb= 0; - Thd_ndb *thd_ndb=0; - int ndb_update_ndb_binlog_index= 1; - injector *inj= injector::instance(); - uint incident_id= 0; - -#ifdef RUN_NDB_BINLOG_TIMER - Timer main_timer; -#endif - - mysql_mutex_lock(&injector_mutex); - /* - Set up the Thread - */ - my_thread_init(); - DBUG_ENTER("ndb_binlog_thread"); - - thd= new THD; /* note that contructor of THD uses DBUG_ */ - THD_CHECK_SENTRY(thd); - thd->set_current_stmt_binlog_format_row(); - - /* We 
need to set thd->thread_id before thd->store_globals, or it will - set an invalid value for thd->variables.pseudo_thread_id. - */ - mysql_mutex_lock(&LOCK_thread_count); - thd->thread_id= thread_id++; - mysql_mutex_unlock(&LOCK_thread_count); - - mysql_thread_set_psi_id(thd->thread_id); - - thd->thread_stack= (char*) &thd; /* remember where our stack is */ - if (thd->store_globals()) - { - thd->cleanup(); - delete thd; - ndb_binlog_thread_running= -1; - mysql_mutex_unlock(&injector_mutex); - mysql_cond_signal(&injector_cond); - - DBUG_LEAVE; // Must match DBUG_ENTER() - my_thread_end(); - pthread_exit(0); - return NULL; // Avoid compiler warnings - } - - thd->init_for_queries(); - thd->command= COM_DAEMON; - thd->system_thread= SYSTEM_THREAD_NDBCLUSTER_BINLOG; - thd->main_security_ctx.host_or_ip= ""; - thd->client_capabilities= 0; - my_net_init(&thd->net, 0, MYF(MY_THREAD_SPECIFIC)); - thd->main_security_ctx.master_access= ~0; - thd->main_security_ctx.priv_user[0]= 0; - /* Do not use user-supplied timeout value for system threads. 
*/ - thd->variables.lock_wait_timeout= LONG_TIMEOUT; - - /* - Set up ndb binlog - */ - sql_print_information("Starting MySQL Cluster Binlog Thread"); - - pthread_detach_this_thread(); - thd->real_id= pthread_self(); - mysql_mutex_lock(&LOCK_thread_count); - threads.append(thd); - mysql_mutex_unlock(&LOCK_thread_count); - thd->lex->start_transaction_opt= 0; - - if (!(s_ndb= new Ndb(g_ndb_cluster_connection, "")) || - s_ndb->init()) - { - sql_print_error("NDB Binlog: Getting Schema Ndb object failed"); - ndb_binlog_thread_running= -1; - mysql_mutex_unlock(&injector_mutex); - mysql_cond_signal(&injector_cond); - goto err; - } - - // empty database - if (!(i_ndb= new Ndb(g_ndb_cluster_connection, "")) || - i_ndb->init()) - { - sql_print_error("NDB Binlog: Getting Ndb object failed"); - ndb_binlog_thread_running= -1; - mysql_mutex_unlock(&injector_mutex); - mysql_cond_signal(&injector_cond); - goto err; - } - - /* init hash for schema object distribution */ - (void) my_hash_init(&ndb_schema_objects, system_charset_info, 32, 0, 0, - (my_hash_get_key)ndb_schema_objects_get_key, 0, 0); - - /* - Expose global reference to our ndb object. 
- - Used by both sql client thread and binlog thread to interact - with the storage - mysql_mutex_lock(&injector_mutex); - */ - injector_thd= thd; - injector_ndb= i_ndb; - p_latest_trans_gci= - injector_ndb->get_ndb_cluster_connection().get_latest_trans_gci(); - schema_ndb= s_ndb; - - if (opt_bin_log) - { - ndb_binlog_running= TRUE; - } - - /* Thread start up completed */ - ndb_binlog_thread_running= 1; - mysql_mutex_unlock(&injector_mutex); - mysql_cond_signal(&injector_cond); - - /* - wait for mysql server to start (so that the binlog is started - and thus can receive the first GAP event) - */ - mysql_mutex_lock(&LOCK_server_started); - while (!mysqld_server_started) - { - struct timespec abstime; - set_timespec(abstime, 1); - mysql_cond_timedwait(&COND_server_started, &LOCK_server_started, - &abstime); - if (ndbcluster_terminating) - { - mysql_mutex_unlock(&LOCK_server_started); - goto err; - } - } - mysql_mutex_unlock(&LOCK_server_started); -restart: - /* - Main NDB Injector loop - */ - while (ndb_binlog_running) - { - /* - check if it is the first log, if so we do not insert a GAP event - as there is really no log to have a GAP in - */ - if (incident_id == 0) - { - LOG_INFO log_info; - mysql_bin_log.get_current_log(&log_info); - int len= strlen(log_info.log_file_name); - uint no= 0; - if ((sscanf(log_info.log_file_name + len - 6, "%u", &no) == 1) && - no == 1) - { - /* this is the fist log, so skip GAP event */ - break; - } - } - - /* - Always insert a GAP event as we cannot know what has happened - in the cluster while not being connected. 
- */ - LEX_STRING const msg[2]= - { - { C_STRING_WITH_LEN("mysqld startup") }, - { C_STRING_WITH_LEN("cluster disconnect")} - }; - int error __attribute__((unused))= - inj->record_incident(thd, INCIDENT_LOST_EVENTS, msg[incident_id]); - DBUG_ASSERT(!error); - break; - } - incident_id= 1; - { - thd->proc_info= "Waiting for ndbcluster to start"; - - mysql_mutex_lock(&injector_mutex); - while (!ndb_schema_share || - (ndb_binlog_running && !ndb_apply_status_share)) - { - /* ndb not connected yet */ - struct timespec abstime; - set_timespec(abstime, 1); - mysql_cond_timedwait(&injector_cond, &injector_mutex, &abstime); - if (ndbcluster_binlog_terminating) - { - mysql_mutex_unlock(&injector_mutex); - goto err; - } - } - mysql_mutex_unlock(&injector_mutex); - - if (thd_ndb == NULL) - { - DBUG_ASSERT(ndbcluster_hton->slot != ~(uint)0); - if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb())) - { - sql_print_error("Could not allocate Thd_ndb object"); - goto err; - } - set_thd_ndb(thd, thd_ndb); - thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP; - thd->query_id= 0; // to keep valgrind quiet - } - } - - { - // wait for the first event - thd->proc_info= "Waiting for first event from ndbcluster"; - int schema_res, res; - Uint64 schema_gci; - do - { - DBUG_PRINT("info", ("Waiting for the first event")); - - if (ndbcluster_binlog_terminating) - goto err; - - schema_res= s_ndb->pollEvents(100, &schema_gci); - } while (schema_gci == 0 || ndb_latest_received_binlog_epoch == schema_gci); - if (ndb_binlog_running) - { - Uint64 gci= i_ndb->getLatestGCI(); - while (gci < schema_gci || gci == ndb_latest_received_binlog_epoch) - { - if (ndbcluster_binlog_terminating) - goto err; - res= i_ndb->pollEvents(10, &gci); - } - if (gci > schema_gci) - { - schema_gci= gci; - } - } - // now check that we have epochs consistant with what we had before the restart - DBUG_PRINT("info", ("schema_res: %d schema_gci: %lu", schema_res, - (long) schema_gci)); - { - i_ndb->flushIncompleteEvents(schema_gci); - 
s_ndb->flushIncompleteEvents(schema_gci); - if (schema_gci < ndb_latest_handled_binlog_epoch) - { - sql_print_error("NDB Binlog: cluster has been restarted --initial or with older filesystem. " - "ndb_latest_handled_binlog_epoch: %u, while current epoch: %u. " - "RESET MASTER should be issued. Resetting ndb_latest_handled_binlog_epoch.", - (unsigned) ndb_latest_handled_binlog_epoch, (unsigned) schema_gci); - *p_latest_trans_gci= 0; - ndb_latest_handled_binlog_epoch= 0; - ndb_latest_applied_binlog_epoch= 0; - ndb_latest_received_binlog_epoch= 0; - } - else if (ndb_latest_applied_binlog_epoch > 0) - { - sql_print_warning("NDB Binlog: cluster has reconnected. " - "Changes to the database that occured while " - "disconnected will not be in the binlog"); - } - if (opt_ndb_extra_logging) - { - sql_print_information("NDB Binlog: starting log at epoch %u", - (unsigned)schema_gci); - } - } - } - { - static char db[]= ""; - thd->db= db; - } - do_ndbcluster_binlog_close_connection= BCCC_running; - for ( ; !((ndbcluster_binlog_terminating || - do_ndbcluster_binlog_close_connection) && - ndb_latest_handled_binlog_epoch >= *p_latest_trans_gci) && - do_ndbcluster_binlog_close_connection != BCCC_restart; ) - { -#ifndef DBUG_OFF - if (do_ndbcluster_binlog_close_connection) - { - DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection: %d, " - "ndb_latest_handled_binlog_epoch: %lu, " - "*p_latest_trans_gci: %lu", - do_ndbcluster_binlog_close_connection, - (ulong) ndb_latest_handled_binlog_epoch, - (ulong) *p_latest_trans_gci)); - } -#endif -#ifdef RUN_NDB_BINLOG_TIMER - main_timer.stop(); - sql_print_information("main_timer %ld ms", main_timer.elapsed_ms()); - main_timer.start(); -#endif - - /* - now we don't want any events before next gci is complete - */ - thd->proc_info= "Waiting for event from ndbcluster"; - thd->set_time(); - - /* wait for event or 1000 ms */ - Uint64 gci= 0, schema_gci; - int res= 0, tot_poll_wait= 1000; - if (ndb_binlog_running) - { - res= 
i_ndb->pollEvents(tot_poll_wait, &gci); - tot_poll_wait= 0; - } - else - { - /* - Just consume any events, not used if no binlogging - e.g. node failure events - */ - Uint64 tmp_gci; - if (i_ndb->pollEvents(0, &tmp_gci)) - while (i_ndb->nextEvent()) - ; - } - int schema_res= s_ndb->pollEvents(tot_poll_wait, &schema_gci); - ndb_latest_received_binlog_epoch= gci; - - while (gci > schema_gci && schema_res >= 0) - { - static char buf[64]; - thd->proc_info= "Waiting for schema epoch"; - my_snprintf(buf, sizeof(buf), "%s %u(%u)", thd->proc_info, (unsigned) schema_gci, (unsigned) gci); - thd->proc_info= buf; - schema_res= s_ndb->pollEvents(10, &schema_gci); - } - - if ((ndbcluster_binlog_terminating || - do_ndbcluster_binlog_close_connection) && - (ndb_latest_handled_binlog_epoch >= *p_latest_trans_gci || - !ndb_binlog_running)) - break; /* Shutting down server */ - - if (ndb_binlog_index && ndb_binlog_index->s->has_old_version()) - { - if (ndb_binlog_index->s->has_old_version()) - { - trans_commit_stmt(thd); - close_thread_tables(thd); - thd->mdl_context.release_transactional_locks(); - ndb_binlog_index= 0; - } - } - - MEM_ROOT **root_ptr= - my_pthread_getspecific_ptr(MEM_ROOT**, THR_MALLOC); - MEM_ROOT *old_root= *root_ptr; - MEM_ROOT mem_root; - init_sql_alloc(&mem_root, 4096, 0, MYF(0)); - List<Cluster_schema> post_epoch_log_list; - List<Cluster_schema> post_epoch_unlock_list; - *root_ptr= &mem_root; - - if (unlikely(schema_res > 0)) - { - thd->proc_info= "Processing events from schema table"; - s_ndb-> - setReportThreshEventGCISlip(opt_ndb_report_thresh_binlog_epoch_slip); - s_ndb-> - setReportThreshEventFreeMem(opt_ndb_report_thresh_binlog_mem_usage); - NdbEventOperation *pOp= s_ndb->nextEvent(); - while (pOp != NULL) - { - if (!pOp->hasError()) - { - ndb_binlog_thread_handle_schema_event(thd, s_ndb, pOp, - &post_epoch_log_list, - &post_epoch_unlock_list, - &mem_root); - DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ? 
- s_ndb->getEventOperation()->getEvent()->getTable()->getName() : - "<empty>")); - DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ? - i_ndb->getEventOperation()->getEvent()->getTable()->getName() : - "<empty>")); - if (i_ndb->getEventOperation() == NULL && - s_ndb->getEventOperation() == NULL && - do_ndbcluster_binlog_close_connection == BCCC_running) - { - DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart")); - do_ndbcluster_binlog_close_connection= BCCC_restart; - if (ndb_latest_received_binlog_epoch < *p_latest_trans_gci && ndb_binlog_running) - { - sql_print_error("NDB Binlog: latest transaction in epoch %lu not in binlog " - "as latest received epoch is %lu", - (ulong) *p_latest_trans_gci, - (ulong) ndb_latest_received_binlog_epoch); - } - } - } - else - sql_print_error("NDB: error %lu (%s) on handling " - "binlog schema event", - (ulong) pOp->getNdbError().code, - pOp->getNdbError().message); - pOp= s_ndb->nextEvent(); - } - } - - if (res > 0) - { - DBUG_PRINT("info", ("pollEvents res: %d", res)); - thd->proc_info= "Processing events"; - NdbEventOperation *pOp= i_ndb->nextEvent(); - ndb_binlog_index_row row; - while (pOp != NULL) - { -#ifdef RUN_NDB_BINLOG_TIMER - Timer gci_timer, write_timer; - int event_count= 0; - gci_timer.start(); -#endif - gci= pOp->getGCI(); - DBUG_PRINT("info", ("Handling gci: %d", (unsigned)gci)); - // sometimes get TE_ALTER with invalid table - DBUG_ASSERT(pOp->getEventType() == NdbDictionary::Event::TE_ALTER || - ! 
IS_NDB_BLOB_PREFIX(pOp->getEvent()->getTable()->getName())); - DBUG_ASSERT(gci <= ndb_latest_received_binlog_epoch); - - /* initialize some variables for this epoch */ - g_ndb_log_slave_updates= opt_log_slave_updates; - i_ndb-> - setReportThreshEventGCISlip(opt_ndb_report_thresh_binlog_epoch_slip); - i_ndb->setReportThreshEventFreeMem(opt_ndb_report_thresh_binlog_mem_usage); - - bzero((char*) &row, sizeof(row)); - thd->variables.character_set_client= &my_charset_latin1; - injector::transaction trans; - // pass table map before epoch - { - Uint32 iter= 0; - const NdbEventOperation *gci_op; - Uint32 event_types; - while ((gci_op= i_ndb->getGCIEventOperations(&iter, &event_types)) - != NULL) - { - NDB_SHARE *share= (NDB_SHARE*)gci_op->getCustomData(); - DBUG_PRINT("info", ("per gci_op: 0x%lx share: 0x%lx event_types: 0x%x", - (long) gci_op, (long) share, event_types)); - // workaround for interface returning TE_STOP events - // which are normally filtered out below in the nextEvent loop - if ((event_types & ~NdbDictionary::Event::TE_STOP) == 0) - { - DBUG_PRINT("info", ("Skipped TE_STOP on table %s", - gci_op->getEvent()->getTable()->getName())); - continue; - } - // this should not happen - if (share == NULL || share->table == NULL) - { - DBUG_PRINT("info", ("no share or table %s!", - gci_op->getEvent()->getTable()->getName())); - continue; - } - if (share == ndb_apply_status_share) - { - // skip this table, it is handled specially - continue; - } - TABLE *table= share->table; -#ifndef DBUG_OFF - const LEX_STRING &name= table->s->table_name; -#endif - if ((event_types & (NdbDictionary::Event::TE_INSERT | - NdbDictionary::Event::TE_UPDATE | - NdbDictionary::Event::TE_DELETE)) == 0) - { - DBUG_PRINT("info", ("skipping non data event table: %.*s", - (int) name.length, name.str)); - continue; - } - if (!trans.good()) - { - DBUG_PRINT("info", - ("Found new data event, initializing transaction")); - inj->new_trans(thd, &trans); - } - DBUG_PRINT("info", ("use_table: %.*s", 
- (int) name.length, name.str)); - injector::transaction::table tbl(table, TRUE); - int ret __attribute__((unused))= trans.use_table(::server_id, tbl); - DBUG_ASSERT(ret == 0); - } - } - if (trans.good()) - { - if (ndb_apply_status_share) - { - TABLE *table= ndb_apply_status_share->table; - -#ifndef DBUG_OFF - const LEX_STRING& name= table->s->table_name; - DBUG_PRINT("info", ("use_table: %.*s", - (int) name.length, name.str)); -#endif - injector::transaction::table tbl(table, TRUE); - int ret __attribute__((unused))= trans.use_table(::server_id, tbl); - DBUG_ASSERT(ret == 0); - - /* - Intialize table->record[0] - */ - empty_record(table); - - table->field[0]->store((longlong)::server_id); - table->field[1]->store((longlong)gci); - table->field[2]->store("", 0, &my_charset_bin); - table->field[3]->store((longlong)0); - table->field[4]->store((longlong)0); - trans.write_row(::server_id, - injector::transaction::table(table, TRUE), - &table->s->all_set, table->s->fields, - table->record[0]); - } - else - { - sql_print_error("NDB: Could not get apply status share"); - } - } -#ifdef RUN_NDB_BINLOG_TIMER - write_timer.start(); -#endif - do - { -#ifdef RUN_NDB_BINLOG_TIMER - event_count++; -#endif - if (pOp->hasError() && - ndb_binlog_thread_handle_error(i_ndb, pOp, row) < 0) - goto err; - -#ifndef DBUG_OFF - { - NDB_SHARE *share= (NDB_SHARE*) pOp->getCustomData(); - DBUG_PRINT("info", - ("EVENT TYPE: %d GCI: %ld last applied: %ld " - "share: 0x%lx (%s.%s)", pOp->getEventType(), - (long) gci, - (long) ndb_latest_applied_binlog_epoch, - (long) share, - share ? share->db : "'NULL'", - share ? 
share->table_name : "'NULL'")); - DBUG_ASSERT(share != 0); - } - // assert that there is consistancy between gci op list - // and event list - { - Uint32 iter= 0; - const NdbEventOperation *gci_op; - Uint32 event_types; - while ((gci_op= i_ndb->getGCIEventOperations(&iter, &event_types)) - != NULL) - { - if (gci_op == pOp) - break; - } - DBUG_ASSERT(gci_op == pOp); - DBUG_ASSERT((event_types & pOp->getEventType()) != 0); - } -#endif - if ((unsigned) pOp->getEventType() < - (unsigned) NDBEVENT::TE_FIRST_NON_DATA_EVENT) - ndb_binlog_thread_handle_data_event(i_ndb, pOp, row, trans); - else - { - // set injector_ndb database/schema from table internal name - int ret __attribute__((unused))= - i_ndb->setDatabaseAndSchemaName(pOp->getEvent()->getTable()); - DBUG_ASSERT(ret == 0); - ndb_binlog_thread_handle_non_data_event(thd, i_ndb, pOp, row); - // reset to catch errors - i_ndb->setDatabaseName(""); - DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ? - s_ndb->getEventOperation()->getEvent()->getTable()->getName() : - "<empty>")); - DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ? - i_ndb->getEventOperation()->getEvent()->getTable()->getName() : - "<empty>")); - if (i_ndb->getEventOperation() == NULL && - s_ndb->getEventOperation() == NULL && - do_ndbcluster_binlog_close_connection == BCCC_running) - { - DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart")); - do_ndbcluster_binlog_close_connection= BCCC_restart; - if (ndb_latest_received_binlog_epoch < *p_latest_trans_gci && ndb_binlog_running) - { - sql_print_error("NDB Binlog: latest transaction in epoch %lu not in binlog " - "as latest received epoch is %lu", - (ulong) *p_latest_trans_gci, - (ulong) ndb_latest_received_binlog_epoch); - } - } - } - - pOp= i_ndb->nextEvent(); - } while (pOp && pOp->getGCI() == gci); - - /* - note! 
pOp is not referring to an event in the next epoch - or is == 0 - */ -#ifdef RUN_NDB_BINLOG_TIMER - write_timer.stop(); -#endif - - if (trans.good()) - { - //DBUG_ASSERT(row.n_inserts || row.n_updates || row.n_deletes); - thd->proc_info= "Committing events to binlog"; - injector::transaction::binlog_pos start= trans.start_pos(); - if (int r= trans.commit()) - { - sql_print_error("NDB Binlog: " - "Error during COMMIT of GCI. Error: %d", - r); - /* TODO: Further handling? */ - } - row.gci= gci; - row.master_log_file= start.file_name(); - row.master_log_pos= start.file_pos(); - - DBUG_PRINT("info", ("COMMIT gci: %lu", (ulong) gci)); - if (ndb_update_ndb_binlog_index) - ndb_add_ndb_binlog_index(thd, &row); - ndb_latest_applied_binlog_epoch= gci; - } - ndb_latest_handled_binlog_epoch= gci; -#ifdef RUN_NDB_BINLOG_TIMER - gci_timer.stop(); - sql_print_information("gci %ld event_count %d write time " - "%ld(%d e/s), total time %ld(%d e/s)", - (ulong)gci, event_count, - write_timer.elapsed_ms(), - (1000*event_count) / write_timer.elapsed_ms(), - gci_timer.elapsed_ms(), - (1000*event_count) / gci_timer.elapsed_ms()); -#endif - } - } - - ndb_binlog_thread_handle_schema_event_post_epoch(thd, - &post_epoch_log_list, - &post_epoch_unlock_list); - free_root(&mem_root, MYF(0)); - *root_ptr= old_root; - ndb_latest_handled_binlog_epoch= ndb_latest_received_binlog_epoch; - } - if (do_ndbcluster_binlog_close_connection == BCCC_restart) - { - ndb_binlog_tables_inited= FALSE; - trans_commit_stmt(thd); - close_thread_tables(thd); - thd->mdl_context.release_transactional_locks(); - ndb_binlog_index= 0; - goto restart; - } -err: - sql_print_information("Stopping Cluster Binlog"); - DBUG_PRINT("info",("Shutting down cluster binlog thread")); - thd->proc_info= "Shutting down"; - thd->get_stmt_da()->set_overwrite_status(true); - thd->is_error() ? 
trans_rollback_stmt(thd) : trans_commit_stmt(thd); - thd->get_stmt_da()->set_overwrite_status(false); - close_thread_tables(thd); - thd->mdl_context.release_transactional_locks(); - mysql_mutex_lock(&injector_mutex); - /* don't mess with the injector_ndb anymore from other threads */ - injector_thd= 0; - injector_ndb= 0; - p_latest_trans_gci= 0; - schema_ndb= 0; - mysql_mutex_unlock(&injector_mutex); - thd->db= 0; // as not to try to free memory - - if (ndb_apply_status_share) - { - /* ndb_share reference binlog extra free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog extra free use_count: %u", - ndb_apply_status_share->key, - ndb_apply_status_share->use_count)); - free_share(&ndb_apply_status_share); - ndb_apply_status_share= 0; - } - if (ndb_schema_share) - { - /* begin protect ndb_schema_share */ - mysql_mutex_lock(&ndb_schema_share_mutex); - /* ndb_share reference binlog extra free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog extra free use_count: %u", - ndb_schema_share->key, - ndb_schema_share->use_count)); - free_share(&ndb_schema_share); - ndb_schema_share= 0; - ndb_binlog_tables_inited= 0; - mysql_mutex_unlock(&ndb_schema_share_mutex); - /* end protect ndb_schema_share */ - } - - /* remove all event operations */ - if (s_ndb) - { - NdbEventOperation *op; - DBUG_PRINT("info",("removing all event operations")); - while ((op= s_ndb->getEventOperation())) - { - DBUG_ASSERT(! 
IS_NDB_BLOB_PREFIX(op->getEvent()->getTable()->getName())); - DBUG_PRINT("info",("removing event operation on %s", - op->getEvent()->getName())); - NDB_SHARE *share= (NDB_SHARE*) op->getCustomData(); - DBUG_ASSERT(share != 0); - DBUG_ASSERT(share->op == op || - share->op_old == op); - share->op= share->op_old= 0; - /* ndb_share reference binlog free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog free use_count: %u", - share->key, share->use_count)); - free_share(&share); - s_ndb->dropEventOperation(op); - } - delete s_ndb; - s_ndb= 0; - } - if (i_ndb) - { - NdbEventOperation *op; - DBUG_PRINT("info",("removing all event operations")); - while ((op= i_ndb->getEventOperation())) - { - DBUG_ASSERT(! IS_NDB_BLOB_PREFIX(op->getEvent()->getTable()->getName())); - DBUG_PRINT("info",("removing event operation on %s", - op->getEvent()->getName())); - NDB_SHARE *share= (NDB_SHARE*) op->getCustomData(); - DBUG_ASSERT(share != 0); - DBUG_ASSERT(share->op == op || - share->op_old == op); - share->op= share->op_old= 0; - /* ndb_share reference binlog free */ - DBUG_PRINT("NDB_SHARE", ("%s binlog free use_count: %u", - share->key, share->use_count)); - free_share(&share); - i_ndb->dropEventOperation(op); - } - delete i_ndb; - i_ndb= 0; - } - - my_hash_free(&ndb_schema_objects); - - delete thd; - - ndb_binlog_thread_running= -1; - ndb_binlog_running= FALSE; - mysql_cond_signal(&injector_cond); - - DBUG_PRINT("exit", ("ndb_binlog_thread")); - - DBUG_LEAVE; // Must match DBUG_ENTER() - my_thread_end(); - pthread_exit(0); - return NULL; // Avoid compiler warnings -} - -bool -ndbcluster_show_status_binlog(THD* thd, stat_print_fn *stat_print, - enum ha_stat_type stat_type) -{ - char buf[IO_SIZE]; - uint buflen; - ulonglong ndb_latest_epoch= 0; - DBUG_ENTER("ndbcluster_show_status_binlog"); - - mysql_mutex_lock(&injector_mutex); - if (injector_ndb) - { - char buff1[22],buff2[22],buff3[22],buff4[22],buff5[22]; - ndb_latest_epoch= injector_ndb->getLatestGCI(); - 
mysql_mutex_unlock(&injector_mutex); - - buflen= - snprintf(buf, sizeof(buf), - "latest_epoch=%s, " - "latest_trans_epoch=%s, " - "latest_received_binlog_epoch=%s, " - "latest_handled_binlog_epoch=%s, " - "latest_applied_binlog_epoch=%s", - llstr(ndb_latest_epoch, buff1), - llstr(*p_latest_trans_gci, buff2), - llstr(ndb_latest_received_binlog_epoch, buff3), - llstr(ndb_latest_handled_binlog_epoch, buff4), - llstr(ndb_latest_applied_binlog_epoch, buff5)); - if (stat_print(thd, ndbcluster_hton_name, ndbcluster_hton_name_length, - "binlog", strlen("binlog"), - buf, buflen)) - DBUG_RETURN(TRUE); - } - else - mysql_mutex_unlock(&injector_mutex); - DBUG_RETURN(FALSE); -} - -#endif /* HAVE_NDB_BINLOG */ -#endif diff --git a/sql/ha_ndbcluster_binlog.h b/sql/ha_ndbcluster_binlog.h deleted file mode 100644 index a02f687d76f..00000000000 --- a/sql/ha_ndbcluster_binlog.h +++ /dev/null @@ -1,239 +0,0 @@ -#ifndef HA_NDBCLUSTER_BINLOG_INCLUDED -#define HA_NDBCLUSTER_BINLOG_INCLUDED - -/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include "sql_class.h" /* THD */ - -// Typedefs for long names -typedef NdbDictionary::Object NDBOBJ; -typedef NdbDictionary::Column NDBCOL; -typedef NdbDictionary::Table NDBTAB; -typedef NdbDictionary::Index NDBINDEX; -typedef NdbDictionary::Dictionary NDBDICT; -typedef NdbDictionary::Event NDBEVENT; - -#define IS_TMP_PREFIX(A) (is_prefix(A, tmp_file_prefix)) - -#define INJECTOR_EVENT_LEN 200 - -#define NDB_INVALID_SCHEMA_OBJECT 241 - -/* server id's with high bit set is reservered */ -#define NDB_ANYVALUE_FOR_NOLOGGING 0xFFFFFFFF -#define NDB_ANYVALUE_RESERVED 0x80000000 - -extern handlerton *ndbcluster_hton; - -/* - The numbers below must not change as they - are passed between mysql servers, and if changed - would break compatablility. Add new numbers to - the end. -*/ -enum SCHEMA_OP_TYPE -{ - SOT_DROP_TABLE= 0, - SOT_CREATE_TABLE= 1, - SOT_RENAME_TABLE_NEW= 2, - SOT_ALTER_TABLE= 3, - SOT_DROP_DB= 4, - SOT_CREATE_DB= 5, - SOT_ALTER_DB= 6, - SOT_CLEAR_SLOCK= 7, - SOT_TABLESPACE= 8, - SOT_LOGFILE_GROUP= 9, - SOT_RENAME_TABLE= 10, - SOT_TRUNCATE_TABLE= 11 -}; - -const uint max_ndb_nodes= 64; /* multiple of 32 */ - -static const char *ha_ndb_ext=".ndb"; -static const char share_prefix[]= "./"; - -class Ndb_table_guard -{ -public: - Ndb_table_guard(NDBDICT *dict, const char *tabname) - : m_dict(dict) - { - DBUG_ENTER("Ndb_table_guard"); - m_ndbtab= m_dict->getTableGlobal(tabname); - m_invalidate= 0; - DBUG_PRINT("info", ("m_ndbtab: %p", m_ndbtab)); - DBUG_VOID_RETURN; - } - ~Ndb_table_guard() - { - DBUG_ENTER("~Ndb_table_guard"); - if (m_ndbtab) - { - DBUG_PRINT("info", ("m_ndbtab: %p m_invalidate: %d", - m_ndbtab, m_invalidate)); - m_dict->removeTableGlobal(*m_ndbtab, m_invalidate); - } - DBUG_VOID_RETURN; - } - const NDBTAB *get_table() { return 
m_ndbtab; } - void invalidate() { m_invalidate= 1; } - const NDBTAB *release() - { - DBUG_ENTER("Ndb_table_guard::release"); - const NDBTAB *tmp= m_ndbtab; - DBUG_PRINT("info", ("m_ndbtab: %p", m_ndbtab)); - m_ndbtab = 0; - DBUG_RETURN(tmp); - } -private: - const NDBTAB *m_ndbtab; - NDBDICT *m_dict; - int m_invalidate; -}; - -#ifdef HAVE_NDB_BINLOG - -#ifdef HAVE_PSI_INTERFACE -extern PSI_mutex_key key_injector_mutex, key_ndb_schema_share_mutex, - key_ndb_schema_object_mutex; -extern PSI_cond_key key_injector_cond; -extern PSI_thread_key key_thread_ndb_binlog; -#endif /* HAVE_PSI_INTERFACE */ - -extern pthread_t ndb_binlog_thread; -extern mysql_mutex_t injector_mutex; -extern mysql_cond_t injector_cond; - -extern unsigned char g_node_id_map[max_ndb_nodes]; -extern pthread_t ndb_util_thread; -extern mysql_mutex_t LOCK_ndb_util_thread; -extern mysql_cond_t COND_ndb_util_thread; -extern int ndbcluster_util_inited; -extern mysql_mutex_t ndbcluster_mutex; -extern HASH ndbcluster_open_tables; -extern Ndb_cluster_connection* g_ndb_cluster_connection; -extern long ndb_number_of_storage_nodes; - -/* - Initialize the binlog part of the ndb handlerton -*/ -void ndbcluster_binlog_init_handlerton(); -/* - Initialize the binlog part of the NDB_SHARE -*/ -int ndbcluster_binlog_init_share(NDB_SHARE *share, TABLE *table); - -bool ndbcluster_check_if_local_table(const char *dbname, const char *tabname); -bool ndbcluster_check_if_local_tables_in_db(THD *thd, const char *dbname); - -int ndbcluster_create_binlog_setup(Ndb *ndb, const char *key, - uint key_len, - const char *db, - const char *table_name, - my_bool share_may_exist); -int ndbcluster_create_event(Ndb *ndb, const NDBTAB *table, - const char *event_name, NDB_SHARE *share, - int push_warning= 0); -int ndbcluster_create_event_ops(NDB_SHARE *share, - const NDBTAB *ndbtab, - const char *event_name); -int ndbcluster_log_schema_op(THD *thd, NDB_SHARE *share, - const char *query, int query_length, - const char *db, const char 
*table_name, - uint32 ndb_table_id, - uint32 ndb_table_version, - enum SCHEMA_OP_TYPE type, - const char *new_db, - const char *new_table_name); -int ndbcluster_handle_drop_table(Ndb *ndb, const char *event_name, - NDB_SHARE *share, - const char *type_str); -void ndb_rep_event_name(String *event_name, - const char *db, const char *tbl); -int ndb_create_table_from_engine(THD *thd, const char *db, - const char *table_name); -int ndbcluster_binlog_start(); -pthread_handler_t ndb_binlog_thread_func(void *arg); - -/* - table mysql.ndb_apply_status -*/ -int ndbcluster_setup_binlog_table_shares(THD *thd); -extern NDB_SHARE *ndb_apply_status_share; -extern NDB_SHARE *ndb_schema_share; - -extern THD *injector_thd; -extern my_bool ndb_binlog_running; -extern my_bool ndb_binlog_tables_inited; - -bool -ndbcluster_show_status_binlog(THD* thd, stat_print_fn *stat_print, - enum ha_stat_type stat_type); - -/* - prototypes for ndb handler utility function also needed by - the ndb binlog code -*/ -int cmp_frm(const NDBTAB *ndbtab, const void *pack_data, - uint pack_length); -int ndbcluster_find_all_files(THD *thd); -#endif /* HAVE_NDB_BINLOG */ - -void ndb_unpack_record(TABLE *table, NdbValue *value, - MY_BITMAP *defined, uchar *buf); -char *ndb_pack_varchar(const NDBCOL *col, char *buf, - const char *str, int sz); - -NDB_SHARE *ndbcluster_get_share(const char *key, - TABLE *table, - bool create_if_not_exists, - bool have_lock); -NDB_SHARE *ndbcluster_get_share(NDB_SHARE *share); -void ndbcluster_free_share(NDB_SHARE **share, bool have_lock); -void ndbcluster_real_free_share(NDB_SHARE **share); -int handle_trailing_share(NDB_SHARE *share); -inline NDB_SHARE *get_share(const char *key, - TABLE *table, - bool create_if_not_exists= TRUE, - bool have_lock= FALSE) -{ - return ndbcluster_get_share(key, table, create_if_not_exists, have_lock); -} - -inline NDB_SHARE *get_share(NDB_SHARE *share) -{ - return ndbcluster_get_share(share); -} - -inline void free_share(NDB_SHARE **share, bool 
have_lock= FALSE) -{ - ndbcluster_free_share(share, have_lock); -} - -inline -Thd_ndb * -get_thd_ndb(THD *thd) -{ return (Thd_ndb *) thd_get_ha_data(thd, ndbcluster_hton); } - -inline -void -set_thd_ndb(THD *thd, Thd_ndb *thd_ndb) -{ thd_set_ha_data(thd, ndbcluster_hton, thd_ndb); } - -Ndb* check_ndb_in_thd(THD* thd); - -#endif /* HA_NDBCLUSTER_BINLOG_INCLUDED */ diff --git a/sql/ha_ndbcluster_cond.cc b/sql/ha_ndbcluster_cond.cc deleted file mode 100644 index fd80304d400..00000000000 --- a/sql/ha_ndbcluster_cond.cc +++ /dev/null @@ -1,1475 +0,0 @@ -/* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ - -/* - This file defines the NDB Cluster handler engine_condition_pushdown -*/ - -#ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation -#endif - -#include "sql_priv.h" -#include "sql_class.h" // set_var.h: THD -#include "my_global.h" // WITH_* -#include "log.h" // sql_print_error - -#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE -#include <ndbapi/NdbApi.hpp> -#include "ha_ndbcluster_cond.h" - -// Typedefs for long names -typedef NdbDictionary::Column NDBCOL; -typedef NdbDictionary::Table NDBTAB; - - -/** - Serialize a constant item into a Ndb_cond node. 
- - @param const_type item's result type - @param item item to be serialized - @param curr_cond Ndb_cond node the item to be serialized into - @param context Traverse context -*/ - -static void ndb_serialize_const(Item_result const_type, const Item *item, - Ndb_cond *curr_cond, - Ndb_cond_traverse_context *context) -{ - DBUG_ASSERT(item->const_item()); - switch (const_type) { - case STRING_RESULT: - { - NDB_ITEM_QUALIFICATION q; - q.value_type= Item::STRING_ITEM; - curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); - if (! context->expecting_no_field_result()) - { - // We have not seen the field argument yet - context->expect_only(Item::FIELD_ITEM); - context->expect_only_field_result(STRING_RESULT); - context->expect_collation(item->collation.collation); - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - // Check that string result have correct collation - if (!context->expecting_collation(item->collation.collation)) - { - DBUG_PRINT("info", ("Found non-matching collation %s", - item->collation.collation->name)); - context->supported= FALSE; - } - } - break; - } - case REAL_RESULT: - { - NDB_ITEM_QUALIFICATION q; - q.value_type= Item::REAL_ITEM; - curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); - if (! context->expecting_no_field_result()) - { - // We have not seen the field argument yet - context->expect_only(Item::FIELD_ITEM); - context->expect_only_field_result(REAL_RESULT); - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - } - break; - } - case INT_RESULT: - { - NDB_ITEM_QUALIFICATION q; - q.value_type= Item::INT_ITEM; - curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); - if (! 
context->expecting_no_field_result()) - { - // We have not seen the field argument yet - context->expect_only(Item::FIELD_ITEM); - context->expect_only_field_result(INT_RESULT); - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - } - break; - } - case DECIMAL_RESULT: - { - NDB_ITEM_QUALIFICATION q; - q.value_type= Item::DECIMAL_ITEM; - curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); - if (! context->expecting_no_field_result()) - { - // We have not seen the field argument yet - context->expect_only(Item::FIELD_ITEM); - context->expect_only_field_result(DECIMAL_RESULT); - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - } - break; - } - default: - break; - } -} -/* - Serialize the item tree into a linked list represented by Ndb_cond - for fast generation of NbdScanFilter. Adds information such as - position of fields that is not directly available in the Item tree. - Also checks if condition is supported. 
-*/ -void ndb_serialize_cond(const Item *item, void *arg) -{ - Ndb_cond_traverse_context *context= (Ndb_cond_traverse_context *) arg; - DBUG_ENTER("ndb_serialize_cond"); - - // Check if we are skipping arguments to a function to be evaluated - if (context->skip) - { - if (!item) - { - DBUG_PRINT("info", ("Unexpected mismatch of found and expected number of function arguments %u", context->skip)); - sql_print_error("ndb_serialize_cond: Unexpected mismatch of found and " - "expected number of function arguments %u", context->skip); - context->skip= 0; - DBUG_VOID_RETURN; - } - DBUG_PRINT("info", ("Skiping argument %d", context->skip)); - context->skip--; - switch (item->type()) { - case Item::FUNC_ITEM: - { - Item_func *func_item= (Item_func *) item; - context->skip+= func_item->argument_count(); - break; - } - case Item::INT_ITEM: - case Item::REAL_ITEM: - case Item::STRING_ITEM: - case Item::VARBIN_ITEM: - case Item::DECIMAL_ITEM: - break; - default: - context->supported= FALSE; - break; - } - - DBUG_VOID_RETURN; - } - - if (context->supported) - { - Ndb_rewrite_context *rewrite_context2= context->rewrite_stack; - const Item_func *rewrite_func_item; - // Check if we are rewriting some unsupported function call - if (rewrite_context2 && - (rewrite_func_item= rewrite_context2->func_item) && - rewrite_context2->count++ == 0) - { - switch (rewrite_func_item->functype()) { - case Item_func::BETWEEN: - /* - Rewrite - <field>|<const> BETWEEN <const1>|<field1> AND <const2>|<field2> - to <field>|<const> > <const1>|<field1> AND - <field>|<const> < <const2>|<field2> - or actually in prefix format - BEGIN(AND) GT(<field>|<const>, <const1>|<field1>), - LT(<field>|<const>, <const2>|<field2>), END() - */ - case Item_func::IN_FUNC: - { - /* - Rewrite <field>|<const> IN(<const1>|<field1>, <const2>|<field2>,..) - to <field>|<const> = <const1>|<field1> OR - <field> = <const2>|<field2> ... 
- or actually in prefix format - BEGIN(OR) EQ(<field>|<const>, <const1><field1>), - EQ(<field>|<const>, <const2>|<field2>), ... END() - Each part of the disjunction is added for each call - to ndb_serialize_cond and end of rewrite statement - is wrapped in end of ndb_serialize_cond - */ - if (context->expecting(item->type()) || item->const_item()) - { - // This is the <field>|<const> item, save it in the rewrite context - rewrite_context2->left_hand_item= item; - if (item->type() == Item::FUNC_ITEM) - { - Item_func *func_item= (Item_func *) item; - if ((func_item->functype() == Item_func::UNKNOWN_FUNC || - func_item->functype() == Item_func::NEG_FUNC) && - func_item->const_item()) - { - // Skip any arguments since we will evaluate function instead - DBUG_PRINT("info", ("Skip until end of arguments marker")); - context->skip= func_item->argument_count(); - } - else - { - DBUG_PRINT("info", ("Found unsupported functional expression in BETWEEN|IN")); - context->supported= FALSE; - DBUG_VOID_RETURN; - - } - } - } - else - { - // Non-supported BETWEEN|IN expression - DBUG_PRINT("info", ("Found unexpected item of type %u in BETWEEN|IN", - item->type())); - context->supported= FALSE; - DBUG_VOID_RETURN; - } - break; - } - default: - context->supported= FALSE; - break; - } - DBUG_VOID_RETURN; - } - else - { - Ndb_cond_stack *ndb_stack= context->stack_ptr; - Ndb_cond *prev_cond= context->cond_ptr; - Ndb_cond *curr_cond= context->cond_ptr= new Ndb_cond(); - if (!ndb_stack->ndb_cond) - ndb_stack->ndb_cond= curr_cond; - curr_cond->prev= prev_cond; - if (prev_cond) prev_cond->next= curr_cond; - // Check if we are rewriting some unsupported function call - if (context->rewrite_stack) - { - Ndb_rewrite_context *rewrite_context= context->rewrite_stack; - const Item_func *func_item= rewrite_context->func_item; - switch (func_item->functype()) { - case Item_func::BETWEEN: - { - /* - Rewrite - <field>|<const> BETWEEN <const1>|<field1> AND <const2>|<field2> - to <field>|<const> > 
<const1>|<field1> AND - <field>|<const> < <const2>|<field2> - or actually in prefix format - BEGIN(AND) GT(<field>|<const>, <const1>|<field1>), - LT(<field>|<const>, <const2>|<field2>), END() - */ - if (rewrite_context->count == 2) - { - // Lower limit of BETWEEN - DBUG_PRINT("info", ("GE_FUNC")); - curr_cond->ndb_item= new Ndb_item(Item_func::GE_FUNC, 2); - } - else if (rewrite_context->count == 3) - { - // Upper limit of BETWEEN - DBUG_PRINT("info", ("LE_FUNC")); - curr_cond->ndb_item= new Ndb_item(Item_func::LE_FUNC, 2); - } - else - { - // Illegal BETWEEN expression - DBUG_PRINT("info", ("Illegal BETWEEN expression")); - context->supported= FALSE; - DBUG_VOID_RETURN; - } - break; - } - case Item_func::IN_FUNC: - { - /* - Rewrite <field>|<const> IN(<const1>|<field1>, <const2>|<field2>,..) - to <field>|<const> = <const1>|<field1> OR - <field> = <const2>|<field2> ... - or actually in prefix format - BEGIN(OR) EQ(<field>|<const>, <const1><field1>), - EQ(<field>|<const>, <const2>|<field2>), ... 
END() - Each part of the disjunction is added for each call - to ndb_serialize_cond and end of rewrite statement - is wrapped in end of ndb_serialize_cond - */ - DBUG_PRINT("info", ("EQ_FUNC")); - curr_cond->ndb_item= new Ndb_item(Item_func::EQ_FUNC, 2); - break; - } - default: - context->supported= FALSE; - } - // Handle left hand <field>|<const> - context->rewrite_stack= NULL; // Disable rewrite mode - context->expect_only(Item::FIELD_ITEM); - context->expect_field_result(STRING_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(INT_RESULT); - context->expect_field_result(DECIMAL_RESULT); - context->expect(Item::INT_ITEM); - context->expect(Item::STRING_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect(Item::FUNC_ITEM); - ndb_serialize_cond(rewrite_context->left_hand_item, arg); - context->skip= 0; // Any FUNC_ITEM expression has already been parsed - context->rewrite_stack= rewrite_context; // Enable rewrite mode - if (!context->supported) - DBUG_VOID_RETURN; - - prev_cond= context->cond_ptr; - curr_cond= context->cond_ptr= new Ndb_cond(); - prev_cond->next= curr_cond; - } - - // Check for end of AND/OR expression - if (!item) - { - // End marker for condition group - DBUG_PRINT("info", ("End of condition group")); - curr_cond->ndb_item= new Ndb_item(NDB_END_COND); - } - else - { - switch (item->type()) { - case Item::FIELD_ITEM: - { - Item_field *field_item= (Item_field *) item; - Field *field= field_item->field; - enum_field_types type= field->type(); - /* - Check that the field is part of the table of the handler - instance and that we expect a field with of this result type. 
- */ - if (context->table->s == field->table->s) - { - const NDBTAB *tab= context->ndb_table; - DBUG_PRINT("info", ("FIELD_ITEM")); - DBUG_PRINT("info", ("table %s", tab->getName())); - DBUG_PRINT("info", ("column %s", field->field_name)); - DBUG_PRINT("info", ("type %d", field->type())); - DBUG_PRINT("info", ("result type %d", field->result_type())); - - // Check that we are expecting a field and with the correct - // result type - if (context->expecting(Item::FIELD_ITEM) && - context->expecting_field_type(field->type()) && - (context->expecting_field_result(field->result_type()) || - // Date and year can be written as string or int - ((type == MYSQL_TYPE_TIME || - type == MYSQL_TYPE_DATE || - type == MYSQL_TYPE_YEAR || - type == MYSQL_TYPE_DATETIME) - ? (context->expecting_field_result(STRING_RESULT) || - context->expecting_field_result(INT_RESULT)) - : TRUE)) && - // Bit fields no yet supported in scan filter - type != MYSQL_TYPE_BIT && - // No BLOB support in scan filter - type != MYSQL_TYPE_TINY_BLOB && - type != MYSQL_TYPE_MEDIUM_BLOB && - type != MYSQL_TYPE_LONG_BLOB && - type != MYSQL_TYPE_BLOB) - { - const NDBCOL *col= tab->getColumn(field->field_name); - DBUG_ASSERT(col); - curr_cond->ndb_item= new Ndb_item(field, col->getColumnNo()); - context->dont_expect(Item::FIELD_ITEM); - context->expect_no_field_result(); - if (! 
context->expecting_nothing()) - { - // We have not seen second argument yet - if (type == MYSQL_TYPE_TIME || - type == MYSQL_TYPE_DATE || - type == MYSQL_TYPE_YEAR || - type == MYSQL_TYPE_DATETIME) - { - context->expect_only(Item::STRING_ITEM); - context->expect(Item::INT_ITEM); - } - else - switch (field->result_type()) { - case STRING_RESULT: - // Expect char string or binary string - context->expect_only(Item::STRING_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect_collation(field_item->collation.collation); - break; - case REAL_RESULT: - context->expect_only(Item::REAL_ITEM); - context->expect(Item::DECIMAL_ITEM); - context->expect(Item::INT_ITEM); - break; - case INT_RESULT: - context->expect_only(Item::INT_ITEM); - context->expect(Item::VARBIN_ITEM); - break; - case DECIMAL_RESULT: - context->expect_only(Item::DECIMAL_ITEM); - context->expect(Item::REAL_ITEM); - context->expect(Item::INT_ITEM); - break; - default: - break; - } - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - // Check that field and string constant collations are the same - if ((field->result_type() == STRING_RESULT) && - !context->expecting_collation(item->collation.collation) - && type != MYSQL_TYPE_TIME - && type != MYSQL_TYPE_DATE - && type != MYSQL_TYPE_YEAR - && type != MYSQL_TYPE_DATETIME) - { - DBUG_PRINT("info", ("Found non-matching collation %s", - item->collation.collation->name)); - context->supported= FALSE; - } - } - break; - } - else - { - DBUG_PRINT("info", ("Was not expecting field of type %u(%u)", - field->result_type(), type)); - context->supported= FALSE; - } - } - else - { - DBUG_PRINT("info", ("Was not expecting field from table %s (%s)", - context->table->s->table_name.str, - field->table->s->table_name.str)); - context->supported= FALSE; - } - break; - } - case Item::FUNC_ITEM: - { - Item_func *func_item= (Item_func *) item; - // Check that we expect a function or 
functional expression here - if (context->expecting(Item::FUNC_ITEM) || - func_item->functype() == Item_func::UNKNOWN_FUNC || - func_item->functype() == Item_func::NEG_FUNC) - context->expect_nothing(); - else - { - // Did not expect function here - context->supported= FALSE; - break; - } - - switch (func_item->functype()) { - case Item_func::EQ_FUNC: - { - DBUG_PRINT("info", ("EQ_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::STRING_ITEM); - context->expect(Item::INT_ITEM); - context->expect(Item::REAL_ITEM); - context->expect(Item::DECIMAL_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect(Item::FIELD_ITEM); - context->expect_field_result(STRING_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(INT_RESULT); - context->expect_field_result(DECIMAL_RESULT); - break; - } - case Item_func::NE_FUNC: - { - DBUG_PRINT("info", ("NE_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::STRING_ITEM); - context->expect(Item::INT_ITEM); - context->expect(Item::REAL_ITEM); - context->expect(Item::DECIMAL_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect(Item::FIELD_ITEM); - context->expect_field_result(STRING_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(INT_RESULT); - context->expect_field_result(DECIMAL_RESULT); - break; - } - case Item_func::LT_FUNC: - { - DBUG_PRINT("info", ("LT_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::STRING_ITEM); - context->expect(Item::INT_ITEM); - context->expect(Item::REAL_ITEM); - context->expect(Item::DECIMAL_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect(Item::FIELD_ITEM); - context->expect_field_result(STRING_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(INT_RESULT); - context->expect_field_result(DECIMAL_RESULT); - break; 
- } - case Item_func::LE_FUNC: - { - DBUG_PRINT("info", ("LE_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::STRING_ITEM); - context->expect(Item::INT_ITEM); - context->expect(Item::REAL_ITEM); - context->expect(Item::DECIMAL_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect(Item::FIELD_ITEM); - context->expect_field_result(STRING_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(INT_RESULT); - context->expect_field_result(DECIMAL_RESULT); - break; - } - case Item_func::GE_FUNC: - { - DBUG_PRINT("info", ("GE_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::STRING_ITEM); - context->expect(Item::INT_ITEM); - context->expect(Item::REAL_ITEM); - context->expect(Item::DECIMAL_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect(Item::FIELD_ITEM); - context->expect_field_result(STRING_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(INT_RESULT); - context->expect_field_result(DECIMAL_RESULT); - break; - } - case Item_func::GT_FUNC: - { - DBUG_PRINT("info", ("GT_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::STRING_ITEM); - context->expect(Item::REAL_ITEM); - context->expect(Item::DECIMAL_ITEM); - context->expect(Item::INT_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect(Item::FIELD_ITEM); - context->expect_field_result(STRING_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(INT_RESULT); - context->expect_field_result(DECIMAL_RESULT); - break; - } - case Item_func::LIKE_FUNC: - { - DBUG_PRINT("info", ("LIKE_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::STRING_ITEM); - context->expect(Item::FIELD_ITEM); - context->expect_only_field_type(MYSQL_TYPE_STRING); - 
context->expect_field_type(MYSQL_TYPE_VAR_STRING); - context->expect_field_type(MYSQL_TYPE_VARCHAR); - context->expect_field_result(STRING_RESULT); - context->expect(Item::FUNC_ITEM); - break; - } - case Item_func::ISNULL_FUNC: - { - DBUG_PRINT("info", ("ISNULL_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::FIELD_ITEM); - context->expect_field_result(STRING_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(INT_RESULT); - context->expect_field_result(DECIMAL_RESULT); - break; - } - case Item_func::ISNOTNULL_FUNC: - { - DBUG_PRINT("info", ("ISNOTNULL_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::FIELD_ITEM); - context->expect_field_result(STRING_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(INT_RESULT); - context->expect_field_result(DECIMAL_RESULT); - break; - } - case Item_func::NOT_FUNC: - { - DBUG_PRINT("info", ("NOT_FUNC")); - curr_cond->ndb_item= new Ndb_item(func_item->functype(), - func_item); - context->expect(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - break; - } - case Item_func::BETWEEN: - { - DBUG_PRINT("info", ("BETWEEN, rewriting using AND")); - Item_func_between *between_func= (Item_func_between *) func_item; - Ndb_rewrite_context *rewrite_context= - new Ndb_rewrite_context(func_item); - rewrite_context->next= context->rewrite_stack; - context->rewrite_stack= rewrite_context; - if (between_func->negated) - { - DBUG_PRINT("info", ("NOT_FUNC")); - curr_cond->ndb_item= new Ndb_item(Item_func::NOT_FUNC, 1); - prev_cond= curr_cond; - curr_cond= context->cond_ptr= new Ndb_cond(); - curr_cond->prev= prev_cond; - prev_cond->next= curr_cond; - } - DBUG_PRINT("info", ("COND_AND_FUNC")); - curr_cond->ndb_item= - new Ndb_item(Item_func::COND_AND_FUNC, - func_item->argument_count() - 1); - context->expect_only(Item::FIELD_ITEM); - context->expect(Item::INT_ITEM); 
- context->expect(Item::STRING_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect(Item::FUNC_ITEM); - break; - } - case Item_func::IN_FUNC: - { - DBUG_PRINT("info", ("IN_FUNC, rewriting using OR")); - Item_func_in *in_func= (Item_func_in *) func_item; - Ndb_rewrite_context *rewrite_context= - new Ndb_rewrite_context(func_item); - rewrite_context->next= context->rewrite_stack; - context->rewrite_stack= rewrite_context; - if (in_func->negated) - { - DBUG_PRINT("info", ("NOT_FUNC")); - curr_cond->ndb_item= new Ndb_item(Item_func::NOT_FUNC, 1); - prev_cond= curr_cond; - curr_cond= context->cond_ptr= new Ndb_cond(); - curr_cond->prev= prev_cond; - prev_cond->next= curr_cond; - } - DBUG_PRINT("info", ("COND_OR_FUNC")); - curr_cond->ndb_item= new Ndb_item(Item_func::COND_OR_FUNC, - func_item->argument_count() - 1); - context->expect_only(Item::FIELD_ITEM); - context->expect(Item::INT_ITEM); - context->expect(Item::STRING_ITEM); - context->expect(Item::VARBIN_ITEM); - context->expect(Item::FUNC_ITEM); - break; - } - case Item_func::NEG_FUNC: - case Item_func::UNKNOWN_FUNC: - { - DBUG_PRINT("info", ("UNKNOWN_FUNC %s", - func_item->const_item()?"const":"")); - DBUG_PRINT("info", ("result type %d", func_item->result_type())); - if (func_item->const_item()) - { - ndb_serialize_const(func_item->result_type(), item, curr_cond, - context); - - // Skip any arguments since we will evaluate function instead - DBUG_PRINT("info", ("Skip until end of arguments marker")); - context->skip= func_item->argument_count(); - } - else - // Function does not return constant expression - context->supported= FALSE; - break; - } - default: - { - DBUG_PRINT("info", ("Found func_item of type %d", - func_item->functype())); - context->supported= FALSE; - } - } - break; - } - case Item::STRING_ITEM: - DBUG_PRINT("info", ("STRING_ITEM")); - if (context->expecting(Item::STRING_ITEM)) - { -#ifndef DBUG_OFF - char buff[256]; - String str(buff,(uint32) sizeof(buff), system_charset_info); - 
str.length(0); - Item_string *string_item= (Item_string *) item; - DBUG_PRINT("info", ("value \"%s\"", - string_item->val_str(&str)->ptr())); -#endif - NDB_ITEM_QUALIFICATION q; - q.value_type= Item::STRING_ITEM; - curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); - if (! context->expecting_no_field_result()) - { - // We have not seen the field argument yet - context->expect_only(Item::FIELD_ITEM); - context->expect_only_field_result(STRING_RESULT); - context->expect_collation(item->collation.collation); - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - // Check that we are comparing with a field with same collation - if (!context->expecting_collation(item->collation.collation)) - { - DBUG_PRINT("info", ("Found non-matching collation %s", - item->collation.collation->name)); - context->supported= FALSE; - } - } - } - else - context->supported= FALSE; - break; - case Item::INT_ITEM: - DBUG_PRINT("info", ("INT_ITEM")); - if (context->expecting(Item::INT_ITEM)) - { - DBUG_PRINT("info", ("value %ld", - (long) ((Item_int*) item)->value)); - NDB_ITEM_QUALIFICATION q; - q.value_type= Item::INT_ITEM; - curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); - if (! 
context->expecting_no_field_result()) - { - // We have not seen the field argument yet - context->expect_only(Item::FIELD_ITEM); - context->expect_only_field_result(INT_RESULT); - context->expect_field_result(REAL_RESULT); - context->expect_field_result(DECIMAL_RESULT); - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - } - } - else - context->supported= FALSE; - break; - case Item::REAL_ITEM: - DBUG_PRINT("info", ("REAL_ITEM")); - if (context->expecting(Item::REAL_ITEM)) - { - DBUG_PRINT("info", ("value %f", ((Item_float*) item)->value)); - NDB_ITEM_QUALIFICATION q; - q.value_type= Item::REAL_ITEM; - curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); - if (! context->expecting_no_field_result()) - { - // We have not seen the field argument yet - context->expect_only(Item::FIELD_ITEM); - context->expect_only_field_result(REAL_RESULT); - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - } - } - else - context->supported= FALSE; - break; - case Item::VARBIN_ITEM: - DBUG_PRINT("info", ("VARBIN_ITEM")); - if (context->expecting(Item::VARBIN_ITEM)) - { - NDB_ITEM_QUALIFICATION q; - q.value_type= Item::VARBIN_ITEM; - curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); - if (! 
context->expecting_no_field_result()) - { - // We have not seen the field argument yet - context->expect_only(Item::FIELD_ITEM); - context->expect_only_field_result(STRING_RESULT); - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - } - } - else - context->supported= FALSE; - break; - case Item::DECIMAL_ITEM: - DBUG_PRINT("info", ("DECIMAL_ITEM")); - if (context->expecting(Item::DECIMAL_ITEM)) - { - DBUG_PRINT("info", ("value %f", - ((Item_decimal*) item)->val_real())); - NDB_ITEM_QUALIFICATION q; - q.value_type= Item::DECIMAL_ITEM; - curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); - if (! context->expecting_no_field_result()) - { - // We have not seen the field argument yet - context->expect_only(Item::FIELD_ITEM); - context->expect_only_field_result(REAL_RESULT); - context->expect_field_result(DECIMAL_RESULT); - } - else - { - // Expect another logical expression - context->expect_only(Item::FUNC_ITEM); - context->expect(Item::COND_ITEM); - } - } - else - context->supported= FALSE; - break; - case Item::COND_ITEM: - { - Item_cond *cond_item= (Item_cond *) item; - - if (context->expecting(Item::COND_ITEM)) - { - switch (cond_item->functype()) { - case Item_func::COND_AND_FUNC: - DBUG_PRINT("info", ("COND_AND_FUNC")); - curr_cond->ndb_item= new Ndb_item(cond_item->functype(), - cond_item); - break; - case Item_func::COND_OR_FUNC: - DBUG_PRINT("info", ("COND_OR_FUNC")); - curr_cond->ndb_item= new Ndb_item(cond_item->functype(), - cond_item); - break; - default: - DBUG_PRINT("info", ("COND_ITEM %d", cond_item->functype())); - context->supported= FALSE; - break; - } - } - else - { - /* Did not expect condition */ - context->supported= FALSE; - } - break; - } - case Item::CACHE_ITEM: - { - DBUG_PRINT("info", ("CACHE_ITEM")); - if (item->const_item()) - { - ndb_serialize_const(((Item_cache*)item)->result_type(), item, - curr_cond, context); - } - else - context->supported= FALSE; - - 
break; - } - default: - { - DBUG_PRINT("info", ("Found item of type %d", item->type())); - context->supported= FALSE; - } - } - } - if (context->supported && context->rewrite_stack) - { - Ndb_rewrite_context *rewrite_context= context->rewrite_stack; - if (rewrite_context->count == - rewrite_context->func_item->argument_count()) - { - // Rewrite is done, wrap an END() at the en - DBUG_PRINT("info", ("End of condition group")); - prev_cond= curr_cond; - curr_cond= context->cond_ptr= new Ndb_cond(); - curr_cond->prev= prev_cond; - prev_cond->next= curr_cond; - curr_cond->ndb_item= new Ndb_item(NDB_END_COND); - // Pop rewrite stack - context->rewrite_stack= rewrite_context->next; - rewrite_context->next= NULL; - delete(rewrite_context); - } - } - } - } - - DBUG_VOID_RETURN; -} - -/* - Push a condition - */ -const -COND* -ha_ndbcluster_cond::cond_push(const COND *cond, - TABLE *table, const NDBTAB *ndb_table) -{ - DBUG_ENTER("cond_push"); - Ndb_cond_stack *ndb_cond = new Ndb_cond_stack(); - if (ndb_cond == NULL) - { - my_errno= HA_ERR_OUT_OF_MEM; - DBUG_RETURN(NULL); - } - if (m_cond_stack) - ndb_cond->next= m_cond_stack; - else - ndb_cond->next= NULL; - m_cond_stack= ndb_cond; - - if (serialize_cond(cond, ndb_cond, table, ndb_table)) - { - DBUG_RETURN(NULL); - } - else - { - cond_pop(); - } - DBUG_RETURN(cond); -} - -/* - Pop the top condition from the condition stack -*/ -void -ha_ndbcluster_cond::cond_pop() -{ - Ndb_cond_stack *ndb_cond_stack= m_cond_stack; - if (ndb_cond_stack) - { - m_cond_stack= ndb_cond_stack->next; - ndb_cond_stack->next= NULL; - delete ndb_cond_stack; - } -} - -/* - Clear the condition stack -*/ -void -ha_ndbcluster_cond::cond_clear() -{ - DBUG_ENTER("cond_clear"); - while (m_cond_stack) - cond_pop(); - - DBUG_VOID_RETURN; -} - -bool -ha_ndbcluster_cond::serialize_cond(const COND *cond, Ndb_cond_stack *ndb_cond, - TABLE *table, const NDBTAB *ndb_table) -{ - DBUG_ENTER("serialize_cond"); - Item *item= (Item *) cond; - Ndb_cond_traverse_context 
context(table, ndb_table, ndb_cond); - // Expect a logical expression - context.expect(Item::FUNC_ITEM); - context.expect(Item::COND_ITEM); - item->traverse_cond(&ndb_serialize_cond, (void *) &context, Item::PREFIX); - DBUG_PRINT("info", ("The pushed condition is %ssupported", (context.supported)?"":"not ")); - - DBUG_RETURN(context.supported); -} - -int -ha_ndbcluster_cond::build_scan_filter_predicate(Ndb_cond * &cond, - NdbScanFilter *filter, - bool negated) -{ - DBUG_ENTER("build_scan_filter_predicate"); - switch (cond->ndb_item->type) { - case NDB_FUNCTION: - { - if (!cond->next) - break; - Ndb_item *a= cond->next->ndb_item; - Ndb_item *b, *field, *value= NULL; - - switch (cond->ndb_item->argument_count()) { - case 1: - field= (a->type == NDB_FIELD)? a : NULL; - break; - case 2: - if (!cond->next->next) - { - field= NULL; - break; - } - b= cond->next->next->ndb_item; - value= ((a->type == NDB_VALUE) ? a : - (b->type == NDB_VALUE) ? b : - NULL); - field= ((a->type == NDB_FIELD) ? a : - (b->type == NDB_FIELD) ? b : - NULL); - break; - default: - field= NULL; //Keep compiler happy - DBUG_ASSERT(0); - break; - } - switch ((negated) ? 
- Ndb_item::negate(cond->ndb_item->qualification.function_type) - : cond->ndb_item->qualification.function_type) { - case NDB_EQ_FUNC: - { - if (!value || !field) break; - // Save value in right format for the field type - value->save_in_field(field); - DBUG_PRINT("info", ("Generating EQ filter")); - if (filter->cmp(NdbScanFilter::COND_EQ, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - DBUG_RETURN(1); - cond= cond->next->next->next; - DBUG_RETURN(0); - } - case NDB_NE_FUNC: - { - if (!value || !field) break; - // Save value in right format for the field type - value->save_in_field(field); - DBUG_PRINT("info", ("Generating NE filter")); - if (filter->cmp(NdbScanFilter::COND_NE, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - DBUG_RETURN(1); - cond= cond->next->next->next; - DBUG_RETURN(0); - } - case NDB_LT_FUNC: - { - if (!value || !field) break; - // Save value in right format for the field type - value->save_in_field(field); - if (a == field) - { - DBUG_PRINT("info", ("Generating LT filter")); - if (filter->cmp(NdbScanFilter::COND_LT, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - DBUG_RETURN(1); - } - else - { - DBUG_PRINT("info", ("Generating GT filter")); - if (filter->cmp(NdbScanFilter::COND_GT, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - DBUG_RETURN(1); - } - cond= cond->next->next->next; - DBUG_RETURN(0); - } - case NDB_LE_FUNC: - { - if (!value || !field) break; - // Save value in right format for the field type - value->save_in_field(field); - if (a == field) - { - DBUG_PRINT("info", ("Generating LE filter")); - if (filter->cmp(NdbScanFilter::COND_LE, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - DBUG_RETURN(1); - } - else - { - DBUG_PRINT("info", ("Generating GE filter")); - if (filter->cmp(NdbScanFilter::COND_GE, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - 
DBUG_RETURN(1); - } - cond= cond->next->next->next; - DBUG_RETURN(0); - } - case NDB_GE_FUNC: - { - if (!value || !field) break; - // Save value in right format for the field type - value->save_in_field(field); - if (a == field) - { - DBUG_PRINT("info", ("Generating GE filter")); - if (filter->cmp(NdbScanFilter::COND_GE, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - DBUG_RETURN(1); - } - else - { - DBUG_PRINT("info", ("Generating LE filter")); - if (filter->cmp(NdbScanFilter::COND_LE, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - DBUG_RETURN(1); - } - cond= cond->next->next->next; - DBUG_RETURN(0); - } - case NDB_GT_FUNC: - { - if (!value || !field) break; - // Save value in right format for the field type - value->save_in_field(field); - if (a == field) - { - DBUG_PRINT("info", ("Generating GT filter")); - if (filter->cmp(NdbScanFilter::COND_GT, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - DBUG_RETURN(1); - } - else - { - DBUG_PRINT("info", ("Generating LT filter")); - if (filter->cmp(NdbScanFilter::COND_LT, - field->get_field_no(), - field->get_val(), - field->pack_length()) == -1) - DBUG_RETURN(1); - } - cond= cond->next->next->next; - DBUG_RETURN(0); - } - case NDB_LIKE_FUNC: - { - if (!value || !field) break; - if ((value->qualification.value_type != Item::STRING_ITEM) && - (value->qualification.value_type != Item::VARBIN_ITEM)) - break; - // Save value in right format for the field type - value->save_in_field(field); - DBUG_PRINT("info", ("Generating LIKE filter: like(%d,%s,%d)", - field->get_field_no(), value->get_val(), - value->pack_length())); - if (filter->cmp(NdbScanFilter::COND_LIKE, - field->get_field_no(), - value->get_val(), - value->pack_length()) == -1) - DBUG_RETURN(1); - cond= cond->next->next->next; - DBUG_RETURN(0); - } - case NDB_NOTLIKE_FUNC: - { - if (!value || !field) break; - if ((value->qualification.value_type != Item::STRING_ITEM) && - 
(value->qualification.value_type != Item::VARBIN_ITEM)) - break; - // Save value in right format for the field type - value->save_in_field(field); - DBUG_PRINT("info", ("Generating NOTLIKE filter: notlike(%d,%s,%d)", - field->get_field_no(), value->get_val(), - value->pack_length())); - if (filter->cmp(NdbScanFilter::COND_NOT_LIKE, - field->get_field_no(), - value->get_val(), - value->pack_length()) == -1) - DBUG_RETURN(1); - cond= cond->next->next->next; - DBUG_RETURN(0); - } - case NDB_ISNULL_FUNC: - if (!field) - break; - DBUG_PRINT("info", ("Generating ISNULL filter")); - if (filter->isnull(field->get_field_no()) == -1) - DBUG_RETURN(1); - cond= cond->next->next; - DBUG_RETURN(0); - case NDB_ISNOTNULL_FUNC: - { - if (!field) - break; - DBUG_PRINT("info", ("Generating ISNOTNULL filter")); - if (filter->isnotnull(field->get_field_no()) == -1) - DBUG_RETURN(1); - cond= cond->next->next; - DBUG_RETURN(0); - } - default: - break; - } - break; - } - default: - break; - } - DBUG_PRINT("info", ("Found illegal condition")); - DBUG_RETURN(1); -} - - -int -ha_ndbcluster_cond::build_scan_filter_group(Ndb_cond* &cond, - NdbScanFilter *filter) -{ - uint level=0; - bool negated= FALSE; - DBUG_ENTER("build_scan_filter_group"); - - do - { - if (!cond) - DBUG_RETURN(1); - switch (cond->ndb_item->type) { - case NDB_FUNCTION: - { - switch (cond->ndb_item->qualification.function_type) { - case NDB_COND_AND_FUNC: - { - level++; - DBUG_PRINT("info", ("Generating %s group %u", (negated)?"NAND":"AND", - level)); - if ((negated) ? filter->begin(NdbScanFilter::NAND) - : filter->begin(NdbScanFilter::AND) == -1) - DBUG_RETURN(1); - negated= FALSE; - cond= cond->next; - break; - } - case NDB_COND_OR_FUNC: - { - level++; - DBUG_PRINT("info", ("Generating %s group %u", (negated)?"NOR":"OR", - level)); - if ((negated) ? 
filter->begin(NdbScanFilter::NOR) - : filter->begin(NdbScanFilter::OR) == -1) - DBUG_RETURN(1); - negated= FALSE; - cond= cond->next; - break; - } - case NDB_NOT_FUNC: - { - DBUG_PRINT("info", ("Generating negated query")); - cond= cond->next; - negated= TRUE; - break; - } - default: - if (build_scan_filter_predicate(cond, filter, negated)) - DBUG_RETURN(1); - negated= FALSE; - break; - } - break; - } - case NDB_END_COND: - DBUG_PRINT("info", ("End of group %u", level)); - level--; - if (cond) cond= cond->next; - if (filter->end() == -1) - DBUG_RETURN(1); - if (!negated) - break; - // else fall through (NOT END is an illegal condition) - default: - { - DBUG_PRINT("info", ("Illegal scan filter")); - } - } - } while (level > 0 || negated); - - DBUG_RETURN(0); -} - - -int -ha_ndbcluster_cond::build_scan_filter(Ndb_cond * &cond, NdbScanFilter *filter) -{ - bool simple_cond= TRUE; - DBUG_ENTER("build_scan_filter"); - - switch (cond->ndb_item->type) { - case NDB_FUNCTION: - switch (cond->ndb_item->qualification.function_type) { - case NDB_COND_AND_FUNC: - case NDB_COND_OR_FUNC: - simple_cond= FALSE; - break; - default: - break; - } - break; - default: - break; - } - if (simple_cond && filter->begin() == -1) - DBUG_RETURN(1); - if (build_scan_filter_group(cond, filter)) - DBUG_RETURN(1); - if (simple_cond && filter->end() == -1) - DBUG_RETURN(1); - - DBUG_RETURN(0); -} - -int -ha_ndbcluster_cond::generate_scan_filter(NdbScanOperation *op) -{ - DBUG_ENTER("generate_scan_filter"); - - if (m_cond_stack) - { - NdbScanFilter filter(op, false); // don't abort on too large - - int ret=generate_scan_filter_from_cond(filter); - if (ret != 0) - { - const NdbError& err=filter.getNdbError(); - if (err.code == NdbScanFilter::FilterTooLarge) - { - // err.message has static storage - DBUG_PRINT("info", ("%s", err.message)); - push_warning(current_thd, Sql_condition::WARN_LEVEL_WARN, - err.code, err.message); - ret=0; - } - } - if (ret != 0) - DBUG_RETURN(ret); - } - else - { - 
DBUG_PRINT("info", ("Empty stack")); - } - - DBUG_RETURN(0); -} - - -int -ha_ndbcluster_cond::generate_scan_filter_from_cond(NdbScanFilter& filter) -{ - bool multiple_cond= FALSE; - DBUG_ENTER("generate_scan_filter_from_cond"); - - // Wrap an AND group around multiple conditions - if (m_cond_stack->next) - { - multiple_cond= TRUE; - if (filter.begin() == -1) - DBUG_RETURN(1); - } - for (Ndb_cond_stack *stack= m_cond_stack; - (stack); - stack= stack->next) - { - Ndb_cond *cond= stack->ndb_cond; - - if (build_scan_filter(cond, &filter)) - { - DBUG_PRINT("info", ("build_scan_filter failed")); - DBUG_RETURN(1); - } - } - if (multiple_cond && filter.end() == -1) - DBUG_RETURN(1); - - DBUG_RETURN(0); -} - - -int ha_ndbcluster_cond::generate_scan_filter_from_key(NdbScanOperation *op, - const KEY* key_info, - const uchar *key, - uint key_len, - uchar *buf) -{ - KEY_PART_INFO* key_part= key_info->key_part; - KEY_PART_INFO* end= key_part+key_info->user_defined_key_parts; - NdbScanFilter filter(op, true); // abort on too large - int res; - DBUG_ENTER("generate_scan_filter_from_key"); - - filter.begin(NdbScanFilter::AND); - for (; key_part != end; key_part++) - { - Field* field= key_part->field; - uint32 pack_len= field->pack_length(); - const uchar* ptr= key; - DBUG_PRINT("info", ("Filtering value for %s", field->field_name)); - DBUG_DUMP("key", ptr, pack_len); - if (key_part->null_bit) - { - DBUG_PRINT("info", ("Generating ISNULL filter")); - if (filter.isnull(key_part->fieldnr-1) == -1) - DBUG_RETURN(1); - } - else - { - DBUG_PRINT("info", ("Generating EQ filter")); - if (filter.cmp(NdbScanFilter::COND_EQ, - key_part->fieldnr-1, - ptr, - pack_len) == -1) - DBUG_RETURN(1); - } - key += key_part->store_length; - } - // Add any pushed condition - if (m_cond_stack && - (res= generate_scan_filter_from_cond(filter))) - DBUG_RETURN(res); - - if (filter.end() == -1) - DBUG_RETURN(1); - - DBUG_RETURN(0); -} - -#endif diff --git a/sql/ha_ndbcluster_cond.h b/sql/ha_ndbcluster_cond.h 
deleted file mode 100644 index 952b705bfc2..00000000000 --- a/sql/ha_ndbcluster_cond.h +++ /dev/null @@ -1,500 +0,0 @@ -#ifndef HA_NDBCLUSTER_COND_INCLUDED -#define HA_NDBCLUSTER_COND_INCLUDED - -/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ - -/* - This file defines the data structures used by engine condition pushdown in - the NDB Cluster handler -*/ - -#ifdef USE_PRAGMA_INTERFACE -#pragma interface /* gcc class implementation */ -#endif - -/* - It is necessary to include set_var.h instead of item.h because there - are dependencies on include order for set_var.h and item.h. This - will be resolved later. 
-*/ -#include "set_var.h" /* Item, Item_field */ - -typedef enum ndb_item_type { - NDB_VALUE = 0, // Qualified more with Item::Type - NDB_FIELD = 1, // Qualified from table definition - NDB_FUNCTION = 2,// Qualified from Item_func::Functype - NDB_END_COND = 3 // End marker for condition group -} NDB_ITEM_TYPE; - -typedef enum ndb_func_type { - NDB_EQ_FUNC = 0, - NDB_NE_FUNC = 1, - NDB_LT_FUNC = 2, - NDB_LE_FUNC = 3, - NDB_GT_FUNC = 4, - NDB_GE_FUNC = 5, - NDB_ISNULL_FUNC = 6, - NDB_ISNOTNULL_FUNC = 7, - NDB_LIKE_FUNC = 8, - NDB_NOTLIKE_FUNC = 9, - NDB_NOT_FUNC = 10, - NDB_UNKNOWN_FUNC = 11, - NDB_COND_AND_FUNC = 12, - NDB_COND_OR_FUNC = 13, - NDB_UNSUPPORTED_FUNC = 14 -} NDB_FUNC_TYPE; - -typedef union ndb_item_qualification { - Item::Type value_type; - enum_field_types field_type; // Instead of Item::FIELD_ITEM - NDB_FUNC_TYPE function_type; // Instead of Item::FUNC_ITEM -} NDB_ITEM_QUALIFICATION; - -typedef struct ndb_item_field_value { - Field* field; - int column_no; -} NDB_ITEM_FIELD_VALUE; - -typedef union ndb_item_value { - const Item *item; - NDB_ITEM_FIELD_VALUE *field_value; - uint arg_count; -} NDB_ITEM_VALUE; - -struct negated_function_mapping -{ - NDB_FUNC_TYPE pos_fun; - NDB_FUNC_TYPE neg_fun; -}; - -/* - Define what functions can be negated in condition pushdown. 
- Note, these HAVE to be in the same order as in definition enum -*/ -static const negated_function_mapping neg_map[]= -{ - {NDB_EQ_FUNC, NDB_NE_FUNC}, - {NDB_NE_FUNC, NDB_EQ_FUNC}, - {NDB_LT_FUNC, NDB_GE_FUNC}, - {NDB_LE_FUNC, NDB_GT_FUNC}, - {NDB_GT_FUNC, NDB_LE_FUNC}, - {NDB_GE_FUNC, NDB_LT_FUNC}, - {NDB_ISNULL_FUNC, NDB_ISNOTNULL_FUNC}, - {NDB_ISNOTNULL_FUNC, NDB_ISNULL_FUNC}, - {NDB_LIKE_FUNC, NDB_NOTLIKE_FUNC}, - {NDB_NOTLIKE_FUNC, NDB_LIKE_FUNC}, - {NDB_NOT_FUNC, NDB_UNSUPPORTED_FUNC}, - {NDB_UNKNOWN_FUNC, NDB_UNSUPPORTED_FUNC}, - {NDB_COND_AND_FUNC, NDB_UNSUPPORTED_FUNC}, - {NDB_COND_OR_FUNC, NDB_UNSUPPORTED_FUNC}, - {NDB_UNSUPPORTED_FUNC, NDB_UNSUPPORTED_FUNC} -}; - -/* - This class is the construction element for serialization of Item tree - in condition pushdown. - An instance of Ndb_Item represents a constant, table field reference, - unary or binary comparison predicate, and start/end of AND/OR. - Instances of Ndb_Item are stored in a linked list implemented by Ndb_cond - class. - The order of elements produced by Ndb_cond::next corresponds to - breadth-first traversal of the Item (i.e. expression) tree in prefix order. - AND and OR have arbitrary arity, so the end of AND/OR group is marked with - Ndb_item with type == NDB_END_COND. - NOT items represent negated conditions and generate NAND/NOR groups. 
-*/ -class Ndb_item : public Sql_alloc -{ -public: - Ndb_item(NDB_ITEM_TYPE item_type) : type(item_type) {}; - Ndb_item(NDB_ITEM_TYPE item_type, - NDB_ITEM_QUALIFICATION item_qualification, - const Item *item_value) - : type(item_type), qualification(item_qualification) - { - switch(item_type) { - case(NDB_VALUE): - value.item= item_value; - break; - case(NDB_FIELD): { - NDB_ITEM_FIELD_VALUE *field_value= new NDB_ITEM_FIELD_VALUE(); - Item_field *field_item= (Item_field *) item_value; - field_value->field= field_item->field; - field_value->column_no= -1; // Will be fetched at scan filter generation - value.field_value= field_value; - break; - } - case(NDB_FUNCTION): - value.item= item_value; - value.arg_count= ((Item_func *) item_value)->argument_count(); - break; - case(NDB_END_COND): - break; - } - }; - Ndb_item(Field *field, int column_no) : type(NDB_FIELD) - { - NDB_ITEM_FIELD_VALUE *field_value= new NDB_ITEM_FIELD_VALUE(); - qualification.field_type= field->type(); - field_value->field= field; - field_value->column_no= column_no; - value.field_value= field_value; - }; - Ndb_item(Item_func::Functype func_type, const Item *item_value) - : type(NDB_FUNCTION) - { - qualification.function_type= item_func_to_ndb_func(func_type); - value.item= item_value; - value.arg_count= ((Item_func *) item_value)->argument_count(); - }; - Ndb_item(Item_func::Functype func_type, uint no_args) - : type(NDB_FUNCTION) - { - qualification.function_type= item_func_to_ndb_func(func_type); - value.arg_count= no_args; - }; - ~Ndb_item() - { - if (type == NDB_FIELD) - { - delete value.field_value; - value.field_value= NULL; - } - }; - - uint32 pack_length() - { - switch(type) { - case(NDB_VALUE): - if(qualification.value_type == Item::STRING_ITEM) - return value.item->str_value.length(); - break; - case(NDB_FIELD): - return value.field_value->field->pack_length(); - default: - break; - } - - return 0; - }; - - Field * get_field() { return value.field_value->field; }; - - int get_field_no() 
{ return value.field_value->column_no; }; - - int argument_count() - { - return value.arg_count; - }; - - const char* get_val() - { - switch(type) { - case(NDB_VALUE): - if(qualification.value_type == Item::STRING_ITEM) - return value.item->str_value.ptr(); - break; - case(NDB_FIELD): - return (char*) value.field_value->field->ptr; - default: - break; - } - - return NULL; - }; - - void save_in_field(Ndb_item *field_item) - { - Field *field = field_item->value.field_value->field; - const Item *item= value.item; - - if (item && field) - { - my_bitmap_map *old_map= - dbug_tmp_use_all_columns(field->table, field->table->write_set); - ((Item *)item)->save_in_field(field, FALSE); - dbug_tmp_restore_column_map(field->table->write_set, old_map); - } - }; - - static NDB_FUNC_TYPE item_func_to_ndb_func(Item_func::Functype fun) - { - switch (fun) { - case (Item_func::EQ_FUNC): { return NDB_EQ_FUNC; } - case (Item_func::NE_FUNC): { return NDB_NE_FUNC; } - case (Item_func::LT_FUNC): { return NDB_LT_FUNC; } - case (Item_func::LE_FUNC): { return NDB_LE_FUNC; } - case (Item_func::GT_FUNC): { return NDB_GT_FUNC; } - case (Item_func::GE_FUNC): { return NDB_GE_FUNC; } - case (Item_func::ISNULL_FUNC): { return NDB_ISNULL_FUNC; } - case (Item_func::ISNOTNULL_FUNC): { return NDB_ISNOTNULL_FUNC; } - case (Item_func::LIKE_FUNC): { return NDB_LIKE_FUNC; } - case (Item_func::NOT_FUNC): { return NDB_NOT_FUNC; } - case (Item_func::NEG_FUNC): { return NDB_UNKNOWN_FUNC; } - case (Item_func::UNKNOWN_FUNC): { return NDB_UNKNOWN_FUNC; } - case (Item_func::COND_AND_FUNC): { return NDB_COND_AND_FUNC; } - case (Item_func::COND_OR_FUNC): { return NDB_COND_OR_FUNC; } - default: { return NDB_UNSUPPORTED_FUNC; } - } - }; - - static NDB_FUNC_TYPE negate(NDB_FUNC_TYPE fun) - { - uint i= (uint) fun; - DBUG_ASSERT(fun == neg_map[i].pos_fun); - return neg_map[i].neg_fun; - }; - - NDB_ITEM_TYPE type; - NDB_ITEM_QUALIFICATION qualification; - private: - NDB_ITEM_VALUE value; -}; - -/* - This class implements a 
linked list used for storing a - serialization of the Item tree for condition pushdown. - */ -class Ndb_cond : public Sql_alloc -{ - public: - Ndb_cond() : ndb_item(NULL), next(NULL), prev(NULL) {}; - ~Ndb_cond() - { - if (ndb_item) delete ndb_item; - ndb_item= NULL; - /* - First item in the linked list deletes all in a loop - Note - doing it recursively causes stack issues for - big IN clauses - */ - Ndb_cond *n= next; - while (n) - { - Ndb_cond *tmp= n; - n= n->next; - tmp->next= NULL; - delete tmp; - } - next= prev= NULL; - }; - Ndb_item *ndb_item; - Ndb_cond *next; - Ndb_cond *prev; -}; - -/* - This class implements a stack for storing several conditions - for pushdown (represented as serialized Item trees using Ndb_cond). - The current implementation only pushes one condition, but is - prepared for handling several (C1 AND C2 ...) if the logic for - pushing conditions is extended in sql_select. -*/ -class Ndb_cond_stack : public Sql_alloc -{ - public: - Ndb_cond_stack() : ndb_cond(NULL), next(NULL) {}; - ~Ndb_cond_stack() - { - if (ndb_cond) delete ndb_cond; - ndb_cond= NULL; - if (next) delete next; - next= NULL; - }; - Ndb_cond *ndb_cond; - Ndb_cond_stack *next; -}; - -class Ndb_rewrite_context : public Sql_alloc -{ -public: - Ndb_rewrite_context(Item_func *func) - : func_item(func), left_hand_item(NULL), count(0) {}; - ~Ndb_rewrite_context() - { - if (next) delete next; - } - const Item_func *func_item; - const Item *left_hand_item; - uint count; - Ndb_rewrite_context *next; -}; - -/* - This class is used for storing the context when traversing - the Item tree. It stores a reference to the table the condition - is defined on, the serialized representation being generated, - if the condition found is supported, and information what is - expected next in the tree inorder for the condition to be supported. 
-*/ -class Ndb_cond_traverse_context : public Sql_alloc -{ - public: - Ndb_cond_traverse_context(TABLE *tab, const NdbDictionary::Table *ndb_tab, - Ndb_cond_stack* stack) - : table(tab), ndb_table(ndb_tab), - supported(TRUE), stack_ptr(stack), cond_ptr(NULL), - skip(0), collation(NULL), rewrite_stack(NULL) - { - // Allocate type checking bitmaps - my_bitmap_init(&expect_mask, 0, 512, FALSE); - my_bitmap_init(&expect_field_type_mask, 0, 512, FALSE); - my_bitmap_init(&expect_field_result_mask, 0, 512, FALSE); - - if (stack) - cond_ptr= stack->ndb_cond; - }; - ~Ndb_cond_traverse_context() - { - my_bitmap_free(&expect_mask); - my_bitmap_free(&expect_field_type_mask); - my_bitmap_free(&expect_field_result_mask); - if (rewrite_stack) delete rewrite_stack; - } - void expect(Item::Type type) - { - bitmap_set_bit(&expect_mask, (uint) type); - if (type == Item::FIELD_ITEM) expect_all_field_types(); - }; - void dont_expect(Item::Type type) - { - bitmap_clear_bit(&expect_mask, (uint) type); - }; - bool expecting(Item::Type type) - { - return bitmap_is_set(&expect_mask, (uint) type); - }; - void expect_nothing() - { - bitmap_clear_all(&expect_mask); - }; - bool expecting_nothing() - { - return bitmap_is_clear_all(&expect_mask); - } - void expect_only(Item::Type type) - { - expect_nothing(); - expect(type); - }; - - void expect_field_type(enum_field_types type) - { - bitmap_set_bit(&expect_field_type_mask, (uint) type); - }; - void expect_all_field_types() - { - bitmap_set_all(&expect_field_type_mask); - }; - bool expecting_field_type(enum_field_types type) - { - return bitmap_is_set(&expect_field_type_mask, (uint) type); - }; - void expect_no_field_type() - { - bitmap_clear_all(&expect_field_type_mask); - }; - bool expecting_no_field_type() - { - return bitmap_is_clear_all(&expect_field_type_mask); - } - void expect_only_field_type(enum_field_types result) - { - expect_no_field_type(); - expect_field_type(result); - }; - - void expect_field_result(Item_result result) - { - 
bitmap_set_bit(&expect_field_result_mask, (uint) result); - }; - bool expecting_field_result(Item_result result) - { - return bitmap_is_set(&expect_field_result_mask, (uint) result); - }; - void expect_no_field_result() - { - bitmap_clear_all(&expect_field_result_mask); - }; - bool expecting_no_field_result() - { - return bitmap_is_clear_all(&expect_field_result_mask); - } - void expect_only_field_result(Item_result result) - { - expect_no_field_result(); - expect_field_result(result); - }; - void expect_collation(CHARSET_INFO* col) - { - collation= col; - }; - bool expecting_collation(CHARSET_INFO* col) - { - bool matching= (!collation) ? true : (collation == col); - collation= NULL; - - return matching; - }; - - TABLE* table; - const NdbDictionary::Table *ndb_table; - bool supported; - Ndb_cond_stack* stack_ptr; - Ndb_cond* cond_ptr; - MY_BITMAP expect_mask; - MY_BITMAP expect_field_type_mask; - MY_BITMAP expect_field_result_mask; - uint skip; - CHARSET_INFO* collation; - Ndb_rewrite_context *rewrite_stack; -}; - -class ha_ndbcluster; - -class ha_ndbcluster_cond -{ -public: - ha_ndbcluster_cond() - : m_cond_stack(NULL) - {} - ~ha_ndbcluster_cond() - { if (m_cond_stack) delete m_cond_stack; } - const COND *cond_push(const COND *cond, - TABLE *table, const NdbDictionary::Table *ndb_table); - void cond_pop(); - void cond_clear(); - int generate_scan_filter(NdbScanOperation* op); - int generate_scan_filter_from_cond(NdbScanFilter& filter); - int generate_scan_filter_from_key(NdbScanOperation* op, - const KEY* key_info, - const uchar *key, - uint key_len, - uchar *buf); -private: - bool serialize_cond(const COND *cond, Ndb_cond_stack *ndb_cond, - TABLE *table, const NdbDictionary::Table *ndb_table); - int build_scan_filter_predicate(Ndb_cond* &cond, - NdbScanFilter* filter, - bool negated= false); - int build_scan_filter_group(Ndb_cond* &cond, - NdbScanFilter* filter); - int build_scan_filter(Ndb_cond* &cond, NdbScanFilter* filter); - - Ndb_cond_stack *m_cond_stack; 
-}; - -#endif /* HA_NDBCLUSTER_COND_INCLUDED */ diff --git a/sql/ha_ndbcluster_tables.h b/sql/ha_ndbcluster_tables.h deleted file mode 100644 index 4d97ca2c254..00000000000 --- a/sql/ha_ndbcluster_tables.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef HA_NDBCLUSTER_TABLES_INCLUDED -#define HA_NDBCLUSTER_TABLES_INCLUDED - -/* Copyright (c) 2000-2003, 2006, 2007 MySQL AB, 2009 Sun Microsystems, Inc. - Use is subject to license terms - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ - -#define NDB_REP_DB "mysql" -#define OLD_NDB_REP_DB "cluster" -#define NDB_REP_TABLE "ndb_binlog_index" -#define NDB_APPLY_TABLE "ndb_apply_status" -#define OLD_NDB_APPLY_TABLE "apply_status" -#define NDB_SCHEMA_TABLE "ndb_schema" -#define OLD_NDB_SCHEMA_TABLE "schema" - -#endif /* HA_NDBCLUSTER_TABLES_INCLUDED */ diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index f503b2f54e5..da7f3aeff89 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -4944,7 +4944,6 @@ int ha_partition::rnd_next(uchar *buf) end: m_part_spec.start_part= NO_CURRENT_PART_ID; end_dont_reset_start_part: - table->status= STATUS_NOT_FOUND; DBUG_RETURN(result); } @@ -5848,7 +5847,6 @@ int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag) key not found. 
*/ DBUG_PRINT("info", ("scan with no partition to scan")); - table->status= STATUS_NOT_FOUND; DBUG_RETURN(HA_ERR_END_OF_FILE); } if (m_part_spec.start_part == m_part_spec.end_part) @@ -5873,7 +5871,6 @@ int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag) if (start_part == MY_BIT_NONE) { DBUG_PRINT("info", ("scan with no partition to scan")); - table->status= STATUS_NOT_FOUND; DBUG_RETURN(HA_ERR_END_OF_FILE); } if (start_part > m_part_spec.start_part) diff --git a/sql/ha_partition.h b/sql/ha_partition.h index 71ae84b06a0..3ea8d4a855d 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -424,7 +424,6 @@ public: will be handled by any underlying handlers implementing transactions. There is only one call to each handler type involved per transaction and these go directly to the handlers supporting transactions - currently InnoDB, BDB and NDB). ------------------------------------------------------------------------- */ virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, @@ -799,14 +798,14 @@ public: the handler always has a primary key (hidden if not defined) and this index is used for scanning rather than a full table scan in all situations. - (InnoDB, BDB, Federated) + (InnoDB, Federated) HA_REC_NOT_IN_SEQ: This flag is set for handlers that cannot guarantee that the rows are returned accroding to incremental positions (0, 1, 2, 3...). This also means that rnd_next() should return HA_ERR_RECORD_DELETED if it finds a deleted row. - (MyISAM (not fixed length row), BDB, HEAP, NDB, InooDB) + (MyISAM (not fixed length row), HEAP, InnoDB) HA_CAN_GEOMETRY: Can the storage engine handle spatial data. @@ -819,13 +818,13 @@ public: finding a row by key as by position. This flag is used in a very special situation in conjunction with filesort's. For further explanation see intro to init_read_record. - (BDB, HEAP, InnoDB) + (HEAP, InnoDB) HA_NULL_IN_KEY: Is NULL values allowed in indexes. 
If this is not allowed then it is not possible to use an index on a NULLable field. - (BDB, HEAP, MyISAM, NDB, InnoDB) + (HEAP, MyISAM, InnoDB) HA_DUPLICATE_POS: Tells that we can the position for the conflicting duplicate key @@ -836,12 +835,12 @@ public: HA_CAN_INDEX_BLOBS: Is the storage engine capable of defining an index of a prefix on a BLOB attribute. - (BDB, Federated, MyISAM, InnoDB) + (Federated, MyISAM, InnoDB) HA_AUTO_PART_KEY: Auto increment fields can be part of a multi-part key. For second part auto-increment keys, the auto_incrementing is done in handler.cc - (BDB, Federated, MyISAM, NDB) + (Federated, MyISAM) HA_REQUIRE_PRIMARY_KEY: Can't define a table without primary key (and cannot handle a table @@ -871,7 +870,7 @@ public: HA_NO_PREFIX_CHAR_KEYS: Indexes on prefixes of character fields is not allowed. - (NDB) + (Federated) HA_CAN_FULLTEXT: Does the storage engine support fulltext indexes @@ -896,11 +895,11 @@ public: Should file names always be in lower case (used by engines that map table names to file names. Since partition handler has a local file this flag is set. - (BDB, Federated, MyISAM) + (Federated, MyISAM) HA_CAN_BIT_FIELD: Is the storage engine capable of handling bit fields? - (MyISAM, NDB) + (MyISAM) HA_NEED_READ_RANGE_BUFFER: Is Read Multi-Range supported => need multi read range buffer @@ -912,7 +911,7 @@ public: not handle this call. There are methods in handler.cc that will transfer those calls into index_read and other calls in the index scan module. - (NDB) + (No handler defines it) HA_PRIMARY_KEY_REQUIRED_FOR_POSITION: Does the storage engine need a PK for position? @@ -942,11 +941,11 @@ public: Does the index support read next, this is assumed in the server code and never checked so all indexes must support this. Note that the handler can be used even if it doesn't have any index. - (BDB, HEAP, MyISAM, Federated, NDB, InnoDB) + (HEAP, MyISAM, Federated, InnoDB) HA_READ_PREV: Can the index be used to scan backwards. 
- (BDB, HEAP, MyISAM, NDB, InnoDB) + (HEAP, MyISAM, InnoDB) HA_READ_ORDER: Can the index deliver its record in index order. Typically true for @@ -960,19 +959,19 @@ public: order all output started by index_read since most engines do this. With read_multi_range calls there is a specific flag setting order or not order so in those cases ordering of index output can be avoided. - (BDB, InnoDB, HEAP, MyISAM, NDB) + (InnoDB, HEAP, MyISAM) HA_READ_RANGE: Specify whether index can handle ranges, typically true for all ordered indexes and not true for hash indexes. Used by optimiser to check if ranges (as key >= 5) can be optimised by index. - (BDB, InnoDB, NDB, MyISAM, HEAP) + (InnoDB, MyISAM, HEAP) HA_ONLY_WHOLE_INDEX: Can't use part key searches. This is typically true for hash indexes and typically not true for ordered indexes. - (Federated, NDB, HEAP) + (Federated, HEAP) HA_KEYREAD_ONLY: Does the storage engine support index-only scans on this index. @@ -982,7 +981,7 @@ public: only have to fill in the columns the key covers. If HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns must be updated in the row. - (BDB, InnoDB, MyISAM) + (InnoDB, MyISAM) */ virtual ulong index_flags(uint inx, uint part, bool all_parts) const { @@ -1283,7 +1282,6 @@ public: return h; } - friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); }; diff --git a/sql/handler.cc b/sql/handler.cc index 56e7da6430d..a24f18f4863 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -50,6 +50,9 @@ #include "../storage/maria/ha_maria.h" #endif +#include "wsrep_mysqld.h" +#include "wsrep.h" + /* While we have legacy_db_type, we have this array to check for dups and to find handlerton from legacy_db_type. 
@@ -74,7 +77,6 @@ ulong savepoint_alloc_size= 0; static const LEX_STRING sys_table_aliases[]= { { C_STRING_WITH_LEN("INNOBASE") }, { C_STRING_WITH_LEN("INNODB") }, - { C_STRING_WITH_LEN("NDB") }, { C_STRING_WITH_LEN("NDBCLUSTER") }, { C_STRING_WITH_LEN("HEAP") }, { C_STRING_WITH_LEN("MEMORY") }, { C_STRING_WITH_LEN("MERGE") }, { C_STRING_WITH_LEN("MRG_MYISAM") }, { C_STRING_WITH_LEN("Maria") }, { C_STRING_WITH_LEN("Aria") }, @@ -1141,6 +1143,25 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht_arg) DBUG_VOID_RETURN; } + +static int prepare_or_error(handlerton *ht, THD *thd, bool all) +{ + int err= ht->prepare(ht, thd, all); + status_var_increment(thd->status_var.ha_prepare_count); + if (err) + { + /* avoid sending error, if we're going to replay the transaction */ +#ifdef WITH_WSREP + if (ht == wsrep_hton && + err != WSREP_TRX_SIZE_EXCEEDED && + thd->wsrep_conflict_state != MUST_REPLAY) +#endif + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + } + return err; +} + + /** @retval 0 ok @@ -1158,14 +1179,11 @@ int ha_prepare(THD *thd) { for (; ha_info; ha_info= ha_info->next()) { - int err; handlerton *ht= ha_info->ht(); - status_var_increment(thd->status_var.ha_prepare_count); if (ht->prepare) { - if ((err= ht->prepare(ht, thd, all))) + if (prepare_or_error(ht, thd, all)) { - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); ha_rollback_trans(thd, all); error=1; break; @@ -1366,8 +1384,9 @@ int ha_commit_trans(THD *thd, bool all) mdl_request.init(MDL_key::COMMIT, "", "", MDL_INTENTION_EXCLUSIVE, MDL_EXPLICIT); - if (thd->mdl_context.acquire_lock(&mdl_request, - thd->variables.lock_wait_timeout)) + if (!WSREP(thd) && + thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) { ha_rollback_trans(thd, all); thd->wakeup_subsequent_commits(1); @@ -1398,7 +1417,6 @@ int ha_commit_trans(THD *thd, bool all) for (Ha_trx_info *hi= ha_info; hi; hi= hi->next()) { - int err; handlerton *ht= hi->ht(); /* Do not call two-phase commit if this 
particular @@ -1411,12 +1429,7 @@ int ha_commit_trans(THD *thd, bool all) Sic: we know that prepare() is not NULL since otherwise trans->no_2pc would have been set. */ - err= ht->prepare(ht, thd, all); - status_var_increment(thd->status_var.ha_prepare_count); - if (err) - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); - - if (err) + if (prepare_or_error(ht, thd, all)) goto err; need_prepare_ordered|= (ht->prepare_ordered != NULL); @@ -1425,6 +1438,12 @@ int ha_commit_trans(THD *thd, bool all) DEBUG_SYNC(thd, "ha_commit_trans_after_prepare"); DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE();); + if (!error && WSREP_ON && wsrep_is_wsrep_xid(&thd->transaction.xid_state.xid)) + { + // xid was rewritten by wsrep + xid= wsrep_xid_seqno(&thd->transaction.xid_state.xid); + } + if (!is_real_trans) { error= commit_one_phase_2(thd, all, trans, is_real_trans); @@ -1801,7 +1820,9 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, got, hton_name(hton)->str); for (int i=0; i < got; i ++) { - my_xid x=info->list[i].get_my_xid(); + my_xid x= WSREP_ON && wsrep_is_wsrep_xid(&info->list[i]) ? + wsrep_xid_seqno(&info->list[i]) : + info->list[i].get_my_xid(); if (!x) // not "mine" - that is generated by external TM { #ifndef DBUG_OFF @@ -3086,10 +3107,12 @@ int handler::update_auto_increment() variables->auto_increment_increment); auto_inc_intervals_count++; /* Row-based replication does not need to store intervals in binlog */ - if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row()) - thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(), - auto_inc_interval_for_cur_row.values(), - variables->auto_increment_increment); + if (((WSREP(thd) && wsrep_emulate_bin_log ) || mysql_bin_log.is_open()) + && !thd->is_current_stmt_binlog_format_row()) + thd->auto_inc_intervals_in_cur_stmt_for_binlog. 
+ append(auto_inc_interval_for_cur_row.minimum(), + auto_inc_interval_for_cur_row.values(), + variables->auto_increment_increment); } /* @@ -4387,10 +4410,10 @@ handler::ha_rename_partitions(const char *path) /** Tell the storage engine that it is allowed to "disable transaction" in the - handler. It is a hint that ACID is not required - it is used in NDB for + handler. It is a hint that ACID is not required - it was used in NDB for ALTER TABLE, for example, when data are copied to temporary table. A storage engine may treat this hint any way it likes. NDB for example - starts to commit every now and then automatically. + started to commit every now and then automatically. This hint can be safely ignored. */ int ha_enable_transaction(THD *thd, bool on) @@ -5203,145 +5226,6 @@ int ha_discover_table_names(THD *thd, LEX_STRING *db, MY_DIR *dirp, } -#ifdef HAVE_NDB_BINLOG -/* - TODO: change this into a dynamic struct - List<handlerton> does not work as - 1. binlog_end is called when MEM_ROOT is gone - 2. 
cannot work with thd MEM_ROOT as memory should be freed -*/ -#define MAX_HTON_LIST_ST 63 -struct hton_list_st -{ - handlerton *hton[MAX_HTON_LIST_ST]; - uint sz; -}; - -struct binlog_func_st -{ - enum_binlog_func fn; - void *arg; -}; - -/** @brief - Listing handlertons first to avoid recursive calls and deadlock -*/ -static my_bool binlog_func_list(THD *thd, plugin_ref plugin, void *arg) -{ - hton_list_st *hton_list= (hton_list_st *)arg; - handlerton *hton= plugin_hton(plugin); - if (hton->state == SHOW_OPTION_YES && hton->binlog_func) - { - uint sz= hton_list->sz; - if (sz == MAX_HTON_LIST_ST-1) - { - /* list full */ - return FALSE; - } - hton_list->hton[sz]= hton; - hton_list->sz= sz+1; - } - return FALSE; -} - -static my_bool binlog_func_foreach(THD *thd, binlog_func_st *bfn) -{ - hton_list_st hton_list; - uint i, sz; - - hton_list.sz= 0; - plugin_foreach(thd, binlog_func_list, - MYSQL_STORAGE_ENGINE_PLUGIN, &hton_list); - - for (i= 0, sz= hton_list.sz; i < sz ; i++) - hton_list.hton[i]->binlog_func(hton_list.hton[i], thd, bfn->fn, bfn->arg); - return FALSE; -} - -int ha_reset_logs(THD *thd) -{ - binlog_func_st bfn= {BFN_RESET_LOGS, 0}; - binlog_func_foreach(thd, &bfn); - return 0; -} - -void ha_reset_slave(THD* thd) -{ - binlog_func_st bfn= {BFN_RESET_SLAVE, 0}; - binlog_func_foreach(thd, &bfn); -} - -void ha_binlog_wait(THD* thd) -{ - binlog_func_st bfn= {BFN_BINLOG_WAIT, 0}; - binlog_func_foreach(thd, &bfn); -} - -int ha_binlog_end(THD* thd) -{ - binlog_func_st bfn= {BFN_BINLOG_END, 0}; - binlog_func_foreach(thd, &bfn); - return 0; -} - -int ha_binlog_index_purge_file(THD *thd, const char *file) -{ - binlog_func_st bfn= {BFN_BINLOG_PURGE_FILE, (void *)file}; - binlog_func_foreach(thd, &bfn); - return 0; -} - -struct binlog_log_query_st -{ - enum_binlog_command binlog_command; - const char *query; - uint query_length; - const char *db; - const char *table_name; -}; - -static my_bool binlog_log_query_handlerton2(THD *thd, - handlerton *hton, - void *args) -{ - 
struct binlog_log_query_st *b= (struct binlog_log_query_st*)args; - if (hton->state == SHOW_OPTION_YES && hton->binlog_log_query) - hton->binlog_log_query(hton, thd, - b->binlog_command, - b->query, - b->query_length, - b->db, - b->table_name); - return FALSE; -} - -static my_bool binlog_log_query_handlerton(THD *thd, - plugin_ref plugin, - void *args) -{ - return binlog_log_query_handlerton2(thd, plugin_hton(plugin), args); -} - -void ha_binlog_log_query(THD *thd, handlerton *hton, - enum_binlog_command binlog_command, - const char *query, uint query_length, - const char *db, const char *table_name) -{ - struct binlog_log_query_st b; - b.binlog_command= binlog_command; - b.query= query; - b.query_length= query_length; - b.db= db; - b.table_name= table_name; - if (hton == 0) - plugin_foreach(thd, binlog_log_query_handlerton, - MYSQL_STORAGE_ENGINE_PLUGIN, &b); - else - binlog_log_query_handlerton2(thd, hton, &b); -} -#endif - - /** Read first row between two ranges. Store ranges for future calls to read_range_next. 
@@ -5704,10 +5588,13 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table) DBUG_ASSERT(table->s->cached_row_logging_check == 0 || table->s->cached_row_logging_check == 1); - return (thd->is_current_stmt_binlog_format_row() && + return thd->is_current_stmt_binlog_format_row() && table->s->cached_row_logging_check && (thd->variables.option_bits & OPTION_BIN_LOG) && - mysql_bin_log.is_open()); + /* applier and replayer should not binlog */ + ((IF_WSREP(WSREP_EMULATE_BINLOG(thd) && + thd->wsrep_exec_mode != REPL_RECV, 0)) || + mysql_bin_log.is_open()); } @@ -5807,6 +5694,11 @@ static int binlog_log_row(TABLE* table, bool error= 0; THD *const thd= table->in_use; + /* only InnoDB tables will be replicated through binlog emulation */ + if (WSREP_EMULATE_BINLOG(thd) && + table->file->partition_ht()->db_type != DB_TYPE_INNODB) + return 0; + if (check_table_binlog_row_based(thd, table)) { MY_BITMAP cols; @@ -6136,6 +6028,74 @@ void handler::set_lock_type(enum thr_lock_type lock) table->reginfo.lock_type= lock; } +#ifdef WITH_WSREP +/** + @details + This function makes the storage engine to force the victim transaction + to abort. Currently, only innodb has this functionality, but any SE + implementing the wsrep API should provide this service to support + multi-master operation. 
+ + @param bf_thd brute force THD asking for the abort + @param victim_thd victim THD to be aborted + + @return + always 0 +*/ + +int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal) +{ + DBUG_ENTER("ha_abort_transaction"); + if (!WSREP(bf_thd) && + !(wsrep_OSU_method_options == WSREP_OSU_RSU && + bf_thd->wsrep_exec_mode == TOTAL_ORDER)) { + DBUG_RETURN(0); + } + + THD_TRANS *trans= &victim_thd->transaction.all; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + + for (; ha_info; ha_info= ha_info_next) + { + handlerton *hton= ha_info->ht(); + if (!hton->abort_transaction) + WSREP_WARN("cannot abort transaction"); + else + hton->abort_transaction(hton, bf_thd, victim_thd, signal); + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + DBUG_RETURN(0); +} + +void ha_fake_trx_id(THD *thd) +{ + DBUG_ENTER("ha_fake_trx_id"); + if (!WSREP(thd)) + { + DBUG_VOID_RETURN; + } + + THD_TRANS *trans= &thd->transaction.all; + Ha_trx_info *ha_info= trans->ha_list, *ha_info_next; + + for (; ha_info; ha_info= ha_info_next) + { + handlerton *hton= ha_info->ht(); + if (!hton->fake_trx_id) + { + WSREP_WARN("cannot get fake InnoDB transaction ID"); + } + else + hton->fake_trx_id(hton, thd); + ha_info_next= ha_info->next(); + ha_info->reset(); /* keep it conveniently zero-filled */ + } + DBUG_VOID_RETURN; +} +#endif /* WITH_WSREP */ + + #ifdef TRANS_LOG_MGM_EXAMPLE_CODE /* Example of transaction log management functions based on assumption that logs diff --git a/sql/handler.h b/sql/handler.h index 1e4fe5557b6..c91b26f768f 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -197,17 +197,11 @@ enum enum_alter_inplace_result { #define HA_RECORD_MUST_BE_CLEAN_ON_WRITE (1ULL << 41) /* - Table condition pushdown must be performed regardless of - 'engine_condition_pushdown' setting. - - This flag is aimed at storage engines that come with "special" predicates - that can only be evaluated inside the storage engine. 
- For example, when one does - select * from sphinx_table where query='{fulltext_query}' - then the "query=..." condition must be always pushed down into storage - engine. + This storage engine supports condition pushdown */ -#define HA_MUST_USE_TABLE_CONDITION_PUSHDOWN (1ULL << 42) +#define HA_CAN_TABLE_CONDITION_PUSHDOWN (1ULL << 42) +/* old name for the same flag */ +#define HA_MUST_USE_TABLE_CONDITION_PUSHDOWN HA_CAN_TABLE_CONDITION_PUSHDOWN /** The handler supports read before write removal optimization @@ -349,9 +343,6 @@ enum enum_alter_inplace_result { /* Note: the following includes binlog and closing 0. - so: innodb + bdb + ndb + binlog + myisam + myisammrg + archive + - example + csv + heap + blackhole + federated + 0 - (yes, the sum is deliberately inaccurate) TODO remove the limit, use dynarrays */ #define MAX_HA 64 @@ -429,7 +420,6 @@ enum legacy_db_type DB_TYPE_MYISAM=9, DB_TYPE_MRG_MYISAM=10, DB_TYPE_INNODB=12, - DB_TYPE_NDBCLUSTER=14, DB_TYPE_EXAMPLE_DB=15, DB_TYPE_ARCHIVE_DB=16, DB_TYPE_CSV_DB=17, @@ -726,7 +716,6 @@ enum enum_schema_tables SCH_ALL_PLUGINS, SCH_APPLICABLE_ROLES, SCH_CHARSETS, - SCH_CLIENT_STATS, SCH_COLLATIONS, SCH_COLLATION_CHARACTER_SET_APPLICABILITY, SCH_COLUMNS, @@ -738,7 +727,6 @@ enum enum_schema_tables SCH_FILES, SCH_GLOBAL_STATUS, SCH_GLOBAL_VARIABLES, - SCH_INDEX_STATS, SCH_KEY_CACHES, SCH_KEY_COLUMN_USAGE, SCH_OPEN_TABLES, @@ -754,17 +742,14 @@ enum enum_schema_tables SCH_SESSION_STATUS, SCH_SESSION_VARIABLES, SCH_STATISTICS, - SCH_STATUS, + SCH_SYSTEM_VARIABLES, SCH_TABLES, SCH_TABLESPACES, SCH_TABLE_CONSTRAINTS, SCH_TABLE_NAMES, SCH_TABLE_PRIVILEGES, - SCH_TABLE_STATS, SCH_TRIGGERS, SCH_USER_PRIVILEGES, - SCH_USER_STATS, - SCH_VARIABLES, SCH_VIEWS }; @@ -1230,6 +1215,11 @@ struct handlerton enum handler_create_iterator_result (*create_iterator)(handlerton *hton, enum handler_iterator_type type, struct handler_iterator *fill_this_in); + int (*abort_transaction)(handlerton *hton, THD *bf_thd, + THD *victim_thd, my_bool 
signal); + int (*set_checkpoint)(handlerton *hton, const XID* xid); + int (*get_checkpoint)(handlerton *hton, XID* xid); + void (*fake_trx_id)(handlerton *hton, THD *thd); /* Optional clauses in the CREATE/ALTER TABLE */ @@ -4080,6 +4070,12 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv); bool ha_rollback_to_savepoint_can_release_mdl(THD *thd); int ha_savepoint(THD *thd, SAVEPOINT *sv); int ha_release_savepoint(THD *thd, SAVEPOINT *sv); +#ifdef WITH_WSREP +int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal); +void ha_fake_trx_id(THD *thd); +#else +inline void ha_fake_trx_id(THD *thd) { } +#endif /* these are called by storage engines */ void trans_register_ha(THD *thd, bool all, handlerton *ht); @@ -4092,25 +4088,6 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht); #define trans_need_2pc(thd, all) ((total_ha_2pc > 1) && \ !((all ? &thd->transaction.all : &thd->transaction.stmt)->no_2pc)) -#ifdef HAVE_NDB_BINLOG -int ha_reset_logs(THD *thd); -int ha_binlog_index_purge_file(THD *thd, const char *file); -void ha_reset_slave(THD *thd); -void ha_binlog_log_query(THD *thd, handlerton *db_type, - enum_binlog_command binlog_command, - const char *query, uint query_length, - const char *db, const char *table_name); -void ha_binlog_wait(THD *thd); -int ha_binlog_end(THD *thd); -#else -#define ha_reset_logs(a) do {} while (0) -#define ha_binlog_index_purge_file(a,b) do {} while (0) -#define ha_reset_slave(a) do {} while (0) -#define ha_binlog_log_query(a,b,c,d,e,f,g) do {} while (0) -#define ha_binlog_wait(a) do {} while (0) -#define ha_binlog_end(a) do {} while (0) -#endif - const char *get_canonical_filename(handler *file, const char *path, char *tmp_path); bool mysql_xa_recover(THD *thd); diff --git a/sql/innodb_priv.h b/sql/innodb_priv.h index b9e471b3b13..ec85aa352f8 100644 --- a/sql/innodb_priv.h +++ b/sql/innodb_priv.h @@ -25,7 +25,6 @@ class THD; int get_quote_char_for_identifier(THD *thd, const char *name, uint length); bool 
schema_table_store_record(THD *thd, TABLE *table); void localtime_to_TIME(MYSQL_TIME *to, struct tm *from); -bool check_global_access(THD *thd, ulong want_access, bool no_errors=false); uint strconvert(CHARSET_INFO *from_cs, const char *from, uint from_length, CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors); diff --git a/sql/item.cc b/sql/item.cc index 8b1ceac7420..058ddec8a2a 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1579,9 +1579,8 @@ Item_splocal::Item_splocal(const LEX_STRING &sp_var_name, enum_field_types sp_var_type, uint pos_in_q, uint len_in_q) :Item_sp_variable(sp_var_name.str, sp_var_name.length), - m_var_idx(sp_var_idx), - limit_clause_param(FALSE), - pos_in_query(pos_in_q), len_in_query(len_in_q) + Rewritable_query_parameter(pos_in_q, len_in_q), + m_var_idx(sp_var_idx) { maybe_null= TRUE; @@ -3236,14 +3235,13 @@ default_set_param_func(Item_param *param, Item_param::Item_param(uint pos_in_query_arg) : - state(NO_VALUE), + Rewritable_query_parameter(pos_in_query_arg, 1), + state(NO_VALUE), inout(IN_PARAM), item_result_type(STRING_RESULT), /* Don't pretend to be a literal unless value for this item is set. 
*/ item_type(PARAM_ITEM), param_type(MYSQL_TYPE_VARCHAR), - pos_in_query(pos_in_query_arg), set_param_func(default_set_param_func), - limit_clause_param(FALSE), m_out_param_info(NULL) { name= (char*) "?"; @@ -4108,6 +4106,13 @@ void Item_param::make_field(Send_field *field) field->type= m_out_param_info->type; } +bool Item_param::append_for_log(THD *thd, String *str) +{ + StringBuffer<STRING_BUFFER_USUAL_SIZE> buf; + const String *val= query_val_str(thd, &buf); + return str->append(*val); +} + /**************************************************************************** Item_copy ****************************************************************************/ diff --git a/sql/item.h b/sql/item.h index 29e727b8d5f..59c30737a10 100644 --- a/sql/item.h +++ b/sql/item.h @@ -528,6 +528,70 @@ public: { return NULL; } }; +/** + This is used for items in the query that needs to be rewritten + before binlogging + + At the moment this applies to Item_param and Item_splocal +*/ +class Rewritable_query_parameter +{ + public: + /* + Offset inside the query text. + Value of 0 means that this object doesn't have to be replaced + (for example SP variables in control statements) + */ + uint pos_in_query; + + /* + Byte length of parameter name in the statement. This is not + Item::name_length because name_length contains byte length of UTF8-encoded + name, but the query string is in the client charset. 
+ */ + uint len_in_query; + + bool limit_clause_param; + + Rewritable_query_parameter(uint pos_in_q= 0, uint len_in_q= 0) + : pos_in_query(pos_in_q), len_in_query(len_in_q), + limit_clause_param(false) + { } + + virtual ~Rewritable_query_parameter() { } + + virtual bool append_for_log(THD *thd, String *str) = 0; +}; + +class Copy_query_with_rewrite +{ + THD *thd; + const char *src; + size_t src_len, from; + String *dst; + + bool copy_up_to(size_t bytes) + { + DBUG_ASSERT(bytes >= from); + return dst->append(src + from, bytes - from); + } + +public: + + Copy_query_with_rewrite(THD *t, const char *s, size_t l, String *d) + :thd(t), src(s), src_len(l), from(0), dst(d) { } + + bool append(Rewritable_query_parameter *p) + { + if (copy_up_to(p->pos_in_query) || p->append_for_log(thd, dst)) + return true; + from= p->pos_in_query + p->len_in_query; + return false; + } + + bool finalize() + { return copy_up_to(src_len); } +}; struct st_dyncall_create_def { @@ -570,6 +634,7 @@ class COND_EQUAL; class st_select_lex_unit; class Item_func_not; +class Item_splocal; class Item { Item(const Item &); /* Prevent use of these */ @@ -1431,7 +1496,9 @@ public: delete this; } - virtual bool is_splocal() { return 0; } /* Needed for error checking */ + virtual Item_splocal *get_item_splocal() { return 0; } + virtual Rewritable_query_parameter *get_rewritable_query_parameter() + { return 0; } /* Return Settable_routine_parameter interface of the Item. Return 0 @@ -1690,7 +1757,8 @@ inline bool Item_sp_variable::send(Protocol *protocol, String *str) *****************************************************************************/ class Item_splocal :public Item_sp_variable, - private Settable_routine_parameter + private Settable_routine_parameter, + public Rewritable_query_parameter { uint m_var_idx; @@ -1698,39 +1766,10 @@ class Item_splocal :public Item_sp_variable, Item_result m_result_type; enum_field_types m_field_type; public: - /* - If this variable is a parameter in LIMIT clause. 
- Used only during NAME_CONST substitution, to not append - NAME_CONST to the resulting query and thus not break - the slave. - */ - bool limit_clause_param; - /* - Position of this reference to SP variable in the statement (the - statement itself is in sp_instr_stmt::m_query). - This is valid only for references to SP variables in statements, - excluding DECLARE CURSOR statement. It is used to replace references to SP - variables with NAME_CONST calls when putting statements into the binary - log. - Value of 0 means that this object doesn't corresponding to reference to - SP variable in query text. - */ - uint pos_in_query; - /* - Byte length of SP variable name in the statement (see pos_in_query). - The value of this field may differ from the name_length value because - name_length contains byte length of UTF8-encoded item name, but - the query string (see sp_instr_stmt::m_query) is currently stored with - a charset from the SET NAMES statement. - */ - uint len_in_query; - Item_splocal(const LEX_STRING &sp_var_name, uint sp_var_idx, enum_field_types sp_var_type, uint pos_in_q= 0, uint len_in_q= 0); - bool is_splocal() { return 1; } /* Needed for error checking */ - Item *this_item(); const Item *this_item() const; Item **this_item_addr(THD *thd, Item **); @@ -1750,10 +1789,15 @@ private: bool set_value(THD *thd, sp_rcontext *ctx, Item **it); public: + Item_splocal *get_item_splocal() { return this; } + + Rewritable_query_parameter *get_rewritable_query_parameter() + { return this; } + Settable_routine_parameter *get_settable_routine_parameter() - { - return this; - } + { return this; } + + bool append_for_log(THD *thd, String *str); }; /***************************************************************************** @@ -2228,7 +2272,8 @@ public: /* Item represents one placeholder ('?') of prepared statement */ class Item_param :public Item, - private Settable_routine_parameter + private Settable_routine_parameter, + public Rewritable_query_parameter { char 
cnvbuf[MAX_FIELD_WIDTH]; String cnvstr; @@ -2241,6 +2286,7 @@ public: STRING_VALUE, TIME_VALUE, LONG_DATA_VALUE, DECIMAL_VALUE } state; + enum { IN_PARAM, OUT_PARAM } inout; /* A buffer for string and long data values. Historically all allocated @@ -2292,11 +2338,6 @@ public: supply for this placeholder in mysql_stmt_execute. */ enum enum_field_types param_type; - /* - Offset of placeholder inside statement text. Used to create - no-placeholders version of this statement for the binary log. - */ - uint pos_in_query; Item_param(uint pos_in_query_arg); @@ -2362,17 +2403,16 @@ public: Otherwise return FALSE. */ bool eq(const Item *item, bool binary_cmp) const; - /** Item is a argument to a limit clause. */ - bool limit_clause_param; void set_param_type_and_swap_value(Item_param *from); -private: - virtual inline Settable_routine_parameter * - get_settable_routine_parameter() - { - return this; - } + Rewritable_query_parameter *get_rewritable_query_parameter() + { return this; } + Settable_routine_parameter *get_settable_routine_parameter() + { return this; } + + bool append_for_log(THD *thd, String *str); +private: virtual bool set_value(THD *thd, sp_rcontext *ctx, Item **it); virtual void set_out_param_info(Send_field *info); diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 289668f24ca..f897bac6aa8 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5869,7 +5869,7 @@ void Item_equal::merge(Item_equal *item) Item *c= item->get_const(); if (c) item->equal_items.pop(); - equal_items.concat(&item->equal_items); + equal_items.append(&item->equal_items); if (c) { /* diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index bf28b00c908..cd53ee731f7 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1652,12 +1652,12 @@ public: void add_at_head(List<Item> *nlist) { DBUG_ASSERT(nlist->elements); - list.prepand(nlist); + list.prepend(nlist); } void add_at_end(List<Item> *nlist) { DBUG_ASSERT(nlist->elements); - list.concat(nlist); + 
list.append(nlist); } bool fix_fields(THD *, Item **ref); void fix_after_pullout(st_select_lex *new_parent, Item **ref); diff --git a/sql/item_func.cc b/sql/item_func.cc index ccb7ec56021..2b89aa04295 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -2769,7 +2769,20 @@ void Item_func_rand::seed_random(Item *arg) TODO: do not do reinit 'rand' for every execute of PS/SP if args[0] is a constant. */ - uint32 tmp= (uint32) arg->val_int(); + uint32 tmp; +#ifdef WITH_WSREP + THD *thd= current_thd; + if (WSREP(thd)) + { + if (thd->wsrep_exec_mode==REPL_RECV) + tmp= thd->wsrep_rand; + else + tmp= thd->wsrep_rand= (uint32) arg->val_int(); + } + else +#endif /* WITH_WSREP */ + tmp= (uint32) arg->val_int(); + my_rnd_init(rand, (uint32) (tmp*0x10001L+55555555L), (uint32) (tmp*0x10000001L)); } diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index ec6ab0f3040..fa6ba706718 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -163,8 +163,7 @@ String *Item_func_md5::val_str_ascii(String *str) uchar digest[16]; null_value=0; - compute_md5_hash((char *) digest, (const char *) sptr->ptr(), - sptr->length()); + compute_md5_hash(digest, (const char *) sptr->ptr(), sptr->length()); if (str->alloc(32)) // Ensure that memory is free { null_value=1; diff --git a/sql/keycaches.cc b/sql/keycaches.cc index 120aa7e1029..9426ccc9e95 100644 --- a/sql/keycaches.cc +++ b/sql/keycaches.cc @@ -103,7 +103,7 @@ LEX_STRING default_key_cache_base= {C_STRING_WITH_LEN("default")}; KEY_CACHE zero_key_cache; ///< @@nonexistent_cache.param->value_ptr() points here -KEY_CACHE *get_key_cache(LEX_STRING *cache_name) +KEY_CACHE *get_key_cache(const LEX_STRING *cache_name) { if (!cache_name || ! 
cache_name->length) cache_name= &default_key_cache_base; diff --git a/sql/keycaches.h b/sql/keycaches.h index 32537339e2e..fff48d51c6f 100644 --- a/sql/keycaches.h +++ b/sql/keycaches.h @@ -40,7 +40,7 @@ extern KEY_CACHE zero_key_cache; extern NAMED_ILIST key_caches; KEY_CACHE *create_key_cache(const char *name, uint length); -KEY_CACHE *get_key_cache(LEX_STRING *cache_name); +KEY_CACHE *get_key_cache(const LEX_STRING *cache_name); KEY_CACHE *get_or_create_key_cache(const char *name, uint length); void free_key_cache(const char *name, KEY_CACHE *key_cache); bool process_key_caches(process_key_cache_t func, void *param); diff --git a/sql/lex.h b/sql/lex.h index d66160ffec0..3921f388857 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -75,6 +75,7 @@ static SYMBOL symbols[] = { { "ASCII", SYM(ASCII_SYM)}, { "ASENSITIVE", SYM(ASENSITIVE_SYM)}, { "AT", SYM(AT_SYM)}, + { "ATOMIC", SYM(ATOMIC_SYM)}, { "AUTHORS", SYM(AUTHORS_SYM)}, { "AUTO_INCREMENT", SYM(AUTO_INC)}, { "AUTOEXTEND_SIZE", SYM(AUTOEXTEND_SIZE_SYM)}, @@ -115,7 +116,6 @@ static SYMBOL symbols[] = { { "CIPHER", SYM(CIPHER_SYM)}, { "CLASS_ORIGIN", SYM(CLASS_ORIGIN_SYM)}, { "CLIENT", SYM(CLIENT_SYM)}, - { "CLIENT_STATISTICS", SYM(CLIENT_STATS_SYM)}, { "CLOSE", SYM(CLOSE_SYM)}, { "COALESCE", SYM(COALESCE)}, { "CODE", SYM(CODE_SYM)}, @@ -272,14 +272,13 @@ static SYMBOL symbols[] = { { "HOUR_SECOND", SYM(HOUR_SECOND_SYM)}, { "ID", SYM(ID_SYM)}, { "IDENTIFIED", SYM(IDENTIFIED_SYM)}, - { "IF", SYM(IF)}, + { "IF", SYM(IF_SYM)}, { "IGNORE", SYM(IGNORE_SYM)}, { "IGNORE_SERVER_IDS", SYM(IGNORE_SERVER_IDS_SYM)}, { "IMPORT", SYM(IMPORT)}, { "IN", SYM(IN_SYM)}, { "INDEX", SYM(INDEX_SYM)}, { "INDEXES", SYM(INDEXES)}, - { "INDEX_STATISTICS", SYM(INDEX_STATS_SYM)}, { "INFILE", SYM(INFILE)}, { "INITIAL_SIZE", SYM(INITIAL_SIZE_SYM)}, { "INNER", SYM(INNER_SYM)}, @@ -364,6 +363,7 @@ static SYMBOL symbols[] = { { "MAX_QUERIES_PER_HOUR", SYM(MAX_QUERIES_PER_HOUR)}, { "MAX_ROWS", SYM(MAX_ROWS)}, { "MAX_SIZE", SYM(MAX_SIZE_SYM)}, + { 
"MAX_STATEMENT_TIME", SYM(MAX_STATEMENT_TIME_SYM)}, { "MAX_UPDATES_PER_HOUR", SYM(MAX_UPDATES_PER_HOUR)}, { "MAX_USER_CONNECTIONS", SYM(MAX_USER_CONNECTIONS_SYM)}, { "MAXVALUE", SYM(MAX_VALUE_SYM)}, @@ -395,8 +395,6 @@ static SYMBOL symbols[] = { { "NAMES", SYM(NAMES_SYM)}, { "NATIONAL", SYM(NATIONAL_SYM)}, { "NATURAL", SYM(NATURAL)}, - { "NDB", SYM(NDBCLUSTER_SYM)}, - { "NDBCLUSTER", SYM(NDBCLUSTER_SYM)}, { "NCHAR", SYM(NCHAR_SYM)}, { "NEW", SYM(NEW_SYM)}, { "NEXT", SYM(NEXT_SYM)}, @@ -590,7 +588,6 @@ static SYMBOL symbols[] = { { "TABLE_NAME", SYM(TABLE_NAME_SYM)}, { "TABLES", SYM(TABLES)}, { "TABLESPACE", SYM(TABLESPACE)}, - { "TABLE_STATISTICS", SYM(TABLE_STATS_SYM)}, { "TABLE_CHECKSUM", SYM(TABLE_CHECKSUM_SYM)}, { "TEMPORARY", SYM(TEMPORARY)}, { "TEMPTABLE", SYM(TEMPTABLE_SYM)}, @@ -634,7 +631,6 @@ static SYMBOL symbols[] = { { "USE", SYM(USE_SYM)}, { "USER", SYM(USER)}, { "USER_RESOURCES", SYM(RESOURCES)}, - { "USER_STATISTICS", SYM(USER_STATS_SYM)}, { "USE_FRM", SYM(USE_FRM)}, { "USING", SYM(USING)}, { "UTC_DATE", SYM(UTC_DATE_SYM)}, diff --git a/sql/lock.cc b/sql/lock.cc index 54c7720e750..a74a12c41c3 100644 --- a/sql/lock.cc +++ b/sql/lock.cc @@ -83,6 +83,7 @@ #include "sql_acl.h" // SUPER_ACL #include <hash.h> #include <assert.h> +#include "wsrep_mysqld.h" /** @defgroup Locking Locking @@ -314,6 +315,7 @@ bool mysql_lock_tables(THD *thd, MYSQL_LOCK *sql_lock, uint flags) /* Copy the lock data array. thr_multi_lock() reorders its contents. */ memmove(sql_lock->locks + sql_lock->lock_count, sql_lock->locks, sql_lock->lock_count * sizeof(*sql_lock->locks)); + /* Lock on the copied half of the lock data array. 
*/ rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks + sql_lock->lock_count, @@ -329,7 +331,10 @@ end: { thd->send_kill_message(); if (!rc) + { mysql_unlock_tables(thd, sql_lock, 0); + THD_STAGE_INFO(thd, stage_after_table_lock); + } rc= 1; } else if (rc > 1) @@ -380,6 +385,8 @@ static int lock_external(THD *thd, TABLE **tables, uint count) void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock, bool free_lock) { DBUG_ENTER("mysql_unlock_tables"); + THD_STAGE_INFO(thd, stage_unlocking_tables); + if (sql_lock->table_count) unlock_external(thd, sql_lock->table, sql_lock->table_count); if (sql_lock->lock_count) @@ -1052,6 +1059,13 @@ void Global_read_lock::unlock_global_read_lock(THD *thd) { thd->mdl_context.release_lock(m_mdl_blocks_commits_lock); m_mdl_blocks_commits_lock= NULL; +#ifdef WITH_WSREP + if (WSREP_ON) + { + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + wsrep->resume(wsrep); + } +#endif /* WITH_WSREP */ } thd->mdl_context.release_lock(m_mdl_global_shared_lock); m_mdl_global_shared_lock= NULL; @@ -1084,9 +1098,22 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd) If we didn't succeed lock_global_read_lock(), or if we already suceeded make_global_read_lock_block_commit(), do nothing. 
*/ + if (m_state != GRL_ACQUIRED) DBUG_RETURN(0); +#ifdef WITH_WSREP + if (WSREP_ON && m_mdl_blocks_commits_lock) + { + WSREP_DEBUG("GRL was in block commit mode when entering " + "make_global_read_lock_block_commit"); + thd->mdl_context.release_lock(m_mdl_blocks_commits_lock); + m_mdl_blocks_commits_lock= NULL; + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + wsrep->resume(wsrep); + } +#endif /* WITH_WSREP */ + mdl_request.init(MDL_key::COMMIT, "", "", MDL_SHARED, MDL_EXPLICIT); if (thd->mdl_context.acquire_lock(&mdl_request, @@ -1096,6 +1123,25 @@ bool Global_read_lock::make_global_read_lock_block_commit(THD *thd) m_mdl_blocks_commits_lock= mdl_request.ticket; m_state= GRL_ACQUIRED_AND_BLOCKS_COMMIT; +#ifdef WITH_WSREP + if (WSREP_ON) + { + long long ret = wsrep->pause(wsrep); + if (ret >= 0) + { + wsrep_locked_seqno= ret; + } + else if (ret != -ENOSYS) /* -ENOSYS - no provider */ + { + WSREP_ERROR("Failed to pause provider: %lld (%s)", -ret, strerror(-ret)); + + DBUG_ASSERT(m_mdl_blocks_commits_lock == NULL); + wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + my_error(ER_LOCK_DEADLOCK, MYF(0)); + DBUG_RETURN(TRUE); + } + } +#endif /* WITH_WSREP */ DBUG_RETURN(FALSE); } diff --git a/sql/log.cc b/sql/log.cc index 75a895e25f8..dcdf2bcc74d 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -55,6 +55,7 @@ #include "debug_sync.h" #include "sql_show.h" #include "my_pthread.h" +#include "wsrep_mysqld.h" /* max size of the log message */ #define MAX_LOG_BUFFER_SIZE 1024 @@ -63,6 +64,7 @@ #define FLAGSTR(V,F) ((V)&(F)?#F" ":"") +handlerton *binlog_hton; LOGGER logger; MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period); @@ -511,13 +513,11 @@ private: binlog_cache_mngr(const binlog_cache_mngr& info); }; -handlerton *binlog_hton; - bool LOGGER::is_log_table_enabled(uint log_table_type) { switch (log_table_type) { case QUERY_LOG_SLOW: - return (table_log_handler != NULL) && opt_slow_log; + return (table_log_handler != NULL) && global_system_variables.sql_log_slow; case QUERY_LOG_GENERAL: 
return (table_log_handler != NULL) && opt_log ; default: @@ -526,7 +526,6 @@ bool LOGGER::is_log_table_enabled(uint log_table_type) } } - /** Check if a given table is opened log table @@ -1048,7 +1047,7 @@ bool Log_to_file_event_handler::init() { if (!is_initialized) { - if (opt_slow_log) + if (global_system_variables.sql_log_slow) mysql_slow_log.open_slow_log(opt_slow_logname); if (opt_log) @@ -1072,7 +1071,7 @@ void Log_to_file_event_handler::flush() /* reopen log files */ if (opt_log) mysql_log.reopen_file(); - if (opt_slow_log) + if (global_system_variables.sql_log_slow) mysql_slow_log.reopen_file(); } @@ -1200,7 +1199,7 @@ bool LOGGER::flush_slow_log() logger.lock_exclusive(); /* Reopen slow log file */ - if (opt_slow_log) + if (global_system_variables.sql_log_slow) file_log_handler->get_mysql_slow_log()->reopen_file(); /* End of log flush */ @@ -1270,11 +1269,11 @@ bool LOGGER::slow_log_print(THD *thd, const char *query, uint query_length, if (*slow_log_handler_list) { /* do not log slow queries from replication threads */ - if (thd->slave_thread && !opt_log_slow_slave_statements) + if (!thd->variables.sql_log_slow) return 0; lock_shared(); - if (!opt_slow_log) + if (!global_system_variables.sql_log_slow) { unlock(); return 0; @@ -1448,7 +1447,7 @@ bool LOGGER::activate_log_handler(THD* thd, uint log_type) lock_exclusive(); switch (log_type) { case QUERY_LOG_SLOW: - if (!opt_slow_log) + if (!global_system_variables.sql_log_slow) { file_log= file_log_handler->get_mysql_slow_log(); @@ -1462,7 +1461,7 @@ bool LOGGER::activate_log_handler(THD* thd, uint log_type) else { init_slow_log(log_output_options); - opt_slow_log= TRUE; + global_system_variables.sql_log_slow= TRUE; } } break; @@ -1501,7 +1500,7 @@ void LOGGER::deactivate_log_handler(THD *thd, uint log_type) switch (log_type) { case QUERY_LOG_SLOW: - tmp_opt= &opt_slow_log; + tmp_opt= &global_system_variables.sql_log_slow; file_log= file_log_handler->get_mysql_slow_log(); break; case QUERY_LOG_GENERAL: @@ 
-1577,7 +1576,7 @@ binlog_trans_log_savepos(THD *thd, my_off_t *pos) DBUG_ENTER("binlog_trans_log_savepos"); DBUG_ASSERT(pos != NULL); binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data(); - DBUG_ASSERT(mysql_bin_log.is_open()); + DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()); *pos= cache_mngr->trx_cache.get_byte_position(); DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos)); DBUG_VOID_RETURN; @@ -1625,7 +1624,8 @@ binlog_trans_log_truncate(THD *thd, my_off_t pos) int binlog_init(void *p) { binlog_hton= (handlerton *)p; - binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO; + binlog_hton->state= (WSREP_ON || opt_bin_log) ? SHOW_OPTION_YES + : SHOW_OPTION_NO; binlog_hton->db_type=DB_TYPE_BINLOG; binlog_hton->savepoint_offset= sizeof(my_off_t); binlog_hton->close_connection= binlog_close_connection; @@ -1745,6 +1745,16 @@ binlog_commit_flush_stmt_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr) { DBUG_ENTER("binlog_commit_flush_stmt_cache"); +#ifdef WITH_WSREP + if (thd->wsrep_mysql_replicated > 0) + { + DBUG_ASSERT(WSREP_ON); + WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d", + thd->wsrep_mysql_replicated); + return 0; + } +#endif + Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), FALSE, TRUE, TRUE, 0); DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE)); @@ -1900,12 +1910,12 @@ static bool trans_cannot_safely_rollback(THD *thd, bool all) return ((thd->variables.option_bits & OPTION_KEEP_LOG) || (trans_has_updated_non_trans_table(thd) && - thd->variables.binlog_format == BINLOG_FORMAT_STMT) || + thd->wsrep_binlog_format() == BINLOG_FORMAT_STMT) || (cache_mngr->trx_cache.changes_to_non_trans_temp_table() && - thd->variables.binlog_format == BINLOG_FORMAT_MIXED) || + thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED) || (trans_has_updated_non_trans_table(thd) && ending_single_stmt_trans(thd,all) && - thd->variables.binlog_format == BINLOG_FORMAT_MIXED)); + 
thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED)); } @@ -1928,6 +1938,12 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (!cache_mngr) + { + DBUG_ASSERT(WSREP(thd)); + DBUG_RETURN(0); + } + DBUG_PRINT("debug", ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", all, @@ -1984,6 +2000,12 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (!cache_mngr) + { + DBUG_ASSERT(WSREP(thd)); + DBUG_RETURN(0); + } + DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", YESNO(all), YESNO(thd->transaction.all.modified_non_trans_table), @@ -2011,8 +2033,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) cache_mngr->reset(false, true); DBUG_RETURN(error); } - - if (mysql_bin_log.check_write_error(thd)) + if (!wsrep_emulate_bin_log && mysql_bin_log.check_write_error(thd)) { /* "all == true" means that a "rollback statement" triggered the error and @@ -2043,9 +2064,9 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) else if (ending_trans(thd, all) || (!(thd->variables.option_bits & OPTION_KEEP_LOG) && (!stmt_has_updated_non_trans_table(thd) || - thd->variables.binlog_format != BINLOG_FORMAT_STMT) && + thd->wsrep_binlog_format() != BINLOG_FORMAT_STMT) && (!cache_mngr->trx_cache.changes_to_non_trans_temp_table() || - thd->variables.binlog_format != BINLOG_FORMAT_MIXED))) + thd->wsrep_binlog_format() != BINLOG_FORMAT_MIXED))) error= binlog_truncate_trx_cache(thd, cache_mngr, all); } @@ -2150,8 +2171,11 @@ bool MYSQL_BIN_LOG::check_write_error(THD *thd) static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv) { - DBUG_ENTER("binlog_savepoint_set"); int error= 1; + DBUG_ENTER("binlog_savepoint_set"); + + if 
(wsrep_emulate_bin_log) + DBUG_RETURN(0); char buf[1024]; String log_query(buf, sizeof(buf), &my_charset_bin); @@ -2190,7 +2214,8 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) non-transactional table. Otherwise, truncate the binlog cache starting from the SAVEPOINT command. */ - if (unlikely(trans_has_updated_non_trans_table(thd) || + if (!wsrep_emulate_bin_log && + unlikely(trans_has_updated_non_trans_table(thd) || (thd->variables.option_bits & OPTION_KEEP_LOG))) { char buf[1024]; @@ -2204,7 +2229,9 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) TRUE, FALSE, TRUE, errcode); DBUG_RETURN(mysql_bin_log.write(&qinfo)); } + binlog_trans_log_truncate(thd, *(my_off_t*)sv); + DBUG_RETURN(0); } @@ -3862,8 +3889,6 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd, bool create_new_log, mysql_mutex_unlock(&LOCK_xid_list); } - if (thd) - ha_reset_logs(thd); /* We need to get both locks to be sure that no one is trying to write to the index log file. @@ -4501,13 +4526,6 @@ int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space, } error= 0; - if (!need_mutex) - { - /* - This is to avoid triggering an error in NDB. 
- */ - ha_binlog_index_purge_file(current_thd, log_info.log_file_name); - } DBUG_PRINT("info",("purging %s",log_info.log_file_name)); if (!my_delete(log_info.log_file_name, MYF(0))) @@ -5332,7 +5350,8 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional, is_transactional= 1; /* Pre-conditions */ - DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); + DBUG_ASSERT(is_current_stmt_binlog_format_row()); + DBUG_ASSERT(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open()); DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); Table_map_log_event @@ -5465,7 +5484,7 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, bool is_transactional) { DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)"); - DBUG_ASSERT(mysql_bin_log.is_open()); + DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()); DBUG_PRINT("enter", ("event: 0x%lx", (long) event)); int error= 0; @@ -5791,7 +5810,9 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) mostly called if is_open() *was* true a few instructions before, but it could have changed since. 
*/ - if (likely(is_open())) + /* applier and replayer can skip writing binlog events */ + if ((WSREP_EMULATE_BINLOG(thd) && + IF_WSREP(thd->wsrep_exec_mode != REPL_RECV, 0)) || is_open()) { my_off_t UNINIT_VAR(my_org_b_tell); #ifdef HAVE_REPLICATION @@ -6130,6 +6151,15 @@ int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge) int error= 0; DBUG_ENTER("MYSQL_BIN_LOG::rotate"); + if (wsrep_to_isolation) + { + DBUG_ASSERT(WSREP_ON); + *check_purge= false; + WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d", + wsrep_to_isolation); + DBUG_RETURN(0); + } + //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log); *check_purge= false; @@ -6675,6 +6705,9 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, Ha_trx_info *ha_info; DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog"); + if (wsrep_emulate_bin_log) + DBUG_RETURN(0); + entry.thd= thd; entry.cache_mngr= cache_mngr; entry.error= 0; @@ -6683,6 +6716,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, entry.using_trx_cache= using_trx_cache; entry.need_unlog= false; ha_info= all ? 
thd->transaction.all.ha_list : thd->transaction.stmt.ha_list; + for (; ha_info; ha_info= ha_info->next()) { if (ha_info->is_started() && ha_info->ht() != binlog_hton && @@ -8819,7 +8853,10 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all, binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data(); if (!cache_mngr) + { + WSREP_DEBUG("Skipping empty log_xid: %s", thd->query()); DBUG_RETURN(0); + } cache_mngr->using_xa= TRUE; cache_mngr->xa_xid= xid; @@ -9649,3 +9686,50 @@ maria_declare_plugin(binlog) MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ } maria_declare_plugin_end; + +#ifdef WITH_WSREP +IO_CACHE * get_trans_log(THD * thd) +{ + binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*) + thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + return cache_mngr->get_binlog_cache_log(true); + + WSREP_DEBUG("binlog cache not initialized, conn :%ld", thd->thread_id); + return NULL; +} + + +bool wsrep_trans_cache_is_empty(THD *thd) +{ + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + return (!cache_mngr || cache_mngr->trx_cache.empty()); +} + + +void thd_binlog_trx_reset(THD * thd) +{ + /* + todo: fix autocommit select to not call the caller + */ + if (thd_get_ha_data(thd, binlog_hton) != NULL) + { + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + cache_mngr->reset(false, true); + } + thd->clear_binlog_table_maps(); +} + + +void thd_binlog_rollback_stmt(THD * thd) +{ + WSREP_DEBUG("thd_binlog_rollback_stmt :%ld", thd->thread_id); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + if (cache_mngr) + cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF); +} +#endif /* WITH_WSREP */ diff --git a/sql/log.h b/sql/log.h index 67fcf068ec4..ac1e9f4a70f 100644 --- a/sql/log.h +++ b/sql/log.h @@ -19,6 +19,8 @@ #include "unireg.h" // REQUIRED: for other includes #include "handler.h" /* my_xid */ +#include 
"wsrep.h" +#include "wsrep_mysqld.h" class Relay_log_info; @@ -106,7 +108,7 @@ public: int log_and_order(THD *thd, my_xid xid, bool all, bool need_prepare_ordered, bool need_commit_ordered) { - DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */); + DBUG_ASSERT(0); return 1; } int unlog(ulong cookie, my_xid xid) { return 0; } @@ -1077,6 +1079,13 @@ end: DBUG_RETURN(error); } - +static inline TC_LOG *get_tc_log_implementation() +{ + if (total_ha_2pc <= 1) + return &tc_log_dummy; + if (opt_bin_log) + return &mysql_bin_log; + return &tc_log_mmap; +} #endif /* LOG_H */ diff --git a/sql/log_event.cc b/sql/log_event.cc index 6a85893803e..600a98916a9 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -44,7 +44,7 @@ #include <mysql/psi/mysql_statement.h> #include <strfunc.h> #include "compat56.h" - +#include "wsrep_mysqld.h" #endif /* MYSQL_CLIENT */ #include <base64.h> @@ -409,20 +409,6 @@ inline int idempotent_error_code(int err_code) inline int ignored_error_code(int err_code) { -#ifdef HAVE_NDB_BINLOG - /* - The following error codes are hard-coded and will always be ignored. - */ - switch (err_code) - { - case ER_DB_CREATE_EXISTS: - case ER_DB_DROP_EXISTS: - return 1; - default: - /* Nothing to do */ - break; - } -#endif return ((err_code == ER_SLAVE_IGNORED_TABLE) || (use_slave_mask && bitmap_is_set(&slave_error_mask, err_code))); } @@ -473,6 +459,7 @@ inline bool unexpected_error_code(int unexpected_error) case ER_NET_READ_ERROR: case ER_NET_ERROR_ON_WRITE: case ER_QUERY_INTERRUPTED: + case ER_STATEMENT_TIMEOUT: case ER_CONNECTION_KILLED: case ER_SERVER_SHUTDOWN: case ER_NEW_ABORTING_CONNECTION: @@ -3092,6 +3079,15 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, { time_t end_time; +#ifdef WITH_WSREP + /* + If Query_log_event will contain non trans keyword (not BEGIN, COMMIT, + SAVEPOINT or ROLLBACK) we disable PA for this transaction. 
+ */ + if (WSREP_ON && !is_trans_keyword()) + thd->wsrep_PA_safe= false; +#endif /* WITH_WSREP */ + memset(&user, 0, sizeof(user)); memset(&host, 0, sizeof(host)); @@ -4075,36 +4071,8 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, clear_all_errors(thd, const_cast<Relay_log_info*>(rli)); current_stmt_is_commit= is_commit(); - if (current_stmt_is_commit && rgi->tables_to_lock) - { - /* - Cleaning-up the last statement context: - the terminal event of the current statement flagged with - STMT_END_F got filtered out in ndb circular replication. - */ - int error; - char llbuff[22]; - if ((error= rows_event_stmt_cleanup(rgi, thd))) - { - const_cast<Relay_log_info*>(rli)->report(ERROR_LEVEL, error, - "Error in cleaning up after an event preceding the commit; " - "the group log file/position: %s %s", - const_cast<Relay_log_info*>(rli)->group_master_log_name, - llstr(const_cast<Relay_log_info*>(rli)->group_master_log_pos, - llbuff)); - } - /* - Executing a part of rli->stmt_done() logics that does not deal - with group position change. The part is redundant now but is - future-change-proof addon, e.g if COMMIT handling will start checking - invariants like IN_STMT flag must be off at committing the transaction. - */ - rgi->inc_event_relay_log_pos(); - } - else - { - rgi->slave_close_thread_tables(thd); - } + DBUG_ASSERT(!current_stmt_is_commit || !rgi->tables_to_lock); + rgi->slave_close_thread_tables(thd); /* Note: We do not need to execute reset_one_shot_variables() if this @@ -4280,6 +4248,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, THD_STAGE_INFO(thd, stage_init); MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(), thd->query_length()); + thd->enable_slow_log= thd->variables.sql_log_slow; mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); /* Finalize server status flags after executing a statement. 
*/ thd->update_server_status(); @@ -4287,18 +4256,6 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, } thd->variables.option_bits&= ~OPTION_MASTER_SQL_ERROR; - - /* - Resetting the enable_slow_log thd variable. - - We need to reset it back to the opt_log_slow_slave_statements - value after the statement execution (and slow logging - is done). It might have changed if the statement was an - admin statement (in which case, down in mysql_parse execution - thd->enable_slow_log is set to the value of - opt_log_slow_admin_statements). - */ - thd->enable_slow_log= opt_log_slow_slave_statements; } else { @@ -4519,6 +4476,22 @@ Query_log_event::do_shall_skip(rpl_group_info *rgi) DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); } } +#ifdef WITH_WSREP + else if (WSREP_ON && wsrep_mysql_replication_bundle && opt_slave_domain_parallel_threads == 0 && + thd->wsrep_mysql_replicated > 0 && + (is_begin() || is_commit())) + { + if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle) + { + WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated); + DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE); + } + else + { + thd->wsrep_mysql_replicated = 0; + } + } +#endif DBUG_RETURN(Log_event::do_shall_skip(rgi)); } @@ -7348,6 +7321,21 @@ Xid_log_event::do_shall_skip(rpl_group_info *rgi) thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_GTID_BEGIN); DBUG_RETURN(Log_event::EVENT_SKIP_COUNT); } +#ifdef WITH_WSREP + else if (wsrep_mysql_replication_bundle && WSREP_ON && + opt_slave_domain_parallel_threads == 0) + { + if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle) + { + WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated); + DBUG_RETURN(Log_event::EVENT_SKIP_IGNORE); + } + else + { + thd->wsrep_mysql_replicated = 0; + } + } +#endif DBUG_RETURN(Log_event::do_shall_skip(rgi)); } #endif /* !MYSQL_CLIENT */ @@ -9625,6 +9613,18 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) if (open_and_lock_tables(thd, 
rgi->tables_to_lock, FALSE, 0)) { uint actual_error= thd->get_stmt_da()->sql_errno(); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + WSREP_WARN("BF applier failed to open_and_lock_tables: %u, fatal: %d " + "wsrep = (exec_mode: %d conflict_state: %d seqno: %lld)", + thd->get_stmt_da()->sql_errno(), + thd->is_fatal_error, + thd->wsrep_exec_mode, + thd->wsrep_conflict_state, + (long long)wsrep_thd_trx_seqno(thd)); + } +#endif if (thd->is_slave_error || thd->is_fatal_error) { /* @@ -10771,8 +10771,8 @@ check_table_map(rpl_group_info *rgi, RPL_TABLE_LIST *table_list) DBUG_ENTER("check_table_map"); enum_tbl_map_status res= OK_TO_PROCESS; Relay_log_info *rli= rgi->rli; - - if (rgi->thd->slave_thread /* filtering is for slave only */ && + if ((rgi->thd->slave_thread /* filtering is for slave only */ || + IF_WSREP((WSREP(rgi->thd) && rgi->thd->wsrep_applier), 0)) && (!rli->mi->rpl_filter->db_ok(table_list->db) || (rli->mi->rpl_filter->is_on() && !rli->mi->rpl_filter->tables_ok("", table_list)))) res= FILTERED_OUT; @@ -11066,8 +11066,7 @@ Write_rows_log_event::do_before_row_operations(const Slave_reporting_capability todo: to introduce a property for the event (handler?) which forces applying the event in the replace (idempotent) fashion. */ - if ((slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) || - (m_table->s->db_type()->db_type == DB_TYPE_NDBCLUSTER)) + if (slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) { /* We are using REPLACE semantics and not INSERT IGNORE semantics @@ -11080,8 +11079,7 @@ Write_rows_log_event::do_before_row_operations(const Slave_reporting_capability /* Pretend we're executing a REPLACE command: this is needed for - InnoDB and NDB Cluster since they are not (properly) checking the - lex->duplicates flag. + InnoDB since it is not (properly) checking the lex->duplicates flag. 
*/ thd->lex->sql_command= SQLCOM_REPLACE; /* @@ -11089,23 +11087,10 @@ Write_rows_log_event::do_before_row_operations(const Slave_reporting_capability */ m_table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); /* - NDB specific: update from ndb master wrapped as Write_rows - so that the event should be applied to replace slave's row - - Also following is needed in case if we have AFTER DELETE triggers. + The following is needed in case if we have AFTER DELETE triggers. */ m_table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); - /* - NDB specific: if update from ndb master wrapped as Write_rows - does not find the row it's assumed idempotent binlog applying - is taking place; don't raise the error. - */ m_table->file->extra(HA_EXTRA_IGNORE_NO_KEY); - /* - TODO: the cluster team (Tomas?) says that it's better if the engine knows - how many rows are going to be inserted, then it can allocate needed memory - from the start. - */ } if (slave_run_triggers_for_rbr && !master_had_triggers && m_table->triggers ) m_table->prepare_triggers_for_insert_stmt_or_event(); @@ -11164,8 +11149,7 @@ Write_rows_log_event::do_after_row_operations(const Slave_reporting_capability * } m_table->next_number_field=0; m_table->auto_increment_field_not_null= FALSE; - if ((slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) || - m_table->s->db_type()->db_type == DB_TYPE_NDBCLUSTER) + if (slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) { m_table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); m_table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); @@ -11291,8 +11275,7 @@ Rows_log_event::write_row(rpl_group_info *rgi, slave_run_triggers_for_rbr && !master_had_triggers && table->triggers; auto_afree_ptr<char> key(NULL); - prepare_record(table, m_width, - table->file->ht->db_type != DB_TYPE_NDBCLUSTER); + prepare_record(table, m_width, true); /* unpack row into table->record[0] */ if ((error= unpack_current_row(rgi))) @@ -11520,7 +11503,19 @@ int Write_rows_log_event::do_exec_row(rpl_group_info *rgi) { DBUG_ASSERT(m_table != 
NULL); + const char *tmp= thd->get_proc_info(); + const char *message= "Write_rows_log_event::write_row()"; + +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Write_rows_log_event::write_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); int error= write_row(rgi, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT); + thd_proc_info(thd, tmp); if (error && !thd->is_error()) { @@ -11564,53 +11559,7 @@ uint8 Write_rows_log_event::get_trg_event_map() */ static bool record_compare(TABLE *table) { - /* - Need to set the X bit and the filler bits in both records since - there are engines that do not set it correctly. - - In addition, since MyISAM checks that one hasn't tampered with the - record, it is necessary to restore the old bytes into the record - after doing the comparison. - - TODO[record format ndb]: Remove it once NDB returns correct - records. Check that the other engines also return correct records. - */ - - DBUG_DUMP("record[0]", table->record[0], table->s->reclength); - DBUG_DUMP("record[1]", table->record[1], table->s->reclength); - bool result= FALSE; - uchar saved_x[2]= {0, 0}, saved_filler[2]= {0, 0}; - - if (table->s->null_bytes > 0) - { - for (int i = 0 ; i < 2 ; ++i) - { - /* - If we have an X bit then we need to take care of it. - */ - if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD)) - { - saved_x[i]= table->record[i][0]; - table->record[i][0]|= 1U; - } - - /* - If (last_null_bit_pos == 0 && null_bytes > 1), then: - - X bit (if any) + N nullable fields + M Field_bit fields = 8 bits - - Ie, the entire byte is used. 
- */ - if (table->s->last_null_bit_pos > 0) - { - saved_filler[i]= table->record[i][table->s->null_bytes - 1]; - table->record[i][table->s->null_bytes - 1]|= - 256U - (1U << table->s->last_null_bit_pos); - } - } - } - /** Compare full record only if: - there are no blob fields (otherwise we would also need @@ -11658,24 +11607,6 @@ static bool record_compare(TABLE *table) } record_compare_exit: - /* - Restore the saved bytes. - - TODO[record format ndb]: Remove this code once NDB returns the - correct record format. - */ - if (table->s->null_bytes > 0) - { - for (int i = 0 ; i < 2 ; ++i) - { - if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD)) - table->record[i][0]= saved_x[i]; - - if (table->s->last_null_bit_pos) - table->record[i][table->s->null_bytes - 1]= saved_filler[i]; - } - } - return result; } @@ -12035,21 +11966,6 @@ int Rows_log_event::find_row(rpl_group_info *rgi) while (record_compare(table)) { - /* - We need to set the null bytes to ensure that the filler bit - are all set when returning. There are storage engines that - just set the necessary bits on the bytes and don't set the - filler bits correctly. - - TODO[record format ndb]: Remove this code once NDB returns the - correct record format. 
- */ - if (table->s->null_bytes > 0) - { - table->record[0][table->s->null_bytes - 1]|= - 256U - (1U << table->s->last_null_bit_pos); - } - while ((error= table->file->ha_index_next(table->record[0]))) { /* We just skip records that has already been deleted */ @@ -12198,15 +12114,34 @@ Delete_rows_log_event::do_after_row_operations(const Slave_reporting_capability int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) { int error; + const char *tmp= thd->get_proc_info(); + const char *message= "Delete_rows_log_event::find_row()"; const bool invoke_triggers= slave_run_triggers_for_rbr && !master_had_triggers && m_table->triggers; DBUG_ASSERT(m_table != NULL); +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Delete_rows_log_event::find_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); if (!(error= find_row(rgi))) { /* Delete the record found, located in record[0] */ + message= "Delete_rows_log_event::ha_delete_row()"; +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Delete_rows_log_event::ha_delete_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif + thd_proc_info(thd, message); + if (invoke_triggers && process_triggers(TRG_EVENT_DELETE, TRG_ACTION_BEFORE, FALSE)) error= HA_ERR_GENERIC; // in case if error is not set yet @@ -12217,6 +12152,7 @@ int Delete_rows_log_event::do_exec_row(rpl_group_info *rgi) error= HA_ERR_GENERIC; // in case if error is not set yet m_table->file->ha_index_or_rnd_end(); } + thd_proc_info(thd, tmp); return error; } @@ -12344,8 +12280,18 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) { const bool invoke_triggers= slave_run_triggers_for_rbr && !master_had_triggers && m_table->triggers; + const char *tmp= thd->get_proc_info(); + const char *message= "Update_rows_log_event::find_row()"; DBUG_ASSERT(m_table != NULL); +#ifdef 
WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Update_rows_log_event::find_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); int error= find_row(rgi); if (error) { @@ -12355,6 +12301,7 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) */ m_curr_row= m_curr_row_end; unpack_current_row(rgi); + thd_proc_info(thd, tmp); return error; } @@ -12372,7 +12319,16 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) store_record(m_table,record[1]); m_curr_row= m_curr_row_end; + message= "Update_rows_log_event::unpack_current_row()"; +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Update_rows_log_event::unpack_current_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + /* this also updates m_curr_row_end */ + thd_proc_info(thd, message); if ((error= unpack_current_row(rgi))) goto err; @@ -12390,6 +12346,15 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) DBUG_DUMP("new values", m_table->record[0], m_table->s->reclength); #endif + message= "Update_rows_log_event::ha_update_row()"; +#ifdef WSREP_PROC_INFO + my_snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "Update_rows_log_event::ha_update_row(%lld)", + (long long) wsrep_thd_trx_seqno(thd)); + message= thd->wsrep_info; +#endif /* WSREP_PROC_INFO */ + + thd_proc_info(thd, message); if (invoke_triggers && process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_BEFORE, TRUE)) { @@ -12405,6 +12370,8 @@ Update_rows_log_event::do_exec_row(rpl_group_info *rgi) process_triggers(TRG_EVENT_UPDATE, TRG_ACTION_AFTER, TRUE)) error= HA_ERR_GENERIC; // in case if error is not set yet + thd_proc_info(thd, tmp); + err: m_table->file->ha_index_or_rnd_end(); return error; @@ -12494,6 +12461,49 @@ void Incident_log_event::pack_info(THD *thd, Protocol *protocol) m_incident, description(), 
m_message.str); protocol->store(buf, bytes, &my_charset_bin); } +#endif /* MYSQL_CLIENT */ + + +#if WITH_WSREP && !defined(MYSQL_CLIENT) +Format_description_log_event *wsrep_format_desc; // TODO: free them at the end +/* + Read the first event from (*arg_buf), which holds (*arg_buf_len) bytes. + On return (*arg_buf) is shifted past that event (by its encoded length) and + (*arg_buf_len) is reduced by the same amount. Returns NULL on failure. +*/ +#define WSREP_MAX_ALLOWED_PACKET (1024*1024*1024) // current protocol max + +Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event) +{ + char *head= (*arg_buf); + uint data_len = uint4korr(head + EVENT_LEN_OFFSET); + char *buf= (*arg_buf); + const char *error= 0; + Log_event *res= 0; + DBUG_ENTER("wsrep_read_log_event"); + + if (data_len > WSREP_MAX_ALLOWED_PACKET) + { + error = "Event too big"; + goto err; + } + + res= Log_event::read_log_event(buf, data_len, &error, description_event, false); + +err: + if (!res) + { + DBUG_ASSERT(error != 0); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %u, event_type: %d", + error,data_len,(uchar) head[EVENT_TYPE_OFFSET]); + } + (*arg_buf)+= data_len; + (*arg_buf_len)-= data_len; + DBUG_RETURN(res); +} #endif diff --git a/sql/log_event.h b/sql/log_event.h index 2091d968558..212215d97b6 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1340,7 +1340,11 @@ public: */ int apply_event(rpl_group_info *rgi) { - return do_apply_event(rgi); + int res; + THD_STAGE_INFO(thd, stage_apply_event); + res= do_apply_event(rgi); + THD_STAGE_INFO(thd, stage_after_apply_event); + return res; } diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index 0cb78686243..eaa882518f5 100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -302,50 +302,7 @@ last_uniq_key(TABLE *table, uint keyno) */ static bool record_compare(TABLE *table) { - /* - Need to set the X bit and the filler bits in both 
records since - there are engines that do not set it correctly. - - In addition, since MyISAM checks that one hasn't tampered with the - record, it is necessary to restore the old bytes into the record - after doing the comparison. - - TODO[record format ndb]: Remove it once NDB returns correct - records. Check that the other engines also return correct records. - */ - bool result= FALSE; - uchar saved_x[2]= {0, 0}, saved_filler[2]= {0, 0}; - - if (table->s->null_bytes > 0) - { - for (int i = 0 ; i < 2 ; ++i) - { - /* - If we have an X bit then we need to take care of it. - */ - if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD)) - { - saved_x[i]= table->record[i][0]; - table->record[i][0]|= 1U; - } - - /* - If (last_null_bit_pos == 0 && null_bytes > 1), then: - - X bit (if any) + N nullable fields + M Field_bit fields = 8 bits - - Ie, the entire byte is used. - */ - if (table->s->last_null_bit_pos > 0) - { - saved_filler[i]= table->record[i][table->s->null_bytes - 1]; - table->record[i][table->s->null_bytes - 1]|= - 256U - (1U << table->s->last_null_bit_pos); - } - } - } - if (table->s->blob_fields + table->s->varchar_fields == 0) { result= cmp_record(table,record[1]); @@ -372,24 +329,6 @@ static bool record_compare(TABLE *table) } record_compare_exit: - /* - Restore the saved bytes. - - TODO[record format ndb]: Remove this code once NDB returns the - correct record format. - */ - if (table->s->null_bytes > 0) - { - for (int i = 0 ; i < 2 ; ++i) - { - if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD)) - table->record[i][0]= saved_x[i]; - - if (table->s->last_null_bit_pos > 0) - table->record[i][table->s->null_bytes - 1]= saved_filler[i]; - } - } - return result; } @@ -780,21 +719,6 @@ static int find_and_fetch_row(TABLE *table, uchar *key) { int error; - /* - We need to set the null bytes to ensure that the filler bit - are all set when returning. 
There are storage engines that - just set the necessary bits on the bytes and don't set the - filler bits correctly. - - TODO[record format ndb]: Remove this code once NDB returns the - correct record format. - */ - if (table->s->null_bytes > 0) - { - table->record[1][table->s->null_bytes - 1]|= - 256U - (1U << table->s->last_null_bit_pos); - } - while ((error= table->file->ha_index_next(table->record[1]))) { /* We just skip records that has already been deleted */ @@ -889,34 +813,13 @@ int Write_rows_log_event_old::do_before_row_operations(TABLE *table) /* Tell the storage engine that we are using REPLACE semantics. */ thd->lex->duplicates= DUP_REPLACE; - /* - Pretend we're executing a REPLACE command: this is needed for - InnoDB and NDB Cluster since they are not (properly) checking the - lex->duplicates flag. - */ thd->lex->sql_command= SQLCOM_REPLACE; /* Do not raise the error flag in case of hitting to an unique attribute */ table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); - /* - NDB specific: update from ndb master wrapped as Write_rows - */ - /* - so that the event should be applied to replace slave's row - */ table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); - /* - NDB specific: if update from ndb master wrapped as Write_rows - does not find the row it's assumed idempotent binlog applying - is taking place; don't raise the error. - */ table->file->extra(HA_EXTRA_IGNORE_NO_KEY); - /* - TODO: the cluster team (Tomas?) says that it's better if the engine knows - how many rows are going to be inserted, then it can allocate needed memory - from the start. - */ table->file->ha_start_bulk_insert(0); return error; } @@ -2375,21 +2278,6 @@ int Old_rows_log_event::find_row(rpl_group_info *rgi) while (record_compare(table)) { - /* - We need to set the null bytes to ensure that the filler bit - are all set when returning. There are storage engines that - just set the necessary bits on the bytes and don't set the - filler bits correctly. 
- - TODO[record format ndb]: Remove this code once NDB returns the - correct record format. - */ - if (table->s->null_bytes > 0) - { - table->record[0][table->s->null_bytes - 1]|= - 256U - (1U << table->s->last_null_bit_pos); - } - while ((error= table->file->ha_index_next(table->record[0]))) { /* We just skip records that has already been deleted */ @@ -2529,34 +2417,13 @@ Write_rows_log_event_old::do_before_row_operations(const Slave_reporting_capabil /* Tell the storage engine that we are using REPLACE semantics. */ thd->lex->duplicates= DUP_REPLACE; - /* - Pretend we're executing a REPLACE command: this is needed for - InnoDB and NDB Cluster since they are not (properly) checking the - lex->duplicates flag. - */ thd->lex->sql_command= SQLCOM_REPLACE; /* Do not raise the error flag in case of hitting to an unique attribute */ m_table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); - /* - NDB specific: update from ndb master wrapped as Write_rows - */ - /* - so that the event should be applied to replace slave's row - */ m_table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); - /* - NDB specific: if update from ndb master wrapped as Write_rows - does not find the row it's assumed idempotent binlog applying - is taking place; don't raise the error. - */ m_table->file->extra(HA_EXTRA_IGNORE_NO_KEY); - /* - TODO: the cluster team (Tomas?) says that it's better if the engine knows - how many rows are going to be inserted, then it can allocate needed memory - from the start. - */ m_table->file->ha_start_bulk_insert(0); return error; } diff --git a/sql/mdl.cc b/sql/mdl.cc index 2c2d64e96b2..42f12a47afd 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -22,6 +22,8 @@ #include <mysql/plugin.h> #include <mysql/service_thd_wait.h> #include <mysql/psi/mysql_stage.h> +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" #ifdef HAVE_PSI_INTERFACE static PSI_mutex_key key_MDL_map_mutex; @@ -1497,11 +1499,53 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) called by other threads. 
*/ DBUG_ASSERT(ticket->get_lock()); - /* - Add ticket to the *back* of the queue to ensure fairness - among requests with the same priority. - */ - m_list.push_back(ticket); +#ifdef WITH_WSREP + if ((this == &(ticket->get_lock()->m_waiting)) && + wsrep_thd_is_BF(ticket->get_ctx()->get_thd(), false)) + { + Ticket_iterator itw(ticket->get_lock()->m_waiting); + Ticket_iterator itg(ticket->get_lock()->m_granted); + + DBUG_ASSERT(WSREP_ON); + MDL_ticket *waiting, *granted; + MDL_ticket *prev=NULL; + bool added= false; + + while ((waiting= itw++) && !added) + { + if (!wsrep_thd_is_BF(waiting->get_ctx()->get_thd(), true)) + { + WSREP_DEBUG("MDL add_ticket inserted before: %lu %s", + thd_get_thread_id(waiting->get_ctx()->get_thd()), + wsrep_thd_query(waiting->get_ctx()->get_thd())); + m_list.insert_after(prev, ticket); + added= true; + } + prev= waiting; + } + if (!added) m_list.push_back(ticket); + + while ((granted= itg++)) + { + if (granted->get_ctx() != ticket->get_ctx() && + granted->is_incompatible_when_granted(ticket->get_type())) + { + if (!wsrep_grant_mdl_exception(ticket->get_ctx(), granted)) + { + WSREP_DEBUG("MDL victim killed at add_ticket"); + } + } + } + } + else +#endif /* WITH_WSREP */ + { + /* + Add ticket to the *back* of the queue to ensure fairness + among requests with the same priority. 
+ */ + m_list.push_back(ticket); + } m_bitmap|= MDL_BIT(ticket->get_type()); } @@ -1842,6 +1886,7 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, bool can_grant= FALSE; bitmap_t waiting_incompat_map= incompatible_waiting_types_bitmap()[type_arg]; bitmap_t granted_incompat_map= incompatible_granted_types_bitmap()[type_arg]; + bool wsrep_can_grant= TRUE; /* New lock request can be satisfied iff: @@ -1864,10 +1909,49 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, { if (ticket->get_ctx() != requestor_ctx && ticket->is_incompatible_when_granted(type_arg)) + { +#ifdef WITH_WSREP + if (wsrep_thd_is_BF(requestor_ctx->get_thd(),false) && + key.mdl_namespace() == MDL_key::GLOBAL) + { + WSREP_DEBUG("global lock granted for BF: %lu %s", + thd_get_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + can_grant = true; + } + else if (!wsrep_grant_mdl_exception(requestor_ctx, ticket)) + { + wsrep_can_grant= FALSE; + if (wsrep_log_conflicts) + { + MDL_lock * lock = ticket->get_lock(); + WSREP_INFO( + "MDL conflict db=%s table=%s ticket=%d solved by %s", + lock->key.db_name(), lock->key.name(), ticket->get_type(), + "abort" ); + } + } + else + can_grant= TRUE; + /* Continue loop */ +#else break; +#endif /* WITH_WSREP */ + } } - if (ticket == NULL) /* Incompatible locks are our own. */ - can_grant= TRUE; + if ((ticket == NULL) && wsrep_can_grant) + can_grant= TRUE; /* Incompatible locks are our own. 
*/ + } + } + else + { + if (wsrep_thd_is_BF(requestor_ctx->get_thd(), false) && + key.mdl_namespace() == MDL_key::GLOBAL) + { + WSREP_DEBUG("global lock granted for BF (waiting queue): %lu %s", + thd_get_thread_id(requestor_ctx->get_thd()), + wsrep_thd_query(requestor_ctx->get_thd())); + can_grant = true; } } return can_grant; @@ -3222,3 +3306,44 @@ void MDL_context::set_transaction_duration_for_all_locks() ticket->m_duration= MDL_TRANSACTION; #endif } + + + +void MDL_context::release_explicit_locks() +{ + release_locks_stored_before(MDL_EXPLICIT, NULL); +} + + +#ifdef WITH_WSREP +void MDL_ticket::wsrep_report(bool debug) /* When debug is set, log this ticket's lock type, key namespace, db, name and wait-stage name via WSREP_DEBUG; otherwise do nothing. */ +{ + if (debug) + { + const PSI_stage_info *psi_stage = m_lock->key.get_wait_state_name(); + + WSREP_DEBUG("MDL ticket: type: %s space: %s db: %s name: %s (%s)", + (get_type() == MDL_INTENTION_EXCLUSIVE) ? "intention exclusive" : + ((get_type() == MDL_SHARED) ? "shared" : + ((get_type() == MDL_SHARED_HIGH_PRIO ? "shared high prio" : + ((get_type() == MDL_SHARED_READ) ? "shared read" : + ((get_type() == MDL_SHARED_WRITE) ? "shared write" : + ((get_type() == MDL_SHARED_NO_WRITE) ? "shared no write" : + ((get_type() == MDL_SHARED_NO_READ_WRITE) ? "shared no read write" : + ((get_type() == MDL_EXCLUSIVE) ? "exclusive" : + "UNKNOWN")))))))), + (m_lock->key.mdl_namespace() == MDL_key::GLOBAL) ? "GLOBAL" : + ((m_lock->key.mdl_namespace() == MDL_key::SCHEMA) ? "SCHEMA" : + ((m_lock->key.mdl_namespace() == MDL_key::TABLE) ? "TABLE" : + ((m_lock->key.mdl_namespace() == MDL_key::FUNCTION) ? "FUNCTION" : + ((m_lock->key.mdl_namespace() == MDL_key::PROCEDURE) ? "PROCEDURE" : + ((m_lock->key.mdl_namespace() == MDL_key::TRIGGER) ? "TRIGGER" : + ((m_lock->key.mdl_namespace() == MDL_key::EVENT) ? "EVENT" : + ((m_lock->key.mdl_namespace() == MDL_key::COMMIT) ? 
"COMMIT" : + "UNKNOWN"))))))), + m_lock->key.db_name(), + m_lock->key.name(), + psi_stage->m_name); + } +} +#endif /* WITH_WSREP */ diff --git a/sql/mdl.h b/sql/mdl.h index 47c587eb3be..27289e621ef 100644 --- a/sql/mdl.h +++ b/sql/mdl.h @@ -586,6 +586,9 @@ public: MDL_ticket *next_in_lock; MDL_ticket **prev_in_lock; public: +#ifdef WITH_WSREP + void wsrep_report(bool debug); +#endif /* WITH_WSREP */ bool has_pending_conflicting_lock() const; MDL_context *get_ctx() const { return m_ctx; } @@ -773,6 +776,10 @@ public: m_tickets[MDL_TRANSACTION].is_empty() && m_tickets[MDL_EXPLICIT].is_empty()); } + inline bool has_transactional_locks() const + { + return !m_tickets[MDL_TRANSACTION].is_empty(); + } MDL_savepoint mdl_savepoint() { @@ -786,6 +793,7 @@ public: void release_statement_locks(); void release_transactional_locks(); + void release_explicit_locks(); void rollback_to_savepoint(const MDL_savepoint &mdl_savepoint); MDL_context_owner *get_owner() { return m_owner; } @@ -910,7 +918,6 @@ private: */ MDL_wait_for_subgraph *m_waiting_for; private: - THD *get_thd() const { return m_owner->get_thd(); } MDL_ticket *find_ticket(MDL_request *mdl_req, enum_mdl_duration *duration); void release_locks_stored_before(enum_mdl_duration duration, MDL_ticket *sentinel); @@ -919,6 +926,7 @@ private: MDL_ticket **out_ticket); public: + THD *get_thd() const { return m_owner->get_thd(); } void find_deadlock(); ulong get_thread_id() const { return thd_get_thread_id(get_thd()); } diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index b63db9ecea2..bb49cebb921 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -199,12 +199,6 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, One must have called index_init() before calling this function. Several multi_range_read_init() calls may be made in course of one query. 
- Until WL#2623 is done (see its text, section 3.2), the following will - also hold: - The caller will guarantee that if "seq->init == mrr_ranges_array_init" - then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures. - This property will only be used by NDB handler until WL#2623 is done. - Buffer memory management is done according to the following scenario: The caller allocates the buffer and provides it to the callee by filling the members of HANDLER_BUFFER structure. diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 066dae224eb..94503d507fe 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -71,6 +71,11 @@ #include "scheduler.h" #include <waiting_threads.h> #include "debug_sync.h" +#include "wsrep_mysqld.h" +#include "wsrep_var.h" +#include "wsrep_thd.h" +#include "wsrep_sst.h" + #include "sql_callback.h" #include "threadpool.h" @@ -352,7 +357,8 @@ static bool volatile select_thread_in_use, signal_thread_in_use; static volatile bool ready_to_exit; static my_bool opt_debugging= 0, opt_external_locking= 0, opt_console= 0; static my_bool opt_short_log_format= 0; -static uint kill_cached_threads, wake_thread; +uint kill_cached_threads; +static uint wake_thread; ulong max_used_connections; static volatile ulong cached_thread_count= 0; static char *mysqld_user, *mysqld_chroot; @@ -360,20 +366,21 @@ static char *default_character_set_name; static char *character_set_filesystem_name; static char *lc_messages; static char *lc_time_names_name; -static char *my_bind_addr_str; +char *my_bind_addr_str; static char *default_collation_name; char *default_storage_engine, *default_tmp_storage_engine; static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME; static I_List<THD> thread_cache; static bool binlog_format_used= false; LEX_STRING opt_init_connect, opt_init_slave; -static mysql_cond_t COND_thread_cache, COND_flush_thread_cache; +mysql_cond_t COND_thread_cache; +static mysql_cond_t COND_flush_thread_cache; static DYNAMIC_ARRAY all_options; 
/* Global variables */ bool opt_bin_log, opt_bin_log_used=0, opt_ignore_builtin_innodb= 0; -my_bool opt_log, opt_slow_log, debug_assert_if_crashed_table= 0, opt_help= 0; +my_bool opt_log, debug_assert_if_crashed_table= 0, opt_help= 0; static my_bool opt_abort; ulonglong log_output_options; my_bool opt_userstat_running; @@ -1398,7 +1405,7 @@ bool mysqld_embedded=0; bool mysqld_embedded=1; #endif -static my_bool plugins_are_initialized= FALSE; +my_bool plugins_are_initialized= FALSE; #ifndef DBUG_OFF static const char* default_dbug_option; @@ -1622,6 +1629,11 @@ static void close_connections(void) if (tmp->slave_thread) continue; +#ifdef WITH_WSREP + /* skip wsrep system threads as well */ + if (WSREP(tmp) && (tmp->wsrep_exec_mode==REPL_RECV || tmp->wsrep_applier)) + continue; +#endif tmp->killed= KILL_SERVER_HARD; MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp)); mysql_mutex_lock(&tmp->LOCK_thd_data); @@ -1698,6 +1710,34 @@ static void close_connections(void) close_connection(tmp,ER_SERVER_SHUTDOWN); } #endif +#ifdef WITH_WSREP + /* + * WSREP_TODO: + * this code block may turn out redundant. wsrep->disconnect() + * should terminate slave threads gracefully, and we don't need + * to signal them here. + * The code here makes sure mysqld will not hang during shutdown + * even if wsrep provider has problems in shutting down. 
+ */ + if (WSREP(tmp) && tmp->wsrep_exec_mode==REPL_RECV) + { + sql_print_information("closing wsrep system thread"); + tmp->killed= KILL_CONNECTION; + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (tmp)); + if (tmp->mysys_var) + { + tmp->mysys_var->abort=1; + mysql_mutex_lock(&tmp->mysys_var->mutex); + if (tmp->mysys_var->current_cond) + { + mysql_mutex_lock(tmp->mysys_var->current_mutex); + mysql_cond_broadcast(tmp->mysys_var->current_cond); + mysql_mutex_unlock(tmp->mysys_var->current_mutex); + } + mysql_mutex_unlock(&tmp->mysys_var->mutex); + } + } +#endif DBUG_PRINT("quit",("Unlocking LOCK_thread_count")); mysql_mutex_unlock(&LOCK_thread_count); } @@ -1852,7 +1892,14 @@ static void __cdecl kill_server(int sig_ptr) } #endif + if (WSREP_ON) + wsrep_stop_replication(NULL); + close_connections(); + + if (wsrep_inited == 1) + wsrep_deinit(true); + if (sig != MYSQL_KILL_SIGNAL && sig != 0) unireg_abort(1); /* purecov: inspected */ @@ -1947,6 +1994,29 @@ extern "C" void unireg_abort(int exit_code) usage(); if (exit_code) sql_print_error("Aborting\n"); + +#ifdef WITH_WSREP + /* Check if wsrep class is used. If yes, then cleanup wsrep */ + if (wsrep) + { + /* + This is an abort situation, we cannot expect to gracefully close all + wsrep threads here, we can only diconnect from service + */ + wsrep_close_client_connections(FALSE); + shutdown_in_progress= 1; + wsrep->disconnect(wsrep); + WSREP_INFO("Service disconnected."); + wsrep_close_threads(NULL); /* this won't close all threads */ + sleep(1); /* so give some time to exit for those which can */ + WSREP_INFO("Some threads may fail to exit."); + + /* In bootstrap mode we deinitialize wsrep here. 
*/ + if (opt_bootstrap && wsrep_inited) + wsrep_deinit(true); + } +#endif // WITH_WSREP + clean_up(!opt_abort && (exit_code || !opt_bootstrap)); /* purecov: inspected */ DBUG_PRINT("quit",("done with cleanup in unireg_abort")); mysqld_exit(exit_code); @@ -1991,12 +2061,6 @@ void clean_up(bool print_message) stop_handle_manager(); release_ddl_log(); - /* - make sure that handlers finish up - what they have that is dependent on the binlog - */ - ha_binlog_end(current_thd); - logger.cleanup_base(); injector::free_instance(); @@ -2036,6 +2100,9 @@ void clean_up(bool print_message) sp_cache_end(); free_status_vars(); end_thr_alarm(1); /* Free allocated memory */ +#ifndef EMBEDDED_LIBRARY + end_thr_timer(); +#endif my_free_open_file_info(); if (defaults_argv) free_defaults(defaults_argv); @@ -2179,7 +2246,12 @@ static void clean_up_mutexes() ** Init IP and UNIX socket ****************************************************************************/ -#ifndef EMBEDDED_LIBRARY +#ifdef EMBEDDED_LIBRARY +static void set_ports() +{ +} + +#else static void set_ports() { char *env; @@ -2200,10 +2272,13 @@ static void set_ports() #if MYSQL_PORT_DEFAULT == 0 struct servent *serv_ptr; if ((serv_ptr= getservbyname("mysql", "tcp"))) - mysqld_port= ntohs((u_short) serv_ptr->s_port); /* purecov: inspected */ + SYSVAR_AUTOSIZE(mysqld_port, ntohs((u_short) serv_ptr->s_port)); #endif if ((env = getenv("MYSQL_TCP_PORT"))) - mysqld_port= (uint) atoi(env); /* purecov: inspected */ + { + mysqld_port= (uint) atoi(env); + mark_sys_var_value_origin(&mysqld_port, sys_var::ENV); + } } if (!mysqld_unix_port) { @@ -2213,7 +2288,10 @@ static void set_ports() mysqld_unix_port= (char*) MYSQL_UNIX_ADDR; #endif if ((env = getenv("MYSQL_UNIX_PORT"))) - mysqld_unix_port= env; /* purecov: inspected */ + { + mysqld_unix_port= env; + mark_sys_var_value_origin(&mysqld_unix_port, sys_var::ENV); + } } } @@ -2481,6 +2559,11 @@ static MYSQL_SOCKET activate_tcp_port(uint port) socket_errno); unireg_abort(1); } + 
+#ifdef FD_CLOEXEC + (void) fcntl(mysql_socket_getfd(ip_sock), F_SETFD, FD_CLOEXEC); +#endif + DBUG_RETURN(ip_sock); } @@ -2499,7 +2582,7 @@ static void network_init(void) if (report_port == 0) { - report_port= mysqld_port; + SYSVAR_AUTOSIZE(report_port, mysqld_port); } #ifndef DBUG_OFF if (!opt_disable_networking) @@ -2607,6 +2690,9 @@ static void network_init(void) if (mysql_socket_listen(unix_sock,(int) back_log) < 0) sql_print_warning("listen() on Unix socket failed with error %d", socket_errno); +#ifdef FD_CLOEXEC + (void) fcntl(mysql_socket_getfd(unix_sock), F_SETFD, FD_CLOEXEC); +#endif } #endif DBUG_PRINT("info",("server started")); @@ -2681,6 +2767,16 @@ void thd_cleanup(THD *thd) void dec_connection_count(THD *thd) { +#ifdef WITH_WSREP + /* + Do not decrement when its wsrep system thread. wsrep_applier is set for + applier as well as rollbacker threads. + */ + if (thd->wsrep_applier) + return; +#endif /* WITH_WSREP */ + + DBUG_ASSERT(*thd->scheduler->connection_count > 0); mysql_mutex_lock(&LOCK_connection_count); (*thd->scheduler->connection_count)--; mysql_mutex_unlock(&LOCK_connection_count); @@ -2851,10 +2947,13 @@ static bool cache_thread() bool one_thread_per_connection_end(THD *thd, bool put_in_cache) { DBUG_ENTER("one_thread_per_connection_end"); + const bool wsrep_applier= IF_WSREP(thd->wsrep_applier, false); + unlink_thd(thd); /* Mark that current_thd is not valid anymore */ set_current_thd(0); - if (put_in_cache && cache_thread()) + + if (put_in_cache && cache_thread() && !wsrep_applier) DBUG_RETURN(0); // Thread is reused /* @@ -3329,7 +3428,7 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused))) sql_print_information("Got signal %d to shutdown mysqld",sig); #endif /* switch to the old log message processing */ - logger.set_handlers(LOG_FILE, opt_slow_log ? LOG_FILE:LOG_NONE, + logger.set_handlers(LOG_FILE, global_system_variables.sql_log_slow ? LOG_FILE:LOG_NONE, opt_log ? 
LOG_FILE:LOG_NONE); DBUG_PRINT("info",("Got signal: %d abort_loop: %d",sig,abort_loop)); if (!abort_loop) @@ -3367,13 +3466,15 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused))) if (log_output_options & LOG_NONE) { logger.set_handlers(LOG_FILE, - opt_slow_log ? LOG_TABLE : LOG_NONE, + global_system_variables.sql_log_slow ? + LOG_TABLE : LOG_NONE, opt_log ? LOG_TABLE : LOG_NONE); } else { logger.set_handlers(LOG_FILE, - opt_slow_log ? log_output_options : LOG_NONE, + global_system_variables.sql_log_slow ? + log_output_options : LOG_NONE, opt_log ? log_output_options : LOG_NONE); } break; @@ -3605,6 +3706,7 @@ SHOW_VAR com_status_vars[]= { {"check", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CHECK]), SHOW_LONG_STATUS}, {"checksum", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CHECKSUM]), SHOW_LONG_STATUS}, {"commit", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_COMMIT]), SHOW_LONG_STATUS}, + {"compound_sql", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_COMPOUND]), SHOW_LONG_STATUS}, {"create_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CREATE_DB]), SHOW_LONG_STATUS}, {"create_event", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CREATE_EVENT]), SHOW_LONG_STATUS}, {"create_function", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_CREATE_SPFUNCTION]), SHOW_LONG_STATUS}, @@ -3673,7 +3775,6 @@ SHOW_VAR com_status_vars[]= { {"show_binlog_events", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOG_EVENTS]), SHOW_LONG_STATUS}, {"show_binlogs", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOGS]), SHOW_LONG_STATUS}, {"show_charsets", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CHARSETS]), SHOW_LONG_STATUS}, - {"show_client_statistics", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CLIENT_STATS]), SHOW_LONG_STATUS}, {"show_collations", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLLATIONS]), SHOW_LONG_STATUS}, {"show_contributors", (char*) 
offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CONTRIBUTORS]), SHOW_LONG_STATUS}, {"show_create_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE_DB]), SHOW_LONG_STATUS}, @@ -3694,8 +3795,8 @@ SHOW_VAR com_status_vars[]= { {"show_function_code", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_FUNC_CODE]), SHOW_LONG_STATUS}, #endif {"show_function_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS_FUNC]), SHOW_LONG_STATUS}, + {"show_generic", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GENERIC]), SHOW_LONG_STATUS}, {"show_grants", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS}, - {"show_index_statistics", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_INDEX_STATS]), SHOW_LONG_STATUS}, {"show_keys", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS}, {"show_master_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS}, {"show_open_tables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_OPEN_TABLES]), SHOW_LONG_STATUS}, @@ -3713,11 +3814,9 @@ SHOW_VAR com_status_vars[]= { {"show_slave_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_STAT]), SHOW_LONG_STATUS}, {"show_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS]), SHOW_LONG_STATUS}, {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS}, - {"show_table_statistics", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATS]), SHOW_LONG_STATUS}, {"show_table_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS}, {"show_tables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLES]), SHOW_LONG_STATUS}, {"show_triggers", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TRIGGERS]), SHOW_LONG_STATUS}, - {"show_user_statistics", (char*) 
offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_USER_STATS]), SHOW_LONG_STATUS}, {"show_variables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_VARIABLES]), SHOW_LONG_STATUS}, {"show_warnings", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_WARNS]), SHOW_LONG_STATUS}, {"shutdown", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHUTDOWN]), SHOW_LONG_STATUS}, @@ -3900,6 +3999,10 @@ static int init_common_variables() return 1; } +#if defined(HAVE_POOL_OF_THREADS) && !defined(_WIN32) + SYSVAR_AUTOSIZE(threadpool_size, my_getncpus()); +#endif + if (init_thread_environment() || mysql_init_variables()) return 1; @@ -3976,11 +4079,17 @@ static int init_common_variables() else opt_log_basename= glob_hostname; - if (!*pidfile_name) +#ifdef WITH_WSREP + if (wsrep_node_name == 0 || wsrep_node_name[0] == 0) { - strmake(pidfile_name, opt_log_basename, sizeof(pidfile_name)-5); - strmov(fn_ext(pidfile_name),".pid"); // Add proper extension + my_free((void *)wsrep_node_name); + wsrep_node_name= my_strdup(glob_hostname, MYF(MY_WME)); } +#endif /* WITH_WSREP */ + + strmake(pidfile_name, opt_log_basename, sizeof(pidfile_name)-5); + strmov(fn_ext(pidfile_name),".pid"); // Add proper extension + SYSVAR_AUTOSIZE(pidfile_name_ptr, pidfile_name); /* The default-storage-engine entry in my_long_options should have a @@ -4050,20 +4159,18 @@ static int init_common_variables() #ifdef HAVE_LARGE_PAGES /* Initialize large page size */ - if (opt_large_pages && (opt_large_page_size= my_get_large_page_size())) + if (opt_large_pages) { + SYSVAR_AUTOSIZE(opt_large_page_size, my_get_large_page_size()); + if (opt_large_page_size) + { DBUG_PRINT("info", ("Large page set, large_page_size = %d", opt_large_page_size)); my_use_large_pages= 1; my_large_page_size= opt_large_page_size; - } - else - { - opt_large_pages= 0; - /* - Either not configured to use large pages or Linux haven't - been compiled with large page support - */ + } + else + SYSVAR_AUTOSIZE(opt_large_pages, 0); } 
#endif /* HAVE_LARGE_PAGES */ #ifdef HAVE_SOLARIS_LARGE_PAGES @@ -4147,16 +4254,17 @@ static int init_common_variables() If we have requested too much file handles than we bring max_connections in supported bounds. */ - max_connections= (ulong) MY_MIN(files-10-TABLE_OPEN_CACHE_MIN*2, - max_connections); + SYSVAR_AUTOSIZE(max_connections, + (ulong) MY_MIN(files-10-TABLE_OPEN_CACHE_MIN*2, max_connections)); /* Decrease tc_size according to max_connections, but not below TABLE_OPEN_CACHE_MIN. Outer MY_MIN() ensures that we never increase tc_size automatically (that could happen if max_connections is decreased above). */ - tc_size= (ulong) MY_MIN(MY_MAX((files - 10 - max_connections) / 2, - TABLE_OPEN_CACHE_MIN), tc_size); + SYSVAR_AUTOSIZE(tc_size, + (ulong) MY_MIN(MY_MAX((files - 10 - max_connections) / 2, + TABLE_OPEN_CACHE_MIN), tc_size)); DBUG_PRINT("warning", ("Changed limits: max_open_files: %u max_connections: %ld table_cache: %ld", files, max_connections, tc_size)); @@ -4167,7 +4275,7 @@ static int init_common_variables() else if (global_system_variables.log_warnings) sql_print_warning("Could not increase number of max_open_files to more than %u (request: %u)", files, wanted_files); } - open_files_limit= files; + SYSVAR_AUTOSIZE(open_files_limit, files); } unireg_init(opt_specialflag); /* Set up extern variabels */ if (!(my_default_lc_messages= @@ -4263,7 +4371,8 @@ static int init_common_variables() "--log option, log tables are used. " "To enable logging to files use the --log-output option."); - if (opt_slow_log && opt_slow_logname && *opt_slow_logname && + if (global_system_variables.sql_log_slow && opt_slow_logname && + *opt_slow_logname && !(log_output_options & (LOG_FILE | LOG_NONE))) sql_print_warning("Although a path was specified for the " "--log-slow-queries option, log tables are used. " @@ -4296,7 +4405,8 @@ static int init_common_variables() get corrupted if accesses with names of different case. 
*/ DBUG_PRINT("info", ("lower_case_table_names: %d", lower_case_table_names)); - lower_case_file_system= test_if_case_insensitive(mysql_real_data_home); + SYSVAR_AUTOSIZE(lower_case_file_system, + test_if_case_insensitive(mysql_real_data_home)); if (!lower_case_table_names && lower_case_file_system == 1) { if (lower_case_table_names_used) @@ -4312,8 +4422,9 @@ You should consider changing lower_case_table_names to 1 or 2", else { if (global_system_variables.log_warnings) - sql_print_warning("Setting lower_case_table_names=2 because file system for %s is case insensitive", mysql_real_data_home); - lower_case_table_names= 2; + sql_print_warning("Setting lower_case_table_names=2 because file " + "system for %s is case insensitive", mysql_real_data_home); + SYSVAR_AUTOSIZE(lower_case_table_names, 2); } } else if (lower_case_table_names == 2 && @@ -4324,7 +4435,7 @@ You should consider changing lower_case_table_names to 1 or 2", "the file system '%s' is case sensitive. Now setting " "lower_case_table_names to 0 to avoid future problems.", mysql_real_data_home); - lower_case_table_names= 0; + SYSVAR_AUTOSIZE(lower_case_table_names, 0); } else { @@ -4639,6 +4750,14 @@ static int init_server_components() my_rnd_init(&sql_rand,(ulong) server_start_time,(ulong) server_start_time/2); setup_fpu(); init_thr_lock(); +#ifndef EMBEDDED_LIBRARY + if (init_thr_timer(thread_scheduler->max_threads + extra_max_connections)) + { + fprintf(stderr, "Can't initialize timers\n"); + unireg_abort(1); + } +#endif + my_uuid_init((ulong) (my_rnd(&sql_rand))*12345,12345); #ifdef HAVE_REPLICATION init_slave_list(); @@ -4655,16 +4774,17 @@ static int init_server_components() if (opt_error_log && !opt_abort) { if (!log_error_file_ptr[0]) + { fn_format(log_error_file, pidfile_name, mysql_data_home, ".err", MY_REPLACE_EXT); /* replace '.<domain>' by '.err', bug#4997 */ + SYSVAR_AUTOSIZE(log_error_file_ptr, log_error_file); + } else + { fn_format(log_error_file, log_error_file_ptr, mysql_data_home, 
".err", MY_UNPACK_FILENAME | MY_SAFE_PATH); - /* - _ptr may have been set to my_disabled_option or "" if no argument was - passed, but we need to show the real name in SHOW VARIABLES: - */ - log_error_file_ptr= log_error_file; + log_error_file_ptr= log_error_file; + } if (!log_error_file[0]) opt_error_log= 0; // Too long file name else @@ -4725,7 +4845,7 @@ static int init_server_components() unireg_abort(1); /* need to configure logging before initializing storage engines */ - if (!opt_bin_log_used) + if (!opt_bin_log_used && !WSREP_ON) { if (opt_log_slave_updates) sql_print_warning("You need to use --log-bin to make " @@ -4802,10 +4922,54 @@ a file name for --log-bin-index option", opt_binlog_index_name); opt_log_basename, ln); } if (ln == buf) - { opt_bin_logname= my_once_strdup(buf, MYF(MY_WME)); + } + + /* + Wsrep initialization must happen at this point, because: + - opt_bin_logname must be known when starting replication + since SST may need it + - SST may modify binlog index file, so it must be opened + after SST has happened + */ + if (WSREP_ON && !wsrep_recovery) /* WSREP BEFORE SE */ + { + if (opt_bootstrap) // bootsrap option given - disable wsrep functionality + { + wsrep_provider_init(WSREP_NONE); + if (wsrep_init()) + unireg_abort(1); } - if (mysql_bin_log.open_index_file(opt_binlog_index_name, ln, TRUE)) + else // full wsrep initialization + { + // add basedir/bin to PATH to resolve wsrep script names + char* const tmp_path= (char*)my_alloca(strlen(mysql_home) + + strlen("/bin") + 1); + if (tmp_path) + { + strcpy(tmp_path, mysql_home); + strcat(tmp_path, "/bin"); + wsrep_prepend_PATH(tmp_path); + } + else + { + WSREP_ERROR("Could not append %s/bin to PATH", mysql_home); + } + my_afree(tmp_path); + + if (wsrep_before_SE()) + { + set_ports(); // this is also called in network_init() later but we need + // to know mysqld_port now - lp:1071882 + wsrep_init_startup(true); + } + } + } + + if (opt_bin_log) + { + if 
(mysql_bin_log.open_index_file(opt_binlog_index_name, opt_bin_logname, + TRUE)) { unireg_abort(1); } @@ -4830,6 +4994,11 @@ a file name for --log-bin-index option", opt_binlog_index_name); } plugins_are_initialized= TRUE; /* Don't separate from init function */ +#ifdef WITH_WSREP + if (WSREP_ON && wsrep_check_opts()) + global_system_variables.wsrep_on= 0; +#endif + /* we do want to exit if there are any other unknown options */ if (remaining_argc > 1) { @@ -4900,11 +5069,14 @@ a file name for --log-bin-index option", opt_binlog_index_name); /* purecov: begin inspected */ sql_print_error("CSV engine is not present, falling back to the " "log files"); - log_output_options= (log_output_options & ~LOG_TABLE) | LOG_FILE; + SYSVAR_AUTOSIZE(log_output_options, + (log_output_options & ~LOG_TABLE) | LOG_FILE); /* purecov: end */ } - logger.set_handlers(LOG_FILE, opt_slow_log ? log_output_options:LOG_NONE, + logger.set_handlers(LOG_FILE, + global_system_variables.sql_log_slow ? + log_output_options:LOG_NONE, opt_log ? log_output_options:LOG_NONE); } @@ -4930,10 +5102,20 @@ a file name for --log-bin-index option", opt_binlog_index_name); internal_tmp_table_max_key_segments= myisam_max_key_segments(); #endif - tc_log= (total_ha_2pc > 1 ? (opt_bin_log ? - (TC_LOG *) &mysql_bin_log : - (TC_LOG *) &tc_log_mmap) : - (TC_LOG *) &tc_log_dummy); +#ifdef WITH_WSREP + if (WSREP_ON && !opt_bin_log) + { + wsrep_emulate_bin_log= 1; + } +#endif + + tc_log= get_tc_log_implementation(); + + WSREP_DEBUG("Initial TC log open: %s", + (tc_log == &mysql_bin_log) ? "binlog" : + (tc_log == &tc_log_mmap) ? "mmap" : + (tc_log == &tc_log_dummy) ? "dummy" : "unknown" + ); if (tc_log->open(opt_bin_log ? 
opt_bin_logname : opt_tc_log_file)) { @@ -5338,8 +5520,11 @@ int mysqld_main(int argc, char **argv) init_signals(); - my_thread_stack_size= my_setstacksize(&connection_attrib, - my_thread_stack_size); + ulonglong new_thread_stack_size; + new_thread_stack_size= my_setstacksize(&connection_attrib, + my_thread_stack_size); + if (new_thread_stack_size != my_thread_stack_size) + SYSVAR_AUTOSIZE(my_thread_stack_size, new_thread_stack_size); (void) thr_setconcurrency(concurrency); // 10 by default @@ -5375,7 +5560,7 @@ int mysqld_main(int argc, char **argv) if (opt_bin_log && !global_system_variables.server_id) { - global_system_variables.server_id= ::server_id= 1; + SYSVAR_AUTOSIZE(global_system_variables.server_id, ::server_id= 1); #ifdef EXTRA_DEBUG sql_print_warning("You have enabled the binary log, but you haven't set " "server-id to a non-zero value: we force server id to 1; " @@ -5408,6 +5593,13 @@ int mysqld_main(int argc, char **argv) } #endif + if (WSREP_ON && wsrep_recovery) + { + select_thread_in_use= 0; + wsrep_recover(); + unireg_abort(0); + } + /* init signals & alarm After this we can't quit by a simple unireg_abort @@ -5447,14 +5639,6 @@ int mysqld_main(int argc, char **argv) #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE initialize_performance_schema_acl(opt_bootstrap); - /* - Do not check the structure of the performance schema tables - during bootstrap: - - the tables are not supposed to exist yet, bootstrap will create them - - a check would print spurious error messages - */ - if (! opt_bootstrap) - check_performance_schema(); #endif initialize_information_schema_acl(); @@ -5464,7 +5648,36 @@ int mysqld_main(int argc, char **argv) if (Events::init(opt_noacl || opt_bootstrap)) unireg_abort(1); - if (opt_bootstrap) + if (WSREP_ON) + { + if (opt_bootstrap) + { + /*! bootstrap wsrep init was taken care of above */ + } + else + { + wsrep_SE_initialized(); + + if (wsrep_before_SE()) + { + /*! 
in case of no SST wsrep waits in view handler callback */ + wsrep_SE_init_grab(); + wsrep_SE_init_done(); + /*! in case of SST wsrep waits for wsrep->sst_received */ + wsrep_sst_continue(); + } + else + { + wsrep_init_startup (false); + } + + wsrep_create_appliers(wsrep_slave_threads - 1); + } + } + else + wsrep_init_startup (false); + + if (opt_bootstrap) { select_thread_in_use= 0; // Allow 'kill' to work bootstrap(mysql_stdin); @@ -5532,6 +5745,7 @@ int mysqld_main(int argc, char **argv) #ifdef EXTRA_DEBUG2 sql_print_error("Before Lock_thread_count"); #endif + WSREP_DEBUG("Before Lock_thread_count"); mysql_mutex_lock(&LOCK_thread_count); DBUG_PRINT("quit", ("Got thread_count mutex")); select_thread_in_use=0; // For close_connections @@ -5797,6 +6011,9 @@ static void bootstrap(MYSQL_FILE *file) DBUG_ENTER("bootstrap"); THD *thd= new THD; +#ifdef WITH_WSREP + thd->variables.wsrep_on= 0; +#endif thd->bootstrap=1; my_net_init(&thd->net,(st_vio*) 0, MYF(0)); thd->max_client_packet_length= thd->net.max_packet; @@ -6198,6 +6415,9 @@ void handle_connections_sockets() sleep(1); // Give other threads some time continue; } +#ifdef FD_CLOEXEC + (void) fcntl(mysql_socket_getfd(new_sock), F_SETFD, FD_CLOEXEC); +#endif #ifdef HAVE_LIBWRAP { @@ -7135,6 +7355,13 @@ struct my_option my_long_options[]= {"table_cache", 0, "Deprecated; use --table-open-cache instead.", &tc_size, &tc_size, 0, GET_ULONG, REQUIRED_ARG, TABLE_OPEN_CACHE_DEFAULT, 1, 512*1024L, 0, 1, 0}, +#ifdef WITH_WSREP + {"wsrep-new-cluster", 0, "Bootstrap a cluster. It works by overriding the " + "current value of wsrep_cluster_address. 
It is recommended not to add this " + "option to the config file as this will trigger bootstrap on every server " + "start.", &wsrep_new_cluster, &wsrep_new_cluster, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, +#endif /* The following options exist in 5.6 but not in 10.0 */ MYSQL_TO_BE_IMPLEMENTED_OPTION("default-tmp-storage-engine"), @@ -7867,6 +8094,7 @@ SHOW_VAR status_vars[]= { {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS}, {"Key", (char*) &show_default_keycache, SHOW_FUNC}, {"Last_query_cost", (char*) offsetof(STATUS_VAR, last_query_cost), SHOW_DOUBLE_STATUS}, + {"Max_statement_time_exceeded", (char*) offsetof(STATUS_VAR, max_statement_time_exceeded), SHOW_LONG_STATUS}, {"Max_used_connections", (char*) &max_used_connections, SHOW_LONG}, {"Memory_used", (char*) offsetof(STATUS_VAR, memory_used), SHOW_LONGLONG_STATUS}, {"Not_flushed_delayed_rows", (char*) &delayed_rows_in_use, SHOW_LONG_NOFLUSH}, @@ -7971,6 +8199,9 @@ SHOW_VAR status_vars[]= { #ifdef ENABLED_PROFILING {"Uptime_since_flush_status",(char*) &show_flushstatustime, SHOW_SIMPLE_FUNC}, #endif +#ifdef WITH_WSREP + {"wsrep", (char*) &wsrep_show_status, SHOW_FUNC}, +#endif {NullS, NullS, SHOW_LONG} }; @@ -8039,9 +8270,9 @@ static void print_help() my_print_help((my_option*) all_options.buffer); - /* Add variables that can be shown but not changed, like version numbers */ + /* Add variables that must be shown but not changed, like version numbers */ pop_dynamic(&all_options); - sys_var_add_options(&all_options, sys_var::SHOW_VALUE_IN_HELP); + sys_var_add_options(&all_options, sys_var::GETOPT_ONLY_HELP); sort_dynamic(&all_options, (qsort_cmp) option_cmp); add_terminator(&all_options); my_print_variables((my_option*) all_options.buffer); @@ -8123,7 +8354,7 @@ static int mysql_init_variables(void) /* We can only test for sub paths if my_symlink.c is using realpath */ myisam_test_invalid_symlink= test_if_data_home_dir; #endif - opt_log= opt_slow_log= 0; + opt_log= 0; 
opt_bin_log= opt_bin_log_used= 0; opt_disable_networking= opt_skip_show_db=0; opt_skip_name_resolve= 0; @@ -8174,7 +8405,6 @@ static int mysql_init_variables(void) opt_specialflag= SPECIAL_ENGLISH; unix_sock= base_ip_sock= extra_ip_sock= MYSQL_INVALID_SOCKET; mysql_home_ptr= mysql_home; - pidfile_name_ptr= pidfile_name; log_error_file_ptr= log_error_file; protocol_version= PROTOCOL_VERSION; what_to_log= ~ (1L << (uint) COM_TIME); @@ -8313,15 +8543,24 @@ static int mysql_init_variables(void) if (!(tmpenv = getenv("MY_BASEDIR_VERSION"))) tmpenv = DEFAULT_MYSQL_HOME; strmake_buf(mysql_home, tmpenv); + mark_sys_var_value_origin(&mysql_home_ptr, sys_var::ENV); #endif + + if (wsrep_init_vars()) + return 1; + return 0; } my_bool -mysqld_get_one_option(int optid, - const struct my_option *opt __attribute__((unused)), - char *argument) +mysqld_get_one_option(int optid, const struct my_option *opt, char *argument) { + if (opt->app_type) + { + sys_var *var= (sys_var*) opt->app_type; + var->value_origin= sys_var::CONFIG; + } + switch(optid) { case '#': #ifndef DBUG_OFF @@ -8355,8 +8594,8 @@ mysqld_get_one_option(int optid, "in later versions.", opt->name); break; case 'a': - global_system_variables.sql_mode= MODE_ANSI; - global_system_variables.tx_isolation= ISO_SERIALIZABLE; + SYSVAR_AUTOSIZE(global_system_variables.sql_mode, MODE_ANSI); + SYSVAR_AUTOSIZE(global_system_variables.tx_isolation, ISO_SERIALIZABLE); break; case 'b': strmake_buf(mysql_home, argument); @@ -8417,22 +8656,33 @@ mysqld_get_one_option(int optid, return 1; } if (log_error_file_ptr != disabled_my_option) - log_error_file_ptr= opt_log_basename; + SYSVAR_AUTOSIZE(log_error_file_ptr, opt_log_basename); make_default_log_name(&opt_logname, ".log", false); make_default_log_name(&opt_slow_logname, "-slow.log", false); make_default_log_name(&opt_bin_logname, "-bin", true); make_default_log_name(&opt_binlog_index_name, "-bin.index", true); + mark_sys_var_value_origin(&opt_logname, sys_var::AUTO); + 
mark_sys_var_value_origin(&opt_slow_logname, sys_var::AUTO); + if (!opt_logname || !opt_slow_logname || !opt_bin_logname || + !opt_binlog_index_name) + return 1; + +#ifdef HAVE_REPLICATION make_default_log_name(&opt_relay_logname, "-relay-bin", true); make_default_log_name(&opt_relaylog_index_name, "-relay-bin.index", true); + mark_sys_var_value_origin(&opt_relay_logname, sys_var::AUTO); + mark_sys_var_value_origin(&opt_relaylog_index_name, sys_var::AUTO); + if (!opt_relay_logname || !opt_relaylog_index_name) + return 1; +#endif - pidfile_name_ptr= pidfile_name; + SYSVAR_AUTOSIZE(pidfile_name_ptr, pidfile_name); strmake(pidfile_name, argument, sizeof(pidfile_name)-5); strmov(fn_ext(pidfile_name),".pid"); /* check for errors */ - if (!opt_bin_logname || !opt_relaylog_index_name || ! opt_logname || - ! opt_slow_logname || !pidfile_name_ptr) + if (!pidfile_name_ptr) return 1; // out of memory error break; } @@ -8528,11 +8778,11 @@ mysqld_get_one_option(int optid, #endif /* HAVE_REPLICATION */ case (int) OPT_SAFE: opt_specialflag|= SPECIAL_SAFE_MODE | SPECIAL_NO_NEW_FUNC; - delay_key_write_options= (uint) DELAY_KEY_WRITE_NONE; - myisam_recover_options= HA_RECOVER_DEFAULT; + SYSVAR_AUTOSIZE(delay_key_write_options, (uint) DELAY_KEY_WRITE_NONE); + SYSVAR_AUTOSIZE(myisam_recover_options, HA_RECOVER_DEFAULT); ha_open_options&= ~(HA_OPEN_DELAY_KEY_WRITE); #ifdef HAVE_QUERY_CACHE - query_cache_size=0; + SYSVAR_AUTOSIZE(query_cache_size, 0); #endif sql_print_warning("The syntax '--safe-mode' is deprecated and will be " "removed in a future release."); @@ -8577,18 +8827,6 @@ mysqld_get_one_option(int optid, } break; #endif /* defined(ENABLED_DEBUG_SYNC) */ - case OPT_ENGINE_CONDITION_PUSHDOWN: - /* - The last of --engine-condition-pushdown and --optimizer_switch on - command line wins (see get_options(). 
- */ - if (global_system_variables.engine_condition_pushdown) - global_system_variables.optimizer_switch|= - OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN; - else - global_system_variables.optimizer_switch&= - ~OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN; - break; case OPT_LOG_ERROR: /* "No --log-error" == "write errors to stderr", @@ -8818,7 +9056,7 @@ static int get_options(int *argc_ptr, char ***argv_ptr) if ((opt_log_slow_admin_statements || opt_log_queries_not_using_indexes || opt_log_slow_slave_statements) && - !opt_slow_log) + !global_system_variables.sql_log_slow) sql_print_warning("options --log-slow-admin-statements, --log-queries-not-using-indexes and --log-slow-slave-statements have no effect if --log_slow_queries is not set"); if (global_system_variables.net_buffer_length > global_system_variables.max_allowed_packet) @@ -8900,7 +9138,7 @@ static int get_options(int *argc_ptr, char ***argv_ptr) if (mysqld_chroot) set_root(mysqld_chroot); #else - thread_handling = SCHEDULER_NO_THREADS; + SYSVAR_AUTOSIZE(thread_handling, SCHEDULER_NO_THREADS); max_allowed_packet= global_system_variables.max_allowed_packet; net_buffer_length= global_system_variables.net_buffer_length; #endif @@ -8928,7 +9166,9 @@ static int get_options(int *argc_ptr, char ***argv_ptr) debug_assert_if_crashed_table= 1; global_system_variables.long_query_time= (ulonglong) - (global_system_variables.long_query_time_double * 1e6); + (global_system_variables.long_query_time_double * 1e6 + 0.1); + global_system_variables.max_statement_time= (ulonglong) + (global_system_variables.max_statement_time_double * 1e6 + 0.1); if (opt_short_log_format) opt_specialflag|= SPECIAL_SHORT_LOG_FORMAT; @@ -8950,7 +9190,7 @@ static int get_options(int *argc_ptr, char ***argv_ptr) /* workaround: disable thread pool on XP */ if (GetProcAddress(GetModuleHandle("kernel32"),"CreateThreadpool") == 0 && thread_handling > SCHEDULER_NO_THREADS) - thread_handling = SCHEDULER_ONE_THREAD_PER_CONNECTION; + 
SYSVAR_AUTOSIZE(thread_handling, SCHEDULER_ONE_THREAD_PER_CONNECTION); #endif if (thread_handling <= SCHEDULER_ONE_THREAD_PER_CONNECTION) @@ -8967,10 +9207,6 @@ static int get_options(int *argc_ptr, char ***argv_ptr) &extra_connection_count); #endif - global_system_variables.engine_condition_pushdown= - MY_TEST(global_system_variables.optimizer_switch & - OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN); - opt_readonly= read_only; /* @@ -8978,24 +9214,27 @@ static int get_options(int *argc_ptr, char ***argv_ptr) value of max_allowed_packet. */ if (!max_long_data_size_used) - max_long_data_size= global_system_variables.max_allowed_packet; + SYSVAR_AUTOSIZE(max_long_data_size, + global_system_variables.max_allowed_packet); /* Remember if max_user_connections was 0 at startup */ max_user_connections_checking= global_system_variables.max_user_connections != 0; +#ifdef HAVE_REPLICATION { sys_var *max_relay_log_size_var, *max_binlog_size_var; /* If max_relay_log_size is 0, then set it to max_binlog_size */ if (!global_system_variables.max_relay_log_size) - global_system_variables.max_relay_log_size= max_binlog_size; + SYSVAR_AUTOSIZE(global_system_variables.max_relay_log_size, + max_binlog_size); /* Fix so that DEFAULT and limit checking works with max_relay_log_size (Yes, this is a hack, but it's required as the definition of max_relay_log_size allows it to be set to 0). 
*/ - max_relay_log_size_var= intern_find_sys_var("max_relay_log_size", 0); - max_binlog_size_var= intern_find_sys_var("max_binlog_size", 0); + max_relay_log_size_var= intern_find_sys_var(STRING_WITH_LEN("max_relay_log_size")); + max_binlog_size_var= intern_find_sys_var(STRING_WITH_LEN("max_binlog_size")); if (max_binlog_size_var && max_relay_log_size_var) { max_relay_log_size_var->option.min_value= @@ -9004,12 +9243,13 @@ static int get_options(int *argc_ptr, char ***argv_ptr) max_binlog_size_var->option.def_value; } } +#endif /* Ensure that some variables are not set higher than needed */ if (back_log > max_connections) - back_log= max_connections; + SYSVAR_AUTOSIZE(back_log, max_connections); if (thread_cache_size > max_connections) - thread_cache_size= max_connections; + SYSVAR_AUTOSIZE(thread_cache_size, max_connections); return 0; } @@ -9029,11 +9269,14 @@ void set_server_version(void) #ifdef EMBEDDED_LIBRARY end= strmov(end, "-embedded"); #endif +#ifdef WITH_WSREP + end= strmov(end, "-wsrep"); +#endif #ifndef DBUG_OFF if (!strstr(MYSQL_SERVER_SUFFIX_STR, "-debug")) end= strmov(end, "-debug"); #endif - if (opt_log || opt_slow_log || opt_bin_log) + if (opt_log || global_system_variables.sql_log_slow || opt_bin_log) strmov(end, "-log"); // This may slow down system } @@ -9174,13 +9417,18 @@ static int fix_paths(void) /* If --character-sets-dir isn't given, use shared library dir */ if (charsets_dir) + { strmake_buf(mysql_charsets_dir, charsets_dir); + charsets_dir= mysql_charsets_dir; + } else + { strxnmov(mysql_charsets_dir, sizeof(mysql_charsets_dir)-1, buff, CHARSET_DIR, NullS); + SYSVAR_AUTOSIZE(charsets_dir, mysql_charsets_dir); + } (void) my_load_path(mysql_charsets_dir, mysql_charsets_dir, buff); convert_dirname(mysql_charsets_dir, mysql_charsets_dir, NullS); - charsets_dir=mysql_charsets_dir; if (init_tmpdir(&mysql_tmpdir_list, opt_mysql_tmpdir)) DBUG_RETURN(1); @@ -9188,7 +9436,7 @@ static int fix_paths(void) opt_mysql_tmpdir= mysql_tmpdir; #ifdef 
HAVE_REPLICATION if (!slave_load_tmpdir) - slave_load_tmpdir= mysql_tmpdir; + SYSVAR_AUTOSIZE(slave_load_tmpdir, mysql_tmpdir); #endif /* HAVE_REPLICATION */ /* Convert the secure-file-priv option to system format, allowing @@ -9321,6 +9569,10 @@ void refresh_status(THD *thd) /* Reset some global variables */ reset_status_vars(); +#ifdef WITH_WSREP + if (WSREP_ON) + wsrep->stats_reset(wsrep); +#endif /* WITH_WSREP */ /* Reset the counters of all key caches (default and named). */ process_key_caches(reset_key_cache_counters, 0); @@ -9372,6 +9624,7 @@ static PSI_file_info all_server_files[]= }; #endif /* HAVE_PSI_INTERFACE */ +PSI_stage_info stage_after_apply_event= { 0, "after apply log event", 0}; PSI_stage_info stage_after_create= { 0, "After create", 0}; PSI_stage_info stage_after_opening_tables= { 0, "After opening tables", 0}; PSI_stage_info stage_after_table_lock= { 0, "After table lock", 0}; @@ -9379,6 +9632,7 @@ PSI_stage_info stage_allocating_local_table= { 0, "allocating local table", 0}; PSI_stage_info stage_alter_inplace_prepare= { 0, "preparing for alter table", 0}; PSI_stage_info stage_alter_inplace= { 0, "altering table", 0}; PSI_stage_info stage_alter_inplace_commit= { 0, "committing alter table to storage engine", 0}; +PSI_stage_info stage_apply_event= { 0, "apply log event", 0}; PSI_stage_info stage_changing_master= { 0, "Changing master", 0}; PSI_stage_info stage_checking_master_version= { 0, "Checking master version", 0}; PSI_stage_info stage_checking_permissions= { 0, "checking permissions", 0}; @@ -9452,6 +9706,7 @@ PSI_stage_info stage_sql_thd_waiting_until_delay= { 0, "Waiting until MASTER_DEL PSI_stage_info stage_storing_result_in_query_cache= { 0, "storing result in query cache", 0}; PSI_stage_info stage_storing_row_into_queue= { 0, "storing row into queue", 0}; PSI_stage_info stage_system_lock= { 0, "System lock", 0}; +PSI_stage_info stage_unlocking_tables= { 0, "Unlocking tables", 0}; PSI_stage_info stage_update= { 0, "update", 0}; 
PSI_stage_info stage_updating= { 0, "updating", 0}; PSI_stage_info stage_updating_main_table= { 0, "updating main table", 0}; @@ -9496,6 +9751,7 @@ PSI_stage_info stage_gtid_wait_other_connection= { 0, "Waiting for other master PSI_stage_info *all_server_stages[]= { + & stage_after_apply_event, & stage_after_create, & stage_after_opening_tables, & stage_after_table_lock, @@ -9503,6 +9759,7 @@ PSI_stage_info *all_server_stages[]= & stage_alter_inplace, & stage_alter_inplace_commit, & stage_alter_inplace_prepare, + & stage_apply_event, & stage_binlog_processing_checkpoint_notify, & stage_binlog_stopping_background_thread, & stage_binlog_waiting_background_tasks, @@ -9584,6 +9841,7 @@ PSI_stage_info *all_server_stages[]= & stage_storing_result_in_query_cache, & stage_storing_row_into_queue, & stage_system_lock, + & stage_unlocking_tables, & stage_update, & stage_updating, & stage_updating_main_table, diff --git a/sql/mysqld.h b/sql/mysqld.h index 5a7419bf32b..1f8b6d3eef9 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -25,6 +25,7 @@ #include "sql_list.h" /* I_List */ #include "sql_cmd.h" #include <my_rnd.h> +#include "my_pthread.h" class THD; struct handlerton; @@ -80,7 +81,7 @@ extern CHARSET_INFO *character_set_filesystem; extern MY_BITMAP temp_pool; extern bool opt_large_files, server_id_supplied; extern bool opt_update_log, opt_bin_log, opt_error_log; -extern my_bool opt_log, opt_slow_log, opt_bootstrap; +extern my_bool opt_log, opt_bootstrap; extern my_bool opt_backup_history_log; extern my_bool opt_backup_progress_log; extern ulonglong log_output_options; @@ -328,6 +329,7 @@ void init_server_psi_keys(); MAINTAINER: Please keep this list in order, to limit merge collisions. 
Hint: grep PSI_stage_info | sort -u */ +extern PSI_stage_info stage_apply_event; extern PSI_stage_info stage_after_create; extern PSI_stage_info stage_after_opening_tables; extern PSI_stage_info stage_after_table_lock; @@ -335,6 +337,7 @@ extern PSI_stage_info stage_allocating_local_table; extern PSI_stage_info stage_alter_inplace_prepare; extern PSI_stage_info stage_alter_inplace; extern PSI_stage_info stage_alter_inplace_commit; +extern PSI_stage_info stage_after_apply_event; extern PSI_stage_info stage_changing_master; extern PSI_stage_info stage_checking_master_version; extern PSI_stage_info stage_checking_permissions; @@ -408,6 +411,7 @@ extern PSI_stage_info stage_statistics; extern PSI_stage_info stage_storing_result_in_query_cache; extern PSI_stage_info stage_storing_row_into_queue; extern PSI_stage_info stage_system_lock; +extern PSI_stage_info stage_unlocking_tables; extern PSI_stage_info stage_update; extern PSI_stage_info stage_updating; extern PSI_stage_info stage_updating_main_table; @@ -554,7 +558,6 @@ enum options_mysqld OPT_DEBUG_SYNC_TIMEOUT, OPT_DELAY_KEY_WRITE_ALL, OPT_DEPRECATED_OPTION, - OPT_ENGINE_CONDITION_PUSHDOWN, OPT_IGNORE_DB_DIRECTORY, OPT_ISAM_LOG, OPT_KEY_BUFFER_SIZE, diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 88d8b0551cb..208597590f3 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -829,6 +829,12 @@ public: */ bool remove_jump_scans; + /* + TRUE <=> Range analyzer should remove parts of condition that are found + to be always FALSE. + */ + bool remove_false_where_parts; + /* used_key_no -> table_key_no translation table. 
Only makes sense if using_real_indexes==TRUE @@ -908,7 +914,7 @@ static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *fie static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field, KEY_PART *key_part, Item_func::Functype type,Item *value); -static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond); +static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond); static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, @@ -1451,7 +1457,7 @@ mem_err: inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2) { - im1->concat(im2); + im1->append(im2); } @@ -1627,7 +1633,7 @@ int imerge_list_or_tree(RANGE_OPT_PARAM *param, it.remove(); } - merges->concat(&additional_merges); + merges->append(&additional_merges); return merges->is_empty(); } @@ -2941,7 +2947,8 @@ static int fill_used_fields_bitmap(PARAM *param) int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, table_map prev_tables, ha_rows limit, bool force_quick_range, - bool ordered_output) + bool ordered_output, + bool remove_false_parts_of_where) { uint idx; double scan_time; @@ -3000,6 +3007,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.imerge_cost_buff_size= 0; param.using_real_indexes= TRUE; param.remove_jump_scans= TRUE; + param.remove_false_where_parts= remove_false_parts_of_where; param.force_default_mrr= ordered_output; param.possible_keys.clear_all(); @@ -3056,7 +3064,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.alloced_sel_args= 0; /* Calculate cost of full index read for the shortest covering index */ - if (!head->covering_keys.is_clear_all()) + if (!force_quick_range && !head->covering_keys.is_clear_all()) { int key_for_use= find_shortest_key(head, &head->covering_keys); double key_read_time= head->file->keyread_time(key_for_use, 1, records) + @@ -3073,7 +3081,7 @@ int 
SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, if (cond) { - if ((tree= get_mm_tree(&param,cond))) + if ((tree= get_mm_tree(&param, &cond))) { if (tree->type == SEL_TREE::IMPOSSIBLE) { @@ -3415,7 +3423,7 @@ double records_in_column_ranges(PARAM *param, uint idx, TRUE otherwise */ -bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond) +bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) { uint keynr; uint max_quick_key_parts= 0; @@ -3425,7 +3433,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond) table->cond_selectivity= 1.0; - if (!cond || table_records == 0) + if (!*cond || table_records == 0) DBUG_RETURN(FALSE); if (table->pos_in_table_list->schema_table) @@ -3529,6 +3537,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond) param.old_root= thd->mem_root; param.table= table; param.is_ror_scan= FALSE; + param.remove_false_where_parts= true; if (create_key_parts_for_pseudo_indexes(&param, used_fields)) goto free_alloc; @@ -3606,7 +3615,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond) ulong check_rows= MY_MIN(thd->variables.optimizer_selectivity_sampling_limit, (ulong) (table_records * SELECTIVITY_SAMPLING_SHARE)); - if (cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD && + if (*cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD && thd->variables.optimizer_use_condition_selectivity > 4) { find_selective_predicates_list_processor_data *dt= @@ -3617,8 +3626,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond) DBUG_RETURN(TRUE); dt->list.empty(); dt->table= table; - if (cond->walk(&Item::find_selective_predicates_list_processor, 0, - (uchar*) dt)) + if ((*cond)->walk(&Item::find_selective_predicates_list_processor, 0, + (uchar*) dt)) DBUG_RETURN(TRUE); if (dt->list.elements > 0) { @@ -3951,6 +3960,8 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) /* range_par->cond 
doesn't need initialization */ range_par->prev_tables= range_par->read_tables= 0; range_par->current_table= table->map; + /* It should be possible to switch the following ON: */ + range_par->remove_false_where_parts= false; range_par->keys= 1; // one index range_par->using_real_indexes= FALSE; @@ -3967,7 +3978,7 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) SEL_TREE *tree; int res; - tree= get_mm_tree(range_par, pprune_cond); + tree= get_mm_tree(range_par, &pprune_cond); if (!tree) goto all_used; @@ -7855,15 +7866,33 @@ static SEL_TREE *get_full_func_mm_tree(RANGE_OPT_PARAM *param, DBUG_RETURN(ftree); } - /* make a select tree of all keys in condition */ +/* + make a select tree of all keys in condition + + @param param Context + @param cond INOUT condition to perform range analysis on. + + @detail + Range analysis may infer that some conditions are never true. + - If the condition is never true, SEL_TREE(type=IMPOSSIBLE) is returned + - if parts of condition are never true, the function may remove these parts + from the condition 'cond'. Sometimes, this will cause the condition to + be substituted for something else. + + + @return + NULL - Could not infer anything from condition cond. + SEL_TREE with type=IMPOSSIBLE - condition can never be true. 
+*/ -static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) +static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) { SEL_TREE *tree=0; SEL_TREE *ftree= 0; Item_field *field_item= 0; bool inv= FALSE; Item *value= 0; + Item *cond= *cond_ptr; DBUG_ENTER("get_mm_tree"); if (cond->type() == Item::COND_ITEM) @@ -7876,31 +7905,75 @@ static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond) Item *item; while ((item=li++)) { - SEL_TREE *new_tree= get_mm_tree(param,item); + SEL_TREE *new_tree= get_mm_tree(param,li.ref()); if (param->statement_should_be_aborted()) DBUG_RETURN(NULL); tree= tree_and(param,tree,new_tree); if (tree && tree->type == SEL_TREE::IMPOSSIBLE) + { + /* + Do not remove 'item' from 'cond'. We return a SEL_TREE::IMPOSSIBLE + and that is sufficient for the caller to see that the whole + condition is never true. + */ break; + } } } else { // COND OR - tree= get_mm_tree(param,li++); + bool replace_cond= false; + Item *replacement_item= li++; + tree= get_mm_tree(param, li.ref()); if (param->statement_should_be_aborted()) DBUG_RETURN(NULL); if (tree) { + if (tree->type == SEL_TREE::IMPOSSIBLE && + param->remove_false_where_parts) + { + /* See the other li.remove() call below */ + li.remove(); + if (((Item_cond*)cond)->argument_list()->elements <= 1) + replace_cond= true; + } + Item *item; while ((item=li++)) { - SEL_TREE *new_tree=get_mm_tree(param,item); + SEL_TREE *new_tree=get_mm_tree(param,li.ref()); if (new_tree == NULL || param->statement_should_be_aborted()) DBUG_RETURN(NULL); tree= tree_or(param,tree,new_tree); if (tree == NULL || tree->type == SEL_TREE::ALWAYS) + { + replacement_item= *li.ref(); break; + } + + if (new_tree && new_tree->type == SEL_TREE::IMPOSSIBLE && + param->remove_false_where_parts) + { + /* + This is a condition in form + + cond = item1 OR ... OR item_i OR ... itemN + + and item_i produces SEL_TREE(IMPOSSIBLE). We should remove item_i + from cond. 
This may cause 'cond' to become a degenerate, + one-way OR. In that case, we replace 'cond' with the remaining + item_i. + */ + li.remove(); + if (((Item_cond*)cond)->argument_list()->elements <= 1) + replace_cond= true; + } + else + replacement_item= *li.ref(); } + + if (replace_cond) + *cond_ptr= replacement_item; } } DBUG_RETURN(tree); @@ -8132,8 +8205,15 @@ get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field, param->thd->mem_root= param->old_root; if (!value) // IS NULL or IS NOT NULL { - if (field->table->maybe_null) // Can't use a key on this - goto end; + /* + No check for field->table->maybe_null. It's perfecly fine to use range + access for cases like + + SELECT * FROM t1 LEFT JOIN t2 ON t2.key IS [NOT] NULL + + ON expression is evaluated before considering NULL-complemented rows, so + IS [NOT] NULL has regular semantics. + */ if (!maybe_null) // Not null field { if (type == Item_func::ISNULL_FUNC) @@ -10610,6 +10690,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, param->table->quick_condition_rows= MY_MIN(param->table->quick_condition_rows, rows); param->table->quick_rows[keynr]= rows; + param->table->quick_costs[keynr]= cost->total_cost(); } } /* Figure out if the key scan is ROR (returns rows in ROWID order) or not */ @@ -11625,14 +11706,6 @@ int QUICK_RANGE_SELECT::reset() mrr_buf_desc->buffer= mrange_buff; mrr_buf_desc->buffer_end= mrange_buff + buf_size; mrr_buf_desc->end_of_used_area= mrange_buff; -#ifdef HAVE_valgrind - /* - We need this until ndb will use the buffer efficiently - (Now ndb stores complete row in here, instead of only the used fields - which gives us valgrind warnings in compare_record[]) - */ - bzero((char*) mrange_buff, buf_size); -#endif } if (!mrr_buf_desc) diff --git a/sql/opt_range.h b/sql/opt_range.h index 1ca245ea420..f602408ea82 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -994,7 +994,7 @@ class SQL_SELECT :public Sql_alloc { { key_map tmp; tmp.set_all(); - return 
test_quick_select(thd, tmp, 0, limit, force_quick_range, FALSE) < 0; + return test_quick_select(thd, tmp, 0, limit, force_quick_range, FALSE, FALSE) < 0; } /* RETURN @@ -1011,7 +1011,7 @@ class SQL_SELECT :public Sql_alloc { } int test_quick_select(THD *thd, key_map keys, table_map prev_tables, ha_rows limit, bool force_quick_range, - bool ordered_output); + bool ordered_output, bool remove_false_parts_of_where); }; @@ -1036,7 +1036,7 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables, table_map read_tables, COND *conds, bool allow_null_cond, int *error); -bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond); +bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond); #ifdef WITH_PARTITION_STORAGE_ENGINE bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 6e65b5ea177..4902ae8b028 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -1515,7 +1515,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) NOTE: We actually insert them at the front! That's because the order is reversed in this list. */ - parent_lex->leaf_tables.concat(&subq_lex->leaf_tables); + parent_lex->leaf_tables.append(&subq_lex->leaf_tables); if (subq_lex->options & OPTION_SCHEMA_TABLE) parent_lex->options |= OPTION_SCHEMA_TABLE; diff --git a/sql/partition_info.cc b/sql/partition_info.cc index 98e796879ad..a753c5052bd 100644 --- a/sql/partition_info.cc +++ b/sql/partition_info.cc @@ -226,11 +226,6 @@ bool partition_info::set_partition_bitmaps(TABLE_LIST *table_list) { if (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION) { - /* - Don't allow PARTITION () clause on a NDB tables yet. - TODO: Add partition name handling to NDB/partition_info. - which is currently ha_partition specific. 
- */ my_error(ER_PARTITION_CLAUSE_ON_NONPARTITIONED, MYF(0)); DBUG_RETURN(true); } @@ -286,7 +281,7 @@ bool partition_info::can_prune_insert(THD* thd, DBUG_ENTER("partition_info::can_prune_insert"); if (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION) - DBUG_RETURN(false); /* Should not insert prune NDB tables */ + DBUG_RETURN(false); /* If under LOCK TABLES pruning will skip start_stmt instead of external_lock @@ -1110,14 +1105,12 @@ static bool check_engine_condition(partition_element *p_elem, Current check verifies only that all handlers are the same. Later this check will be more sophisticated. (specified partition handler ) specified table handler - (NDB, NDB) NDB OK (MYISAM, MYISAM) - OK (MYISAM, -) - NOT OK (MYISAM, -) MYISAM OK (- , MYISAM) - NOT OK (- , -) MYISAM OK (-,-) - OK - (NDB, MYISAM) * NOT OK */ bool partition_info::check_engine_mix(handlerton *engine_type, diff --git a/sql/protocol.cc b/sql/protocol.cc index 2400dadfadc..fde90102e61 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -488,6 +488,8 @@ static uchar *net_store_length_fast(uchar *packet, uint length) void Protocol::end_statement() { + /* sanity check*/ + DBUG_ASSERT_IF_WSREP(!(WSREP(thd) && thd->wsrep_conflict_state == REPLAYING)); DBUG_ENTER("Protocol::end_statement"); DBUG_ASSERT(! 
thd->get_stmt_da()->is_sent()); bool error= FALSE; diff --git a/sql/protocol.h b/sql/protocol.h index 5129f68d706..048db940804 100644 --- a/sql/protocol.h +++ b/sql/protocol.h @@ -243,6 +243,47 @@ public: return 0; } + void prepare_for_resend() + { +#ifndef DBUG_OFF + field_pos= 0; +#endif + } + + /* + Provide dummy overrides for any storage methods so that we + avoid allocating and copying of data + */ + virtual bool store_null() + { return false; } + virtual bool store_tiny(longlong from) + { return false; } + virtual bool store_short(longlong from) + { return false; } + virtual bool store_long(longlong from) + { return false; } + virtual bool store_longlong(longlong from, bool unsigned_flag) + { return false; } + virtual bool store_decimal(const my_decimal *) + { return false; } + virtual bool store(const char *from, size_t length, CHARSET_INFO *cs) + { return false; } + virtual bool store(const char *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs) + { return false; } + virtual bool store(MYSQL_TIME *time, int decimals) + { return false; } + virtual bool store_date(MYSQL_TIME *time) + { return false; } + virtual bool store_time(MYSQL_TIME *time, int decimals) + { return false; } + virtual bool store(float nr, uint32 decimals, String *buffer) + { return false; } + virtual bool store(double from, uint32 decimals, String *buffer) + { return false; } + virtual bool store(Field *field) + { return false; } + }; diff --git a/sql/records.cc b/sql/records.cc index 1b230c41156..0c6ecca9a58 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -287,8 +287,8 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, thd->variables.read_buff_size); } /* Condition pushdown to storage engine */ - if ((thd->variables.optimizer_switch & - OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) && + if ((table->file->ha_table_flags() & + HA_CAN_TABLE_CONDITION_PUSHDOWN) && select && select->cond && (select->cond->used_tables() & table->map) && 
!table->file->pushed_cond) diff --git a/sql/rpl_constants.h b/sql/rpl_constants.h index f83588ce321..cc6fb8145ad 100644 --- a/sql/rpl_constants.h +++ b/sql/rpl_constants.h @@ -36,8 +36,6 @@ enum Incident { Enumeration of the reserved formats of Binlog extra row information */ enum ExtraRowInfoFormat { - /** Ndb format */ - ERIF_NDB = 0, /** Reserved formats 0 -> 63 inclusive */ ERIF_LASTRESERVED = 63, diff --git a/sql/rpl_handler.cc b/sql/rpl_handler.cc index 2777dabf451..a706fcd37ee 100644 --- a/sql/rpl_handler.cc +++ b/sql/rpl_handler.cc @@ -188,8 +188,8 @@ void delegates_destroy() } s; \ DYNAMIC_ARRAY *plugins= &s.plugins; \ plugin_ref *plugins_buffer= s.plugins_buffer; \ - my_init_dynamic_array2(plugins, sizeof(plugin_ref), \ - plugins_buffer, 8, 8, MYF(0)); \ + init_dynamic_array2(plugins, sizeof(plugin_ref), \ + plugins_buffer, 8, 8, MYF(0)); \ read_lock(); \ Observer_info_iterator iter= observer_info_iter(); \ Observer_info *info= iter++; \ diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index 7764400becb..977dec96982 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -1078,7 +1078,7 @@ bool Master_info_index::write_master_name_to_index_file(LEX_STRING *name, */ Master_info * -Master_info_index::get_master_info(LEX_STRING *connection_name, +Master_info_index::get_master_info(const LEX_STRING *connection_name, Sql_condition::enum_warning_level warning) { Master_info *mi; diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index af739b1dad4..f20c2e21a5f 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -209,7 +209,7 @@ public: const char *host, uint port); bool add_master_info(Master_info *mi, bool write_to_file); bool remove_master_info(LEX_STRING *connection_name); - Master_info *get_master_info(LEX_STRING *connection_name, + Master_info *get_master_info(const LEX_STRING *connection_name, Sql_condition::enum_warning_level warning); bool give_error_if_slave_running(); bool start_all_slaves(THD *thd); diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 
e72d3470a7f..90ee2360eb7 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -235,7 +235,7 @@ handle_rpl_parallel_thread(void *arg) thd->security_ctx->skip_grants(); thd->variables.max_allowed_packet= slave_max_allowed_packet; thd->slave_thread= 1; - thd->enable_slow_log= opt_log_slow_slave_statements; + thd->variables.sql_log_slow= opt_log_slow_slave_statements; thd->variables.log_slow_filter= global_system_variables.log_slow_filter; set_slave_thread_options(thd); thd->client_capabilities = CLIENT_LOCAL_FILES; diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc index a6d93d10f11..b1cca04d947 100644 --- a/sql/rpl_record.cc +++ b/sql/rpl_record.cc @@ -305,9 +305,8 @@ unpack_row(rpl_group_info *rgi, normal unpack operation. */ uint16 const metadata= tabledef->field_metadata(i); -#ifndef DBUG_OFF uchar const *const old_pack_ptr= pack_ptr; -#endif + pack_ptr= f->unpack(f->ptr, pack_ptr, row_end, metadata); DBUG_PRINT("debug", ("field: %s; metadata: 0x%x;" " pack_ptr: 0x%lx; pack_ptr': 0x%lx; bytes: %d", @@ -316,6 +315,22 @@ unpack_row(rpl_group_info *rgi, (int) (pack_ptr - old_pack_ptr))); if (!pack_ptr) { + if (WSREP_ON) + { + /* + Debug message to troubleshoot bug: + https://mariadb.atlassian.net/browse/MDEV-4404 + Galera Node throws "Could not read field" error and drops out of cluster + */ + WSREP_WARN("ROW event unpack field: %s metadata: 0x%x;" + " pack_ptr: 0x%lx; conv_table %p conv_field %p table %s" + " row_end: 0x%lx", + f->field_name, metadata, + (ulong) old_pack_ptr, conv_table, conv_field, + (table_found) ? 
"found" : "not found", (ulong)row_end + ); + } + rgi->rli->report(ERROR_LEVEL, ER_SLAVE_CORRUPT_EVENT, "Could not read field '%s' of table '%s.%s'", f->field_name, table->s->db.str, diff --git a/sql/set_var.cc b/sql/set_var.cc index 5c1e00af33e..bae65118112 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -35,6 +35,7 @@ #include "tztime.h" // my_tz_find, my_tz_SYSTEM, struct Time_zone #include "sql_acl.h" // SUPER_ACL #include "sql_select.h" // free_underlaid_joins +#include "sql_show.h" #include "sql_view.h" // updatable_views_with_limit_typelib #include "lock.h" // lock_global_read_lock, // make_global_read_lock_block_commit, @@ -142,8 +143,7 @@ sys_var::sys_var(sys_var_chain *chain, const char *name_arg, on_check_function on_check_func, on_update_function on_update_func, const char *substitute) : - next(0), - binlog_status(binlog_status_arg), + next(0), binlog_status(binlog_status_arg), value_origin(COMPILE_TIME), flags(flags_arg), show_val_type(show_val_type_arg), guard(lock), offset(off), on_check(on_check_func), on_update(on_update_func), deprecation_substitute(substitute), @@ -171,6 +171,7 @@ sys_var::sys_var(sys_var_chain *chain, const char *name_arg, option.arg_type= getopt_arg_type; option.value= (uchar **)global_var_ptr(); option.def_value= def_val; + option.app_type= this; if (chain->last) chain->last->next= this; @@ -192,6 +193,7 @@ bool sys_var::update(THD *thd, set_var *var) */ AutoWLock lock1(&PLock_global_system_variables); AutoWLock lock2(guard); + value_origin= SQL; return global_update(thd, var) || (on_update && on_update(this, thd, OPT_GLOBAL)); } @@ -200,12 +202,12 @@ bool sys_var::update(THD *thd, set_var *var) (on_update && on_update(this, thd, OPT_SESSION)); } -uchar *sys_var::session_value_ptr(THD *thd, LEX_STRING *base) +uchar *sys_var::session_value_ptr(THD *thd, const LEX_STRING *base) { return session_var_ptr(thd); } -uchar *sys_var::global_value_ptr(THD *thd, LEX_STRING *base) +uchar *sys_var::global_value_ptr(THD *thd, const 
LEX_STRING *base) { return global_var_ptr(); } @@ -238,8 +240,9 @@ bool sys_var::check(THD *thd, set_var *var) return false; } -uchar *sys_var::value_ptr(THD *thd, enum_var_type type, LEX_STRING *base) +uchar *sys_var::value_ptr(THD *thd, enum_var_type type, const LEX_STRING *base) { + DBUG_ASSERT(base); if (type == OPT_GLOBAL || scope() == GLOBAL) { mysql_mutex_assert_owner(&LOCK_global_system_variables); @@ -261,12 +264,10 @@ bool sys_var::set_default(THD *thd, set_var* var) } -#define do_num_val(T,CMD) \ -do { \ - mysql_mutex_lock(&LOCK_global_system_variables); \ - T val= *(T*) value_ptr(thd, type, base); \ - mysql_mutex_unlock(&LOCK_global_system_variables); \ - CMD; \ +#define do_num_val(T,CMD) \ +do { \ + T val= *(T*) value; \ + CMD; \ } while (0) #define case_for_integers(CMD) \ @@ -276,39 +277,38 @@ do { \ case SHOW_UINT: do_num_val (uint,CMD); \ case SHOW_ULONG: do_num_val (ulong,CMD); \ case SHOW_ULONGLONG:do_num_val (ulonglong,CMD); \ - case SHOW_HA_ROWS: do_num_val (ha_rows,CMD); \ - case SHOW_BOOL: do_num_val (bool,CMD); \ - case SHOW_MY_BOOL: do_num_val (my_bool,CMD) + case SHOW_HA_ROWS: do_num_val (ha_rows,CMD); -#define case_for_double(CMD) \ +#define case_for_double(CMD) \ case SHOW_DOUBLE: do_num_val (double,CMD) -#define case_get_string_as_lex_string \ - case SHOW_CHAR: \ - mysql_mutex_lock(&LOCK_global_system_variables); \ - sval.str= (char*) value_ptr(thd, type, base); \ - sval.length= sval.str ? strlen(sval.str) : 0; \ - break; \ - case SHOW_CHAR_PTR: \ - mysql_mutex_lock(&LOCK_global_system_variables); \ - sval.str= *(char**) value_ptr(thd, type, base); \ - sval.length= sval.str ? strlen(sval.str) : 0; \ - break; \ - case SHOW_LEX_STRING: \ - mysql_mutex_lock(&LOCK_global_system_variables); \ - sval= *(LEX_STRING *) value_ptr(thd, type, base); \ +#define case_get_string_as_lex_string \ + case SHOW_CHAR: \ + sval.str= (char*) value; \ + sval.length= sval.str ? 
strlen(sval.str) : 0; \ + break; \ + case SHOW_CHAR_PTR: \ + sval.str= *(char**) value; \ + sval.length= sval.str ? strlen(sval.str) : 0; \ + break; \ + case SHOW_LEX_STRING: \ + sval= *(LEX_STRING *) value; \ break longlong sys_var::val_int(bool *is_null, - THD *thd, enum_var_type type, LEX_STRING *base) + THD *thd, enum_var_type type, const LEX_STRING *base) { LEX_STRING sval; + AutoWLock lock(&PLock_global_system_variables); + const uchar *value= value_ptr(thd, type, base); *is_null= false; + switch (show_type()) { case_get_string_as_lex_string; case_for_integers(return val); case_for_double(return (longlong) val); + case SHOW_MY_BOOL: return *(my_bool*)value; default: my_error(ER_VAR_CANT_BE_READ, MYF(0), name.str); return 0; @@ -316,44 +316,63 @@ longlong sys_var::val_int(bool *is_null, longlong ret= 0; if (!(*is_null= !sval.str)) - ret= longlong_from_string_with_check(system_charset_info, + ret= longlong_from_string_with_check(charset(thd), sval.str, sval.str + sval.length); - mysql_mutex_unlock(&LOCK_global_system_variables); return ret; } -String *sys_var::val_str(String *str, - THD *thd, enum_var_type type, LEX_STRING *base) +String *sys_var::val_str_nolock(String *str, THD *thd, const uchar *value) { + static LEX_STRING bools[]= + { + { C_STRING_WITH_LEN("OFF") }, + { C_STRING_WITH_LEN("ON") } + }; + LEX_STRING sval; switch (show_type()) { case_get_string_as_lex_string; - case_for_integers(return str->set((ulonglong)val, system_charset_info) ? 0 : str); + case_for_integers(return str->set(val, system_charset_info) ? 0 : str); case_for_double(return str->set_real(val, 6, system_charset_info) ? 
0 : str); + case SHOW_MY_BOOL: + sval= bools[(int)*(my_bool*)value]; + break; default: my_error(ER_VAR_CANT_BE_READ, MYF(0), name.str); return 0; } - if (!sval.str || str->copy(sval.str, sval.length, system_charset_info)) + if (!sval.str || str->copy(sval.str, sval.length, charset(thd))) str= NULL; - mysql_mutex_unlock(&LOCK_global_system_variables); return str; } +String *sys_var::val_str(String *str, + THD *thd, enum_var_type type, const LEX_STRING *base) +{ + AutoWLock lock(&PLock_global_system_variables); + const uchar *value= value_ptr(thd, type, base); + return val_str_nolock(str, thd, value); +} + + double sys_var::val_real(bool *is_null, - THD *thd, enum_var_type type, LEX_STRING *base) + THD *thd, enum_var_type type, const LEX_STRING *base) { LEX_STRING sval; + AutoWLock lock(&PLock_global_system_variables); + const uchar *value= value_ptr(thd, type, base); *is_null= false; + switch (show_type()) { case_get_string_as_lex_string; case_for_integers(return val); case_for_double(return val); + case SHOW_MY_BOOL: return *(my_bool*)value; default: my_error(ER_VAR_CANT_BE_READ, MYF(0), name.str); return 0; @@ -361,9 +380,8 @@ double sys_var::val_real(bool *is_null, double ret= 0; if (!(*is_null= !sval.str)) - ret= double_from_string_with_check(system_charset_info, + ret= double_from_string_with_check(charset(thd), sval.str, sval.str + sval.length); - mysql_mutex_unlock(&LOCK_global_system_variables); return ret; } @@ -453,6 +471,7 @@ CHARSET_INFO *sys_var::charset(THD *thd) system_charset_info; } + typedef struct old_names_map_st { const char *old_name; @@ -565,7 +584,7 @@ static int show_cmp(SHOW_VAR *a, SHOW_VAR *b) @param thd current thread @param sorted If TRUE, the system variables should be sorted - @param type OPT_GLOBAL or OPT_SESSION for SHOW GLOBAL|SESSION VARIABLES + @param scope OPT_GLOBAL or OPT_SESSION for SHOW GLOBAL|SESSION VARIABLES @retval pointer Array of SHOW_VAR elements for display @@ -573,7 +592,7 @@ static int show_cmp(SHOW_VAR *a, 
SHOW_VAR *b) NULL FAILURE */ -SHOW_VAR* enumerate_sys_vars(THD *thd, bool sorted, enum enum_var_type type) +SHOW_VAR* enumerate_sys_vars(THD *thd, bool sorted, enum enum_var_type scope) { int count= system_variable_hash.records, i; int size= sizeof(SHOW_VAR) * (count + 1); @@ -588,7 +607,7 @@ SHOW_VAR* enumerate_sys_vars(THD *thd, bool sorted, enum enum_var_type type) sys_var *var= (sys_var*) my_hash_element(&system_variable_hash, i); // don't show session-only variables in SHOW GLOBAL VARIABLES - if (type == OPT_GLOBAL && var->check_type(type)) + if (scope == OPT_GLOBAL && var->check_type(scope)) continue; show->name= var->name.str; @@ -923,3 +942,232 @@ int set_var_collation_client::update(THD *thd) return 0; } +/***************************************************************************** + INFORMATION_SCHEMA.SYSTEM_VARIABLES +*****************************************************************************/ +static void store_value_ptr(Field *field, sys_var *var, String *str, + uchar *value_ptr) +{ + field->set_notnull(); + str= var->val_str_nolock(str, field->table->in_use, value_ptr); + if (str) + field->store(str->ptr(), str->length(), str->charset()); +} + +static void store_var(Field *field, sys_var *var, enum_var_type scope, + String *str) +{ + if (var->check_type(scope)) + return; + + store_value_ptr(field, var, str, + var->value_ptr(field->table->in_use, scope, &null_lex_str)); +} + +int fill_sysvars(THD *thd, TABLE_LIST *tables, COND *cond) +{ + char name_buffer[NAME_CHAR_LEN]; + enum_check_fields save_count_cuted_fields= thd->count_cuted_fields; + bool res= 1; + CHARSET_INFO *scs= system_charset_info; + StringBuffer<STRING_BUFFER_USUAL_SIZE> strbuf(scs); + const char *wild= thd->lex->wild ? 
thd->lex->wild->ptr() : 0; + Field **fields=tables->table->field; + + DBUG_ASSERT(tables->table->in_use == thd); + + cond= make_cond_for_info_schema(cond, tables); + thd->count_cuted_fields= CHECK_FIELD_WARN; + mysql_rwlock_rdlock(&LOCK_system_variables_hash); + + for (uint i= 0; i < system_variable_hash.records; i++) + { + sys_var *var= (sys_var*) my_hash_element(&system_variable_hash, i); + + strmake_buf(name_buffer, var->name.str); + my_caseup_str(system_charset_info, name_buffer); + + /* this must be done before evaluating cond */ + restore_record(tables->table, s->default_values); + fields[0]->store(name_buffer, strlen(name_buffer), scs); + + if ((wild && wild_case_compare(system_charset_info, name_buffer, wild)) + || (cond && !cond->val_int())) + continue; + + mysql_mutex_lock(&LOCK_global_system_variables); + + // SESSION_VALUE + store_var(fields[1], var, OPT_SESSION, &strbuf); + + // GLOBAL_VALUE + store_var(fields[2], var, OPT_GLOBAL, &strbuf); + + // GLOBAL_VALUE_ORIGIN + static const LEX_CSTRING origins[]= + { + { STRING_WITH_LEN("CONFIG") }, + { STRING_WITH_LEN("AUTO") }, + { STRING_WITH_LEN("SQL") }, + { STRING_WITH_LEN("COMPILE-TIME") }, + { STRING_WITH_LEN("ENVIRONMENT") } + }; + const LEX_CSTRING *origin= origins + var->value_origin; + fields[3]->store(origin->str, origin->length, scs); + + // DEFAULT_VALUE + uchar *def= var->is_readonly() && var->option.id < 0 + ? 
0 : var->default_value_ptr(thd); + if (def) + store_value_ptr(fields[4], var, &strbuf, def); + + mysql_mutex_unlock(&LOCK_global_system_variables); + + // VARIABLE_SCOPE + static const LEX_CSTRING scopes[]= + { + { STRING_WITH_LEN("GLOBAL") }, + { STRING_WITH_LEN("SESSION") }, + { STRING_WITH_LEN("SESSION ONLY") } + }; + const LEX_CSTRING *scope= scopes + var->scope(); + fields[5]->store(scope->str, scope->length, scs); + + // VARIABLE_TYPE +#if SIZEOF_LONG == SIZEOF_INT +#define LONG_TYPE "INT" +#else +#define LONG_TYPE "BIGINT" +#endif + + static const LEX_CSTRING types[]= + { + { 0, 0 }, // unused 0 + { 0, 0 }, // GET_NO_ARG 1 + { STRING_WITH_LEN("BOOLEAN") }, // GET_BOOL 2 + { STRING_WITH_LEN("INT") }, // GET_INT 3 + { STRING_WITH_LEN("INT UNSIGNED") }, // GET_UINT 4 + { STRING_WITH_LEN(LONG_TYPE) }, // GET_LONG 5 + { STRING_WITH_LEN(LONG_TYPE " UNSIGNED") }, // GET_ULONG 6 + { STRING_WITH_LEN("BIGINT") }, // GET_LL 7 + { STRING_WITH_LEN("BIGINT UNSIGNED") }, // GET_ULL 8 + { STRING_WITH_LEN("VARCHAR") }, // GET_STR 9 + { STRING_WITH_LEN("VARCHAR") }, // GET_STR_ALLOC 10 + { 0, 0 }, // GET_DISABLED 11 + { STRING_WITH_LEN("ENUM") }, // GET_ENUM 12 + { STRING_WITH_LEN("SET") }, // GET_SET 13 + { STRING_WITH_LEN("DOUBLE") }, // GET_DOUBLE 14 + { STRING_WITH_LEN("FLAGSET") }, // GET_FLAGSET 15 + }; + const LEX_CSTRING *type= types + (var->option.var_type & GET_TYPE_MASK); + fields[6]->store(type->str, type->length, scs); + + // VARIABLE_COMMENT + fields[7]->store(var->option.comment, strlen(var->option.comment), + scs); + + // NUMERIC_MIN_VALUE + // NUMERIC_MAX_VALUE + // NUMERIC_BLOCK_SIZE + bool is_unsigned= true; + switch (var->option.var_type) + { + case GET_INT: + case GET_LONG: + case GET_LL: + is_unsigned= false; + /* fall through */ + case GET_UINT: + case GET_ULONG: + case GET_ULL: + fields[8]->set_notnull(); + fields[9]->set_notnull(); + fields[10]->set_notnull(); + fields[8]->store(var->option.min_value, is_unsigned); + 
fields[9]->store(var->option.max_value, is_unsigned); + fields[10]->store(var->option.block_size, is_unsigned); + break; + case GET_DOUBLE: + fields[8]->set_notnull(); + fields[9]->set_notnull(); + fields[8]->store(getopt_ulonglong2double(var->option.min_value)); + fields[9]->store(getopt_ulonglong2double(var->option.max_value)); + } + + // ENUM_VALUE_LIST + TYPELIB *tl= var->option.typelib; + if (tl) + { + uint i; + strbuf.length(0); + for (i=0; i + 1 < tl->count; i++) + { + strbuf.append(tl->type_names[i]); + strbuf.append(','); + } + strbuf.append(tl->type_names[i]); + fields[11]->set_notnull(); + fields[11]->store(strbuf.ptr(), strbuf.length(), scs); + } + + // READ_ONLY + static const LEX_CSTRING yesno[]= + { + { STRING_WITH_LEN("NO") }, + { STRING_WITH_LEN("YES") } + }; + const LEX_CSTRING *yn = yesno + var->is_readonly(); + fields[12]->store(yn->str, yn->length, scs); + + // COMMAND_LINE_ARGUMENT + if (var->option.id >= 0) + { + static const LEX_CSTRING args[]= + { + { STRING_WITH_LEN("NONE") }, // NO_ARG + { STRING_WITH_LEN("OPTIONAL") }, // OPT_ARG + { STRING_WITH_LEN("REQUIRED") } // REQUIRED_ARG + }; + const LEX_CSTRING *arg= args + var->option.arg_type; + fields[13]->set_notnull(); + fields[13]->store(arg->str, arg->length, scs); + } + + if (schema_table_store_record(thd, tables->table)) + goto end; + thd->get_stmt_da()->inc_current_row_for_warning(); + } + res= 0; +end: + mysql_rwlock_unlock(&LOCK_system_variables_hash); + thd->count_cuted_fields= save_count_cuted_fields; + return res; +} + +/* + This is a simple and inefficient helper that sets sys_var::value_origin + for a specific sysvar. + It should *only* be used on server startup, if you need to do this later, + get yourself a pointer to your sysvar (see e.g. Sys_autocommit_ptr) + and update it directly. 
+*/ + +void mark_sys_var_value_origin(void *ptr, enum sys_var::where here) +{ + bool found= false; + DBUG_ASSERT(!mysqld_server_started); // only to be used during startup + + for (uint i= 0; i < system_variable_hash.records; i++) + { + sys_var *var= (sys_var*) my_hash_element(&system_variable_hash, i); + if (var->option.value == ptr) + { + found= true; + var->value_origin= here; + /* don't break early, search for all matches */ + } + } + + DBUG_ASSERT(found); // variable must have been found +} + diff --git a/sql/set_var.h b/sql/set_var.h index bb92e555aa7..e48f394c316 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -61,7 +61,10 @@ public: sys_var *next; LEX_CSTRING name; enum flag_enum { GLOBAL, SESSION, ONLY_SESSION, SCOPE_MASK=1023, - READONLY=1024, ALLOCATED=2048, PARSE_EARLY=4096, SHOW_VALUE_IN_HELP=8192 }; + READONLY=1024, ALLOCATED=2048, PARSE_EARLY=4096 }; + enum { NO_GETOPT=-1, GETOPT_ONLY_HELP=-2 }; + enum where { CONFIG, AUTO, SQL, COMPILE_TIME, ENV }; + /** Enumeration type to indicate for a system variable whether it will be written to the binlog or not. 
@@ -70,6 +73,7 @@ public: SESSION_VARIABLE_IN_BINLOG } binlog_status; my_option option; ///< min, max, default values are stored here + enum where value_origin; protected: typedef bool (*on_check_function)(sys_var *self, THD *thd, set_var *var); @@ -105,7 +109,7 @@ public: virtual sys_var_pluginvar *cast_pluginvar() { return 0; } bool check(THD *thd, set_var *var); - uchar *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); + uchar *value_ptr(THD *thd, enum_var_type type, const LEX_STRING *base); /** Update the system variable with the default value from either @@ -115,9 +119,10 @@ public: bool set_default(THD *thd, set_var *var); bool update(THD *thd, set_var *var); - longlong val_int(bool *is_null, THD *thd, enum_var_type type, LEX_STRING *base); - String *val_str(String *str, THD *thd, enum_var_type type, LEX_STRING *base); - double val_real(bool *is_null, THD *thd, enum_var_type type, LEX_STRING *base); + String *val_str_nolock(String *str, THD *thd, const uchar *value); + longlong val_int(bool *is_null, THD *thd, enum_var_type type, const LEX_STRING *base); + String *val_str(String *str, THD *thd, enum_var_type type, const LEX_STRING *base); + double val_real(bool *is_null, THD *thd, enum_var_type type, const LEX_STRING *base); SHOW_TYPE show_type() { return show_val_type; } int scope() const { return flags & SCOPE_MASK; } @@ -130,7 +135,31 @@ public: bool is_struct() { return option.var_type & GET_ASK_ADDR; } bool is_written_to_binlog(enum_var_type type) { return type != OPT_GLOBAL && binlog_status == SESSION_VARIABLE_IN_BINLOG; } - virtual bool check_update_type(Item_result type) = 0; + bool check_update_type(Item_result type) + { + switch (option.var_type & GET_TYPE_MASK) { + case GET_INT: + case GET_UINT: + case GET_LONG: + case GET_ULONG: + case GET_LL: + case GET_ULL: + return type != INT_RESULT; + case GET_STR: + case GET_STR_ALLOC: + return type != STRING_RESULT; + case GET_ENUM: + case GET_BOOL: + case GET_SET: + case GET_FLAGSET: + return 
type != STRING_RESULT && type != INT_RESULT; + case GET_DOUBLE: + return type != INT_RESULT && type != REAL_RESULT && type != DECIMAL_RESULT; + default: + return true; + } + } + bool check_type(enum_var_type type) { switch (scope()) @@ -143,12 +172,29 @@ public: } bool register_option(DYNAMIC_ARRAY *array, int parse_flags) { - return ((((option.id != -1) && ((flags & PARSE_EARLY) == parse_flags)) || - (flags & parse_flags)) && - insert_dynamic(array, (uchar*)&option)); + DBUG_ASSERT(parse_flags == GETOPT_ONLY_HELP || + parse_flags == PARSE_EARLY || parse_flags == 0); + if (option.id == NO_GETOPT) + return 0; + if (parse_flags == GETOPT_ONLY_HELP) + { + if (option.id != GETOPT_ONLY_HELP) + return 0; + } + else + { + if (option.id == GETOPT_ONLY_HELP) + return 0; + if ((flags & PARSE_EARLY) != parse_flags) + return 0; + } + return insert_dynamic(array, (uchar*)&option); } void do_deprecated_warning(THD *thd); + virtual uchar *default_value_ptr(THD *thd) + { return (uchar*)&option.def_value; } + private: virtual bool do_check(THD *thd, set_var *var) = 0; /** @@ -165,11 +211,11 @@ private: protected: /** A pointer to a value of the variable for SHOW. - It must be of show_val_type type (bool for SHOW_BOOL, int for SHOW_INT, - longlong for SHOW_LONGLONG, etc). + It must be of show_val_type type (my_bool for SHOW_MY_BOOL, + int for SHOW_INT, longlong for SHOW_LONGLONG, etc). */ - virtual uchar *session_value_ptr(THD *thd, LEX_STRING *base); - virtual uchar *global_value_ptr(THD *thd, LEX_STRING *base); + virtual uchar *session_value_ptr(THD *thd, const LEX_STRING *base); + virtual uchar *global_value_ptr(THD *thd, const LEX_STRING *base); /** A pointer to a storage area of the variable, to the raw data. 
@@ -341,10 +387,19 @@ extern SHOW_COMP_OPTION have_openssl; */ SHOW_VAR* enumerate_sys_vars(THD *thd, bool sorted, enum enum_var_type type); +int fill_sysvars(THD *thd, TABLE_LIST *tables, COND *cond); sys_var *find_sys_var(THD *thd, const char *str, uint length=0); int sql_set_variables(THD *thd, List<set_var_base> *var_list); +#define SYSVAR_AUTOSIZE(VAR,VAL) \ + do { \ + VAR= (VAL); \ + mark_sys_var_value_origin(&VAR, sys_var::AUTO); \ + } while(0) + +void mark_sys_var_value_origin(void *ptr, enum sys_var::where here); + bool fix_delay_key_write(sys_var *self, THD *thd, enum_var_type type); ulonglong expand_sql_mode(ulonglong sql_mode); diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 32cdbe138b2..233bb835bd8 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -5920,9 +5920,8 @@ ER_TEMP_TABLE_PREVENTS_SWITCH_OUT_OF_RBR ER_STORED_FUNCTION_PREVENTS_SWITCH_BINLOG_FORMAT eng "Cannot change the binary logging format inside a stored function or trigger" ger "Das Binärlog-Format kann innerhalb einer gespeicherten Funktion oder eines Triggers nicht geändert werden" -ER_NDB_CANT_SWITCH_BINLOG_FORMAT - eng "The NDB cluster engine does not support changing the binlog format on the fly yet" - ger "Die Speicher-Engine NDB Cluster unterstützt das Ändern des Binärlog-Formats zur Laufzeit noch nicht" +ER_UNUSED_13 + eng "You should never see it" ER_PARTITION_NO_TEMPORARY eng "Cannot create temporary table with partitions" ger "Anlegen temporärer Tabellen mit Partitionen nicht möglich" @@ -6139,9 +6138,8 @@ ER_SLAVE_HEARTBEAT_FAILURE ger "Unerwartete Daten vom Heartbeat des Masters: %s" ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE eng "The requested value for the heartbeat period is either negative or exceeds the maximum allowed (%s seconds)." -ER_NDB_REPLICATION_SCHEMA_ERROR - eng "Bad schema for mysql.ndb_replication table. Message: %-.64s" - ger "Fehlerhaftes Schema für mysql.ndb_replication table. 
Meldung: %-.64s" +ER_UNUSED_14 + eng "You should never see it" ER_CONFLICT_FN_PARSE_ERROR eng "Error in parsing conflict function. Message: %-.64s" ger "Fehler beim Parsen einer Konflikt-Funktion. Meldung: %-.64s" @@ -7110,3 +7108,5 @@ ER_IT_IS_A_VIEW 42S02 eng "'%-.192s' is a view" ER_SLAVE_SKIP_NOT_IN_GTID eng "When using GTID, @@sql_slave_skip_counter can not be used. Instead, setting @@gtid_slave_pos explicitly can be used to skip to after a given GTID position." +ER_STATEMENT_TIMEOUT 70100 + eng "Query execution was interrupted (max_statement_time exceeded)" diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc index 3fadbcd088f..61e2830e82e 100644 --- a/sql/signal_handler.cc +++ b/sql/signal_handler.cc @@ -172,6 +172,10 @@ extern "C" sig_handler handle_fatal_signal(int sig) case KILL_QUERY_HARD: kreason= "KILL_QUERY"; break; + case KILL_TIMEOUT: + case KILL_TIMEOUT_HARD: + kreason= "KILL_TIMEOUT"; + break; case KILL_SYSTEM_THREAD: case KILL_SYSTEM_THREAD_HARD: kreason= "KILL_SYSTEM_THREAD"; diff --git a/sql/slave.cc b/sql/slave.cc index f7d019a6c39..ca29410cd1d 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -52,6 +52,7 @@ #include "log_event.h" // Rotate_log_event, // Create_file_log_event, // Format_description_log_event +#include "wsrep_mysqld.h" #ifdef HAVE_REPLICATION @@ -2922,7 +2923,7 @@ static int init_slave_thread(THD* thd, Master_info *mi, thd->security_ctx->skip_grants(); thd->slave_thread= 1; thd->connection_name= mi->connection_name; - thd->enable_slow_log= opt_log_slow_slave_statements; + thd->variables.sql_log_slow= opt_log_slow_slave_statements; thd->variables.log_slow_filter= global_system_variables.log_slow_filter; set_slave_thread_options(thd); thd->client_capabilities = CLIENT_LOCAL_FILES; @@ -3090,9 +3091,7 @@ static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings) /* Check if the current error is of temporary nature of not. 
Some errors are temporary in nature, such as - ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT. Ndb also signals - that the error is temporary by pushing a warning with the error code - ER_GET_TEMPORARY_ERRMSG, if the originating error is temporary. + ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT. */ static int has_temporary_error(THD *thd) { @@ -3122,25 +3121,6 @@ static int has_temporary_error(THD *thd) thd->get_stmt_da()->sql_errno() == ER_LOCK_WAIT_TIMEOUT) DBUG_RETURN(1); -#ifdef HAVE_NDB_BINLOG - /* - currently temporary error set in ndbcluster - */ - List_iterator_fast<Sql_condition> it(thd->warning_info->warn_list()); - Sql_condition *err; - while ((err= it++)) - { - DBUG_PRINT("info", ("has condition %d %s", err->get_sql_errno(), - err->get_message_text())); - switch (err->get_sql_errno()) - { - case ER_GET_TEMPORARY_ERRMSG: - DBUG_RETURN(1); - default: - break; - } - } -#endif DBUG_RETURN(0); } @@ -4367,6 +4347,7 @@ pthread_handler_t handle_slave_sql(void *arg) my_off_t saved_skip= 0; Master_info *mi= ((Master_info*)arg); Relay_log_info* rli = &mi->rli; + my_bool wsrep_node_dropped __attribute__((unused)) = FALSE; const char *errmsg; rpl_group_info *serial_rgi; rpl_sql_thread_info sql_info(mi->rpl_filter); @@ -4375,8 +4356,7 @@ pthread_handler_t handle_slave_sql(void *arg) my_thread_init(); DBUG_ENTER("handle_slave_sql"); - LINT_INIT(saved_master_log_pos); - LINT_INIT(saved_log_pos); + wsrep_restart_point: serial_rgi= new rpl_group_info(rli); thd = new THD; // note that contructor of THD uses DBUG_ ! 
@@ -4503,6 +4483,12 @@ pthread_handler_t handle_slave_sql(void *arg) } #endif +#ifdef WITH_WSREP + thd->wsrep_exec_mode= LOCAL_STATE; + /* synchronize with wsrep replication */ + if (WSREP_ON) + wsrep_ready_wait(); +#endif DBUG_PRINT("master_info",("log_file_name: %s position: %s", rli->group_master_log_name, llstr(rli->group_master_log_pos,llbuff))); @@ -4603,13 +4589,19 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME, rli->group_master_log_name, (ulong) rli->group_master_log_pos); saved_skip= 0; } - + if (exec_relay_log_event(thd, rli, serial_rgi)) { DBUG_PRINT("info", ("exec_relay_log_event() failed")); // do not scare the user if SQL thread was simply killed or stopped if (!sql_slave_killed(serial_rgi)) + { slave_output_error_info(rli, thd); + if (WSREP_ON && rli->last_error().number == ER_UNKNOWN_COM_ERROR) + { + wsrep_node_dropped= TRUE; + } + } goto err; } } @@ -4694,6 +4686,27 @@ err_during_init: delete serial_rgi; delete thd; mysql_mutex_unlock(&LOCK_thread_count); +#ifdef WITH_WSREP + /* if slave stopped due to node going non primary, we set global flag to + trigger automatic restart of slave when node joins back to cluster + */ + if (WSREP_ON && wsrep_node_dropped && wsrep_restart_slave) + { + if (wsrep_ready) + { + WSREP_INFO("Slave error due to node temporarily non-primary" + "SQL slave will continue"); + wsrep_node_dropped= FALSE; + mysql_mutex_unlock(&rli->run_lock); + goto wsrep_restart_point; + } else { + WSREP_INFO("Slave error due to node going non-primary"); + WSREP_INFO("wsrep_restart_slave was set and therefore slave will be " + "automatically restarted when node joins back to cluster"); + wsrep_restart_slave_activated= TRUE; + } + } +#endif /* WITH_WSREP */ /* Note: the order of the broadcast and unlock calls below (first broadcast, then unlock) is important. 
Otherwise a killer_thread can execute between the calls and diff --git a/sql/sp.cc b/sql/sp.cc index 188b311ae86..b25117019c0 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -32,19 +32,6 @@ #include <my_user.h> -static bool -create_string(THD *thd, String *buf, - stored_procedure_type sp_type, - const char *db, ulong dblen, - const char *name, ulong namelen, - const char *params, ulong paramslen, - const char *returns, ulong returnslen, - const char *body, ulong bodylen, - st_sp_chistics *chistics, - const LEX_STRING *definer_user, - const LEX_STRING *definer_host, - ulonglong sql_mode); - static int db_load_routine(THD *thd, stored_procedure_type type, sp_name *name, sp_head **sphp, @@ -844,7 +831,7 @@ db_load_routine(THD *thd, stored_procedure_type type, definition for SHOW CREATE PROCEDURE later. */ - if (!create_string(thd, &defstr, + if (!show_create_sp(thd, &defstr, type, NULL, 0, name->m_name.str, name->m_name.length, @@ -924,7 +911,7 @@ end: } -static void +void sp_returns_type(THD *thd, String &result, sp_head *sp) { TABLE table; @@ -1186,7 +1173,7 @@ sp_create_routine(THD *thd, stored_procedure_type type, sp_head *sp) String log_query; log_query.set_charset(system_charset_info); - if (!create_string(thd, &log_query, + if (!show_create_sp(thd, &log_query, sp->m_type, (sp->m_explicit_name ? sp->m_db.str : NULL), (sp->m_explicit_name ? sp->m_db.length : 0), @@ -2126,8 +2113,8 @@ int sp_cache_routine(THD *thd, enum stored_procedure_type type, sp_name *name, @return Returns TRUE on success, FALSE on (alloc) failure. */ -static bool -create_string(THD *thd, String *buf, +bool +show_create_sp(THD *thd, String *buf, stored_procedure_type type, const char *db, ulong dblen, const char *name, ulong namelen, @@ -2253,7 +2240,7 @@ sp_load_for_information_schema(THD *thd, TABLE *proc_table, String *db, sp_body= (type == TYPE_ENUM_FUNCTION ? 
"RETURN NULL" : "BEGIN END"); bzero((char*) &sp_chistics, sizeof(sp_chistics)); defstr.set_charset(creation_ctx->get_client_cs()); - if (!create_string(thd, &defstr, type, + if (!show_create_sp(thd, &defstr, type, sp_db_str.str, sp_db_str.length, sp_name_obj.m_name.str, sp_name_obj.m_name.length, params, strlen(params), @@ -2271,3 +2258,4 @@ sp_load_for_information_schema(THD *thd, TABLE *proc_table, String *db, thd->lex= old_lex; return sp; } + @@ -214,4 +214,19 @@ bool load_collation(MEM_ROOT *mem_root, CHARSET_INFO *dflt_cl, CHARSET_INFO **cl); +void sp_returns_type(THD *thd, + String &result, + sp_head *sp); + +bool show_create_sp(THD *thd, String *buf, + stored_procedure_type type, + const char *db, ulong dblen, + const char *name, ulong namelen, + const char *params, ulong paramslen, + const char *returns, ulong returnslen, + const char *body, ulong bodylen, + st_sp_chistics *chistics, + const LEX_STRING *definer_user, + const LEX_STRING *definer_host, + ulonglong sql_mode); #endif /* _SP_H_ */ diff --git a/sql/sp_head.cc b/sql/sp_head.cc index 8a9e8ddc816..f8320e830a5 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -149,13 +149,9 @@ sp_get_item_value(THD *thd, Item *item, String *str) return NULL; { - char buf_holder[STRING_BUFFER_USUAL_SIZE]; - String buf(buf_holder, sizeof(buf_holder), result->charset()); + StringBuffer<STRING_BUFFER_USUAL_SIZE> buf(result->charset()); CHARSET_INFO *cs= thd->variables.character_set_client; - /* We must reset length of the buffer, because of String specificity. 
*/ - buf.length(0); - buf.append('_'); buf.append(result->charset()->csname); if (cs->escape_with_backslash_is_dangerous) @@ -178,6 +174,28 @@ sp_get_item_value(THD *thd, Item *item, String *str) } +bool Item_splocal::append_for_log(THD *thd, String *str) +{ + if (fix_fields(thd, NULL)) + return true; + + if (limit_clause_param) + return str->append_ulonglong(val_uint()); + + if (str->append(STRING_WITH_LEN(" NAME_CONST('")) || + str->append(&m_name) || + str->append(STRING_WITH_LEN("',"))) + return true; + + StringBuffer<STRING_BUFFER_USUAL_SIZE> str_value_holder(&my_charset_latin1); + String *str_value= sp_get_item_value(thd, this_item(), &str_value_holder); + if (str_value) + return str->append(*str_value) || str->append(')'); + else + return str->append(STRING_WITH_LEN("NULL)")); +} + + /** Returns a combination of: - sp_head::MULTI_RESULTS: added if the 'cmd' is a command that might @@ -577,13 +595,12 @@ sp_head::sp_head() :Query_arena(&main_mem_root, STMT_INITIALIZED_FOR_SP), m_flags(0), m_sp_cache_version(0), + m_creation_ctx(0), unsafe_flags(0), m_recursion_level(0), m_next_cached_sp(0), m_cont_level(0) { - const LEX_STRING str_reset= { NULL, 0 }; - m_first_instance= this; m_first_free_instance= this; m_last_cached_sp= this; @@ -594,7 +611,7 @@ sp_head::sp_head() be rewritten soon. Remove the else part and replace 'if' with an assert when this is done. 
*/ - m_db= m_name= m_qname= str_reset; + m_db= m_name= m_qname= null_lex_str; DBUG_ENTER("sp_head::sp_head"); @@ -873,7 +890,8 @@ sp_head::create_result_field(uint field_max_length, const char *field_name, } -int cmp_splocal_locations(Item_splocal * const *a, Item_splocal * const *b) +int cmp_rqp_locations(Rewritable_query_parameter * const *a, + Rewritable_query_parameter * const *b) { return (int)((*a)->pos_in_query - (*b)->pos_in_query); } @@ -979,85 +997,32 @@ subst_spvars(THD *thd, sp_instr *instr, LEX_STRING *query_str) { DBUG_ENTER("subst_spvars"); - Dynamic_array<Item_splocal*> sp_vars_uses; - char *pbuf, *cur, buffer[512]; - String qbuf(buffer, sizeof(buffer), &my_charset_bin); - int prev_pos, res, buf_len; + Dynamic_array<Rewritable_query_parameter*> rewritables; + char *pbuf; + StringBuffer<512> qbuf; + Copy_query_with_rewrite acc(thd, query_str->str, query_str->length, &qbuf); - /* Find all instances of Item_splocal used in this statement */ + /* Find rewritable Items used in this statement */ for (Item *item= instr->free_list; item; item= item->next) { - if (item->is_splocal()) - { - Item_splocal *item_spl= (Item_splocal*)item; - if (item_spl->pos_in_query) - sp_vars_uses.append(item_spl); - } + Rewritable_query_parameter *rqp= item->get_rewritable_query_parameter(); + if (rqp && rqp->pos_in_query) + rewritables.append(rqp); } - if (!sp_vars_uses.elements()) + if (!rewritables.elements()) DBUG_RETURN(FALSE); - /* Sort SP var refs by their occurences in the query */ - sp_vars_uses.sort(cmp_splocal_locations); + rewritables.sort(cmp_rqp_locations); - /* - Construct a statement string where SP local var refs are replaced - with "NAME_CONST(name, value)" - */ - qbuf.length(0); - cur= query_str->str; - prev_pos= res= 0; - thd->query_name_consts= 0; + thd->query_name_consts= rewritables.elements(); - for (Item_splocal **splocal= sp_vars_uses.front(); - splocal <= sp_vars_uses.back(); splocal++) + for (Rewritable_query_parameter **rqp= rewritables.front(); + 
rqp <= rewritables.back(); rqp++) { - Item *val; - - char str_buffer[STRING_BUFFER_USUAL_SIZE]; - String str_value_holder(str_buffer, sizeof(str_buffer), - &my_charset_latin1); - String *str_value; - - /* append the text between sp ref occurences */ - res|= qbuf.append(cur + prev_pos, (*splocal)->pos_in_query - prev_pos); - prev_pos= (*splocal)->pos_in_query + (*splocal)->len_in_query; - - res|= (*splocal)->fix_fields(thd, (Item **) splocal); - if (res) - break; - - if ((*splocal)->limit_clause_param) - { - res|= qbuf.append_ulonglong((*splocal)->val_uint()); - if (res) - break; - continue; - } - - /* append the spvar substitute */ - res|= qbuf.append(STRING_WITH_LEN(" NAME_CONST('")); - res|= qbuf.append((*splocal)->m_name.str, (*splocal)->m_name.length); - res|= qbuf.append(STRING_WITH_LEN("',")); - - if (res) - break; - - val= (*splocal)->this_item(); - DBUG_PRINT("info", ("print 0x%lx", (long) val)); - str_value= sp_get_item_value(thd, val, &str_value_holder); - if (str_value) - res|= qbuf.append(*str_value); - else - res|= qbuf.append(STRING_WITH_LEN("NULL")); - res|= qbuf.append(')'); - if (res) - break; - - thd->query_name_consts++; + if (acc.append(*rqp)) + DBUG_RETURN(TRUE); } - if (res || - qbuf.append(cur + prev_pos, query_str->length - prev_pos)) + if (acc.finalize()) DBUG_RETURN(TRUE); /* @@ -1072,8 +1037,8 @@ subst_spvars(THD *thd, sp_instr *instr, LEX_STRING *query_str) <db_name> Name of current database <flags> Flags struct */ - buf_len= (qbuf.length() + 1 + QUERY_CACHE_DB_LENGTH_SIZE + thd->db_length + - QUERY_CACHE_FLAGS_SIZE + 1); + int buf_len= (qbuf.length() + 1 + QUERY_CACHE_DB_LENGTH_SIZE + + thd->db_length + QUERY_CACHE_FLAGS_SIZE + 1); if ((pbuf= (char *) alloc_root(thd->mem_root, buf_len))) { char *ptr= pbuf + qbuf.length(); @@ -1239,7 +1204,8 @@ sp_head::execute(THD *thd, bool merge_da_on_success) Switch query context. 
This has to be done early as this is sometimes allocated trough sql_alloc */ - saved_creation_ctx= m_creation_ctx->set_n_backup(thd); + if (m_creation_ctx) + saved_creation_ctx= m_creation_ctx->set_n_backup(thd); /* We have to save/restore this info when we are changing call level to @@ -1403,7 +1369,8 @@ sp_head::execute(THD *thd, bool merge_da_on_success) /* Restore query context. */ - m_creation_ctx->restore_env(thd, saved_creation_ctx); + if (m_creation_ctx) + m_creation_ctx->restore_env(thd, saved_creation_ctx); /* Restore arena. */ @@ -2328,6 +2295,9 @@ sp_head::restore_lex(THD *thd) procedures) to multiset of tables used by this routine. */ merge_table_list(thd, sublex->query_tables, sublex); + /* Merge lists of PS parameters. */ + oldlex->param_list.append(&sublex->param_list); + if (! sublex->sp_lex_in_use) { sublex->sphead= NULL; diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 106036e1e83..3d3c0bc835a 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -205,15 +205,6 @@ static plugin_ref old_password_plugin; #endif static plugin_ref native_password_plugin; -static char *safe_str(char *str) -{ return str ? str : const_cast<char*>(""); } - -static const char *safe_str(const char *str) -{ return str ? str : ""; } - -static size_t safe_strlen(const char *str) -{ return str ? 
strlen(str) : 0; } - /* Classes */ struct acl_host_and_ip @@ -709,6 +700,8 @@ bool ROLE_GRANT_PAIR::init(MEM_ROOT *mem, char *username, #define ROLE_ASSIGN_COLUMN_IDX 43 #define DEFAULT_ROLE_COLUMN_IDX 44 +#define MAX_STATEMENT_TIME_COLUMN_IDX 45 + /* various flags valid for ACL_USER */ #define IS_ROLE (1L << 0) /* Flag to mark that a ROLE is on the recursive DEPTH_FIRST_SEARCH stack */ @@ -1175,6 +1168,8 @@ static bool acl_load(THD *thd, TABLE_LIST *tables) mysql_mutex_unlock(&LOCK_global_system_variables); else { + extern sys_var *Sys_old_passwords_ptr; + Sys_old_passwords_ptr->value_origin= sys_var::AUTO; global_system_variables.old_passwords= 1; mysql_mutex_unlock(&LOCK_global_system_variables); sql_print_warning("mysql.user table is not updated to new password format; " @@ -1272,6 +1267,8 @@ static bool acl_load(THD *thd, TABLE_LIST *tables) user.sort= get_sort(2, user.host.hostname, user.user.str); user.hostname_length= safe_strlen(user.host.hostname); + user.user_resource.user_conn= 0; + user.user_resource.max_statement_time= 0.0; /* Starting from 4.0.2 we have more fields */ if (table->s->fields >= 31) @@ -1331,6 +1328,14 @@ static bool acl_load(THD *thd, TABLE_LIST *tables) fix_user_plugin_ptr(&user); } } + + if (table->s->fields > MAX_STATEMENT_TIME_COLUMN_IDX) + { + /* Starting from 10.1.1 we can have max_statement_time */ + ptr= get_field(thd->mem_root, + table->field[MAX_STATEMENT_TIME_COLUMN_IDX]); + user.user_resource.max_statement_time= ptr ? 
atof(ptr) : 0.0; + } } else { @@ -2041,6 +2046,8 @@ static void acl_update_user(const char *user, const char *host, acl_user->user_resource.conn_per_hour= mqh->conn_per_hour; if (mqh->specified_limits & USER_RESOURCES::USER_CONNECTIONS) acl_user->user_resource.user_conn= mqh->user_conn; + if (mqh->specified_limits & USER_RESOURCES::MAX_STATEMENT_TIME) + acl_user->user_resource.max_statement_time= mqh->max_statement_time; if (ssl_type != SSL_TYPE_NOT_SPECIFIED) { acl_user->ssl_type= ssl_type; @@ -2559,7 +2566,9 @@ int check_alter_user(THD *thd, const char *host, const char *user) my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--skip-grant-tables"); goto end; } - if (!thd->slave_thread && !thd->security_ctx->priv_user[0]) + + if (IF_WSREP((!WSREP(thd) || !thd->wsrep_applier), 1) && + !thd->slave_thread && !thd->security_ctx->priv_user[0]) { my_message(ER_PASSWORD_ANONYMOUS_USER, ER(ER_PASSWORD_ANONYMOUS_USER), MYF(0)); @@ -2570,7 +2579,9 @@ int check_alter_user(THD *thd, const char *host, const char *user) my_error(ER_PASSWORD_NO_MATCH, MYF(0)); goto end; } + if (!thd->slave_thread && + IF_WSREP((!WSREP(thd) || !thd->wsrep_applier),1) && (strcmp(thd->security_ctx->priv_user, user) || my_strcasecmp(system_charset_info, host, thd->security_ctx->priv_host))) @@ -2635,10 +2646,12 @@ bool change_password(THD *thd, const char *host, const char *user, TABLE_LIST tables[TABLES_MAX]; /* Buffer should be extended when password length is extended. 
*/ char buff[512]; - ulong query_length; + ulong query_length= 0; enum_binlog_format save_binlog_format; uint new_password_len= (uint) strlen(new_password); - int result; + int result=0; + const CSET_STRING query_save __attribute__((unused)) = thd->query_string; + DBUG_ENTER("change_password"); DBUG_PRINT("enter",("host: '%s' user: '%s' new_password: '%s'", host,user,new_password)); @@ -2647,6 +2660,19 @@ bool change_password(THD *thd, const char *host, const char *user, if (check_change_password(thd, host, user, new_password, new_password_len)) DBUG_RETURN(1); + if (mysql_bin_log.is_open() || + (WSREP(thd) && !IF_WSREP(thd->wsrep_applier, 0))) + { + query_length= sprintf(buff, "SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'", + safe_str(user), safe_str(host), new_password); + } + + if (WSREP(thd) && !IF_WSREP(thd->wsrep_applier, 0)) + { + thd->set_query_inner(buff, query_length, system_charset_info); + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, (char*)"user", NULL); + } + if ((result= open_grant_tables(thd, tables, TL_WRITE, Table_user))) DBUG_RETURN(result != 1); @@ -2697,17 +2723,24 @@ bool change_password(THD *thd, const char *host, const char *user, result= 0; if (mysql_bin_log.is_open()) { - query_length= - sprintf(buff,"SET PASSWORD FOR '%-.120s'@'%-.120s'='%-.120s'", - safe_str(acl_user->user.str), - safe_str(acl_user->host.hostname), - new_password); + DBUG_ASSERT(query_length); thd->clear_error(); result= thd->binlog_query(THD::STMT_QUERY_TYPE, buff, query_length, FALSE, FALSE, FALSE, 0); } end: close_mysql_tables(thd); + +#ifdef WITH_WSREP +error: // this label is used in WSREP_TO_ISOLATION_END + if (WSREP(thd) && !thd->wsrep_applier) + { + WSREP_TO_ISOLATION_END; + + thd->set_query_inner(query_save); + thd->wsrep_exec_mode = LOCAL_STATE; + } +#endif /* WITH_WSREP */ thd->restore_stmt_binlog_format(save_binlog_format); DBUG_RETURN(result); @@ -2726,9 +2759,11 @@ int acl_set_default_role(THD *thd, const char *host, const char *user, char 
user_key[MAX_KEY_LENGTH]; int result= 1; int error; + ulong query_length= 0; bool clear_role= FALSE; + char buff[512]; enum_binlog_format save_binlog_format; - + const CSET_STRING query_save __attribute__((unused)) = thd->query_string; DBUG_ENTER("acl_set_default_role"); DBUG_PRINT("enter",("host: '%s' user: '%s' rolename: '%s'", @@ -2747,6 +2782,20 @@ int acl_set_default_role(THD *thd, const char *host, const char *user, if (!strcasecmp(rolename, "NONE")) clear_role= TRUE; + if (mysql_bin_log.is_open() || + (WSREP(thd) && !IF_WSREP(thd->wsrep_applier, 0))) + { + query_length= + sprintf(buff,"SET DEFAULT ROLE '%-.120s' FOR '%-.120s'@'%-.120s'", + safe_str(rolename), safe_str(user), safe_str(host)); + } + + if (WSREP(thd) && !IF_WSREP(thd->wsrep_applier, 0)) + { + thd->set_query_inner(buff, query_length, system_charset_info); + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, (char*)"user", NULL); + } + if ((result= open_grant_tables(thd, tables, TL_WRITE, Table_user))) DBUG_RETURN(result != 1); @@ -2823,18 +2872,25 @@ int acl_set_default_role(THD *thd, const char *host, const char *user, result= 0; if (mysql_bin_log.is_open()) { - char buff[512]; - int query_length= - sprintf(buff,"SET DEFAULT ROLE '%-.120s' FOR '%-.120s'@'%-.120s'", - safe_str(acl_user->default_rolename.str), - safe_str(acl_user->user.str), - safe_str(acl_user->host.hostname)); + DBUG_ASSERT(query_length); thd->clear_error(); result= thd->binlog_query(THD::STMT_QUERY_TYPE, buff, query_length, FALSE, FALSE, FALSE, 0); } end: close_mysql_tables(thd); + +#ifdef WITH_WSREP +error: // this label is used in WSREP_TO_ISOLATION_END + if (WSREP(thd) && !thd->wsrep_applier) + { + WSREP_TO_ISOLATION_END; + + thd->set_query_inner(query_save); + thd->wsrep_exec_mode = LOCAL_STATE; + } +#endif /* WITH_WSREP */ + thd->restore_stmt_binlog_format(save_binlog_format); DBUG_RETURN(result); @@ -3351,8 +3407,6 @@ static int replace_user_table(THD *thd, TABLE *table, LEX_USER &combo, if (table->s->fields >= 36 && 
(mqh.specified_limits & USER_RESOURCES::USER_CONNECTIONS)) table->field[next_field+3]->store((longlong) mqh.user_conn, FALSE); - mqh_used= mqh_used || mqh.questions || mqh.updates || mqh.conn_per_hour; - next_field+= 4; if (table->s->fields >= 41) { @@ -3373,7 +3427,16 @@ static int replace_user_table(THD *thd, TABLE *table, LEX_USER &combo, table->field[next_field]->reset(); table->field[next_field + 1]->reset(); } + + if (table->s->fields > MAX_STATEMENT_TIME_COLUMN_IDX) + { + if (mqh.specified_limits & USER_RESOURCES::MAX_STATEMENT_TIME) + table->field[MAX_STATEMENT_TIME_COLUMN_IDX]-> + store(mqh.max_statement_time); + } } + mqh_used= (mqh_used || mqh.questions || mqh.updates || mqh.conn_per_hour || + mqh.user_conn || mqh.max_statement_time != 0.0); /* table format checked earlier */ if (handle_as_role) @@ -7466,6 +7529,21 @@ static void add_user_option(String *grant, long value, const char *name, } } + +static void add_user_option(String *grant, double value, const char *name) +{ + if (value != 0.0 ) + { + char buff[FLOATING_POINT_BUFFER]; + size_t len; + grant->append(' '); + grant->append(name, strlen(name)); + grant->append(' '); + len= my_fcvt(value, 6, buff, NULL); + grant->append(buff, len); + } +} + static const char *command_array[]= { "SELECT", "INSERT", "UPDATE", "DELETE", "CREATE", "DROP", "RELOAD", @@ -7848,7 +7926,8 @@ static bool show_global_privileges(THD *thd, ACL_USER_BASE *acl_entry, (acl_user->user_resource.questions || acl_user->user_resource.updates || acl_user->user_resource.conn_per_hour || - acl_user->user_resource.user_conn)) + acl_user->user_resource.user_conn || + acl_user->user_resource.max_statement_time != 0.0)) { global.append(STRING_WITH_LEN(" WITH")); if (want_access & GRANT_ACL) @@ -7861,6 +7940,8 @@ static bool show_global_privileges(THD *thd, ACL_USER_BASE *acl_entry, "MAX_CONNECTIONS_PER_HOUR", false); add_user_option(&global, acl_user->user_resource.user_conn, "MAX_USER_CONNECTIONS", true); + add_user_option(&global, 
acl_user->user_resource.max_statement_time, + "MAX_STATEMENT_TIME"); } } @@ -8643,7 +8724,6 @@ static int handle_grant_struct(enum enum_acl_lists struct_no, bool drop, int elements; const char *UNINIT_VAR(user); const char *UNINIT_VAR(host); - const char *UNINIT_VAR(role); ACL_USER *acl_user= NULL; ACL_ROLE *acl_role= NULL; ACL_DB *acl_db= NULL; @@ -8783,7 +8863,6 @@ static int handle_grant_struct(enum enum_acl_lists struct_no, bool drop, role_grant_pair= (ROLE_GRANT_PAIR *) my_hash_element(roles_mappings_hash, idx); user= role_grant_pair->u_uname; host= role_grant_pair->u_hname; - role= role_grant_pair->r_uname; break; default: @@ -8793,8 +8872,6 @@ static int handle_grant_struct(enum enum_acl_lists struct_no, bool drop, user= ""; if (! host) host= ""; - if (! role) - role= ""; #ifdef EXTRA_DEBUG DBUG_PRINT("loop",("scan struct: %u index: %u user: '%s' host: '%s'", @@ -8803,6 +8880,7 @@ static int handle_grant_struct(enum enum_acl_lists struct_no, bool drop, if (struct_no == ROLES_MAPPINGS_HASH) { + const char* role= role_grant_pair->r_uname? role_grant_pair->r_uname: ""; if (user_from->is_role() ? 
strcmp(user_from->user.str, role) : (strcmp(user_from->user.str, user) || my_strcasecmp(system_charset_info, user_from->host.str, host))) @@ -10128,6 +10206,11 @@ applicable_roles_insert(ACL_USER_BASE *grantee, ACL_ROLE *role, void *ptr) return 0; } +#else +bool check_grant(THD *, ulong, TABLE_LIST *, bool, uint, bool) +{ + return 0; +} #endif /*NO_EMBEDDED_ACCESS_CHECKS */ int fill_schema_enabled_roles(THD *thd, TABLE_LIST *tables, COND *cond) @@ -12193,12 +12276,22 @@ bool acl_authenticate(THD *thd, uint com_change_user_pkt_len) if ((acl_user->user_resource.questions || acl_user->user_resource.updates || acl_user->user_resource.conn_per_hour || - acl_user->user_resource.user_conn || max_user_connections_checking) && + acl_user->user_resource.user_conn || + acl_user->user_resource.max_statement_time != 0.0 || + max_user_connections_checking) && get_or_create_user_conn(thd, (opt_old_style_user_limits ? sctx->user : sctx->priv_user), (opt_old_style_user_limits ? sctx->host_or_ip : sctx->priv_host), &acl_user->user_resource)) DBUG_RETURN(1); // The error is set by get_or_create_user_conn() + + if (acl_user->user_resource.max_statement_time != 0.0) + { + thd->variables.max_statement_time_double= + acl_user->user_resource.max_statement_time; + thd->variables.max_statement_time= + (thd->variables.max_statement_time_double * 1e6 + 0.1); + } } else sctx->skip_grants(); diff --git a/sql/sql_acl.h b/sql/sql_acl.h index 7833fb8736b..28eb7a7f933 100644 --- a/sql/sql_acl.h +++ b/sql/sql_acl.h @@ -235,6 +235,10 @@ bool check_grant_all_columns(THD *thd, ulong want_access, bool check_grant_routine(THD *thd, ulong want_access, TABLE_LIST *procs, bool is_proc, bool no_error); bool check_grant_db(THD *thd,const char *db); +bool check_global_access(THD *thd, ulong want_access, bool no_errors= false); +bool check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv, + GRANT_INTERNAL_INFO *grant_internal_info, + bool dont_check_global_grants, bool no_errors); ulong 
get_table_grant(THD *thd, TABLE_LIST *table); ulong get_column_grant(THD *thd, GRANT_INFO *grant, const char *db_name, const char *table_name, @@ -263,10 +267,6 @@ int fill_schema_table_privileges(THD *thd, TABLE_LIST *tables, COND *cond); int fill_schema_column_privileges(THD *thd, TABLE_LIST *tables, COND *cond); int wild_case_compare(CHARSET_INFO *cs, const char *str,const char *wildstr); int check_password_policy(String *password); -#ifdef NO_EMBEDDED_ACCESS_CHECKS -#define check_grant(A,B,C,D,E,F) 0 -#define check_grant_db(A,B) 0 -#endif /** Result of an access check for an internal schema or table. diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index 34a076cc327..0b610718cd0 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -1143,6 +1143,8 @@ bool Sql_cmd_analyze_table::execute(THD *thd) FALSE, UINT_MAX, FALSE)) goto error; thd->enable_slow_log= opt_log_slow_admin_statements; + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL); + res= mysql_admin_table(thd, first_table, &m_lex->check_opt, "analyze", lock_type, 1, 0, 0, 0, &handler::ha_analyze, 0); @@ -1197,6 +1199,7 @@ bool Sql_cmd_optimize_table::execute(THD *thd) if (check_table_access(thd, SELECT_ACL | INSERT_ACL, first_table, FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL) thd->enable_slow_log= opt_log_slow_admin_statements; res= (specialflag & SPECIAL_NO_NEW_FUNC) ? 
mysql_recreate_table(thd, first_table, true) : @@ -1230,6 +1233,7 @@ bool Sql_cmd_repair_table::execute(THD *thd) FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ thd->enable_slow_log= opt_log_slow_admin_statements; + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL) res= mysql_admin_table(thd, first_table, &m_lex->check_opt, "repair", TL_WRITE, 1, MY_TEST(m_lex->check_opt.sql_flags & TT_USEFRM), diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc index 97b9c127c22..cfe360217c2 100644 --- a/sql/sql_alter.cc +++ b/sql/sql_alter.cc @@ -18,6 +18,7 @@ // mysql_exchange_partition #include "sql_base.h" // open_temporary_tables #include "sql_alter.h" +#include "wsrep_mysqld.h" Alter_info::Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root) :drop_list(rhs.drop_list, mem_root), @@ -303,6 +304,22 @@ bool Sql_cmd_alter_table::execute(THD *thd) thd->enable_slow_log= opt_log_slow_admin_statements; +#ifdef WITH_WSREP + TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl); + + if (WSREP(thd) && + (!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, first_table)) && + wsrep_to_isolation_begin(thd, + lex->name.str ? select_lex->db : NULL, + lex->name.str ? lex->name.str : NULL, + first_table)) + { + WSREP_WARN("ALTER TABLE isolation failure"); + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ + result= mysql_alter_table(thd, select_lex->db, lex->name.str, &create_info, first_table, diff --git a/sql/sql_base.cc b/sql/sql_base.cc index d60506dcad7..e51eb1c1a11 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -61,7 +61,8 @@ #ifdef __WIN__ #include <io.h> #endif - +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" bool No_such_table_error_handler::handle_condition(THD *, @@ -3556,8 +3557,7 @@ thr_lock_type read_lock_type_for_table(THD *thd, at THD::variables::sql_log_bin member. 
*/ bool log_on= mysql_bin_log.is_open() && thd->variables.sql_log_bin; - ulong binlog_format= thd->variables.binlog_format; - if ((log_on == FALSE) || (binlog_format == BINLOG_FORMAT_ROW) || + if ((log_on == FALSE) || (thd->wsrep_binlog_format() == BINLOG_FORMAT_ROW) || (table_list->table->s->table_category == TABLE_CATEGORY_LOG) || (table_list->table->s->table_category == TABLE_CATEGORY_PERFORMANCE) || !(is_update_query(prelocking_ctx->sql_command) || @@ -4417,7 +4417,7 @@ restart: flags)) { error= TRUE; - goto err; + goto error; } } else @@ -4427,7 +4427,7 @@ restart: ot_ctx.get_timeout(), flags)) { error= TRUE; - goto err; + goto error; } for (table= *start; table && table != thd->lex->first_not_own_table(); table= table->next_global) @@ -4485,16 +4485,16 @@ restart: it may change in future. */ if (ot_ctx.recover_from_failed_open()) - goto err; + goto error; /* Re-open temporary tables after close_tables_for_reopen(). */ if (open_temporary_tables(thd, *start)) - goto err; + goto error; error= FALSE; goto restart; } - goto err; + goto error; } DEBUG_SYNC(thd, "open_tables_after_open_and_process_table"); @@ -4542,11 +4542,11 @@ restart: close_tables_for_reopen(thd, start, ot_ctx.start_of_statement_svp()); if (ot_ctx.recover_from_failed_open()) - goto err; + goto error; /* Re-open temporary tables after close_tables_for_reopen(). */ if (open_temporary_tables(thd, *start)) - goto err; + goto error; error= FALSE; goto restart; @@ -4556,7 +4556,7 @@ restart: Something is wrong with the table or its contents, and an error has been emitted; we must abort. */ - goto err; + goto error; } } } @@ -4567,26 +4567,40 @@ restart: children, attach the children to their parents. At end of statement, the children are detached. Attaching and detaching are always done, even under LOCK TABLES. + + And start wsrep TOI if needed. 
*/ for (tables= *start; tables; tables= tables->next_global) { TABLE *tbl= tables->table; + if (!tbl) + continue; + + if (WSREP_ON && sqlcom_can_generate_row_events(thd) && + wsrep_replicate_myisam && tables && tbl->file->ht == myisam_hton && + tables->lock_type >= TL_WRITE_ALLOW_WRITE) + { + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, tables); + } + /* Schema tables may not have a TABLE object here. */ - if (tbl && tbl->file->ht->db_type == DB_TYPE_MRG_MYISAM) + if (tbl->file->ht->db_type == DB_TYPE_MRG_MYISAM) { /* MERGE tables need to access parent and child TABLE_LISTs. */ DBUG_ASSERT(tbl->pos_in_table_list == tables); if (tbl->file->extra(HA_EXTRA_ATTACH_CHILDREN)) { error= TRUE; - goto err; + goto error; } } } -err: +error: THD_STAGE_INFO(thd, stage_after_opening_tables); + thd_proc_info(thd, 0); + free_root(&new_frm_mem, MYF(0)); // Free pre-alloced block if (error && *table_to_open) @@ -5040,6 +5054,8 @@ end: close_thread_tables(thd); } THD_STAGE_INFO(thd, stage_after_opening_tables); + + thd_proc_info(thd, 0); DBUG_RETURN(table); } @@ -5303,7 +5319,7 @@ bool lock_tables(THD *thd, TABLE_LIST *tables, uint count, We can solve these problems in mixed mode by switching to binlogging if at least one updated table is used by sub-statement */ - if (thd->variables.binlog_format != BINLOG_FORMAT_ROW && tables && + if (thd->wsrep_binlog_format() != BINLOG_FORMAT_ROW && tables && has_write_table_with_auto_increment(thd->lex->first_not_own_table())) thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS); } @@ -7360,7 +7376,7 @@ store_natural_using_join_columns(THD *thd, TABLE_LIST *natural_using_join, } if (non_join_columns->elements > 0) - natural_using_join->join_columns->concat(non_join_columns); + natural_using_join->join_columns->append(non_join_columns); natural_using_join->is_join_columns_complete= TRUE; result= FALSE; @@ -8993,7 +9009,15 @@ bool mysql_notify_thread_having_shared_lock(THD *thd, THD *in_use, (e.g. see partitioning code). 
*/ if (!thd_table->needs_reopen()) - signalled|= mysql_lock_abort_for_thread(thd, thd_table); + { + signalled|= mysql_lock_abort_for_thread(thd, thd_table); + if (thd && WSREP(thd) && wsrep_thd_is_BF(thd, true)) + { + WSREP_DEBUG("remove_table_from_cache: %llu", + (unsigned long long) thd->real_id); + wsrep_abort_thd((void *)thd, (void *)in_use, FALSE); + } + } } mysql_mutex_unlock(&in_use->LOCK_thd_data); } diff --git a/sql/sql_builtin.cc.in b/sql/sql_builtin.cc.in index 63850650ac9..5bf0a682369 100644 --- a/sql/sql_builtin.cc.in +++ b/sql/sql_builtin.cc.in @@ -25,7 +25,11 @@ extern #endif builtin_maria_plugin @mysql_mandatory_plugins@ @mysql_optional_plugins@ - builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin; + builtin_maria_binlog_plugin, +#ifdef WITH_WSREP + builtin_wsrep_plugin, +#endif /* WITH_WSREP */ + builtin_maria_mysql_password_plugin; struct st_maria_plugin *mysql_optional_plugins[]= { @@ -35,5 +39,8 @@ struct st_maria_plugin *mysql_optional_plugins[]= struct st_maria_plugin *mysql_mandatory_plugins[]= { builtin_maria_binlog_plugin, builtin_maria_mysql_password_plugin, +#ifdef WITH_WSREP + builtin_wsrep_plugin, +#endif /* WITH_WSREP */ @mysql_mandatory_plugins@ 0 }; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 3d1476715e5..d41c7e58e67 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -64,6 +64,8 @@ #include "sql_parse.h" // is_update_query #include "sql_callback.h" #include "lock.h" +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" #include "sql_connect.h" /* @@ -859,9 +861,26 @@ bool Drop_table_error_handler::handle_condition(THD *thd, } -THD::THD() - :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION, - /* statement id */ 0), +/** + Send timeout to thread. 
+ + Note that this is always safe as the thread will always remove it's + timeouts at end of query (and thus before THD is destroyed) +*/ + +extern "C" void thd_kill_timeout(THD* thd) +{ + thd->status_var.max_statement_time_exceeded++; + mysql_mutex_lock(&thd->LOCK_thd_data); + /* Kill queries that can't cause data corruptions */ + thd->awake(KILL_TIMEOUT); + mysql_mutex_unlock(&thd->LOCK_thd_data); +} + + +THD::THD(bool is_wsrep_applier) + :Statement(&main_lex, &main_mem_root, STMT_CONVENTIONAL_EXECUTION, + /* statement id */ 0), rli_fake(0), rgi_fake(0), rgi_slave(NULL), in_sub_stmt(0), log_all_errors(0), binlog_unsafe_warning_flags(0), @@ -894,8 +913,18 @@ THD::THD() debug_sync_control(0), #endif /* defined(ENABLED_DEBUG_SYNC) */ wait_for_commit_ptr(0), - main_da(0, false, false), + main_da(0, false, false), m_stmt_da(&main_da) +#ifdef WITH_WSREP + , + wsrep_applier(is_wsrep_applier), + wsrep_applier_closing(false), + wsrep_client_thread(false), + wsrep_apply_toi(false), + wsrep_po_handle(WSREP_PO_INITIALIZER), + wsrep_po_cnt(0), + wsrep_apply_format(0) +#endif { ulong tmp; @@ -1004,6 +1033,22 @@ THD::THD() m_command=COM_CONNECT; *scramble= '\0'; +#ifdef WITH_WSREP + mysql_mutex_init(key_LOCK_wsrep_thd, &LOCK_wsrep_thd, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_thd, &COND_wsrep_thd, NULL); + wsrep_ws_handle.trx_id = WSREP_UNDEFINED_TRX_ID; + wsrep_ws_handle.opaque = NULL; + wsrep_retry_counter = 0; + wsrep_PA_safe = true; + wsrep_retry_query = NULL; + wsrep_retry_query_len = 0; + wsrep_retry_command = COM_CONNECT; + wsrep_consistency_check = NO_CONSISTENCY_CHECK; + wsrep_mysql_replicated = 0; + wsrep_TOI_pre_query = NULL; + wsrep_TOI_pre_query_len = 0; + wsrep_info[sizeof(wsrep_info) - 1] = '\0'; /* make sure it is 0-terminated */ +#endif /* Call to init() below requires fully initialized Open_tables_state. 
*/ reset_open_tables_state(this); @@ -1031,6 +1076,8 @@ THD::THD() protocol_text.init(this); protocol_binary.init(this); + thr_timer_init(&query_timer, (void (*)(void*)) thd_kill_timeout, this); + tablespace_op=FALSE; /* @@ -1043,6 +1090,7 @@ THD::THD() my_rnd_init(&rand, tmp + (ulong) &rand, tmp + (ulong) ::global_query_id); substitute_null_with_insert_id = FALSE; thr_lock_info_init(&lock_info); /* safety: will be reset after start */ + lock_info.mysql_thd= (void *)this; m_internal_handler= NULL; m_binlog_invoker= INVOKER_NONE; @@ -1345,6 +1393,7 @@ extern "C" THD *_current_thd_noinline(void) return my_pthread_getspecific_ptr(THD*,THR_THD); } #endif + /* Init common variables that has to be reset on start and on change_user */ @@ -1355,8 +1404,8 @@ void THD::init(void) mysql_mutex_lock(&LOCK_global_system_variables); plugin_thdvar_init(this); /* - variables= global_system_variables above has reset - variables.pseudo_thread_id to 0. We need to correct it here to + plugin_thd_var_init() sets variables= global_system_variables, which + has reset variables.pseudo_thread_id to 0. We need to correct it here to avoid temporary tables replication failure. */ variables.pseudo_thread_id= thread_id; @@ -1387,6 +1436,24 @@ void THD::init(void) bzero((char *) &org_status_var, sizeof(org_status_var)); start_bytes_received= 0; last_commit_gtid.seq_no= 0; +#ifdef WITH_WSREP + wsrep_exec_mode= wsrep_applier ? 
REPL_RECV : LOCAL_STATE; + wsrep_conflict_state= NO_CONFLICT; + wsrep_query_state= QUERY_IDLE; + wsrep_last_query_id= 0; + wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED; + wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED; + wsrep_converted_lock_session= false; + wsrep_retry_counter= 0; + wsrep_rli= NULL; + wsrep_rgi= NULL; + wsrep_PA_safe= true; + wsrep_consistency_check = NO_CONSISTENCY_CHECK; + wsrep_mysql_replicated = 0; + + wsrep_TOI_pre_query = NULL; + wsrep_TOI_pre_query_len = 0; +#endif if (variables.sql_log_bin) variables.option_bits|= OPTION_BIN_LOG; @@ -1582,6 +1649,13 @@ THD::~THD() mysql_mutex_lock(&LOCK_thd_data); mysql_mutex_unlock(&LOCK_thd_data); +#ifdef WITH_WSREP + mysql_mutex_lock(&LOCK_wsrep_thd); + mysql_mutex_unlock(&LOCK_wsrep_thd); + mysql_mutex_destroy(&LOCK_wsrep_thd); + if (wsrep_rli) delete wsrep_rli; + if (wsrep_rgi) delete wsrep_rgi; +#endif /* Close connection */ #ifndef EMBEDDED_LIBRARY if (net.vio) @@ -1726,6 +1800,7 @@ void add_diff_to_status(STATUS_VAR *to_var, STATUS_VAR *from_var, This is normally called from another thread's THD object. @note Do always call this while holding LOCK_thd_data. + NOT_KILLED is used to awake a thread for a slave */ void THD::awake(killed_state state_to_set) @@ -1737,6 +1812,13 @@ void THD::awake(killed_state state_to_set) print_aborted_warning(3, "KILLED"); + /* + Don't degrade killed state, for example from a KILL_CONNECTION to + STATEMENT TIMEOUT + */ + if (killed >= KILL_CONNECTION) + state_to_set= killed; + /* Set the 'killed' flag of 'this', which is the target THD object. */ killed= state_to_set; @@ -1768,6 +1850,7 @@ void THD::awake(killed_state state_to_set) mysql_mutex_lock(&mysys_var->mutex); if (!system_thread) // Don't abort locks mysys_var->abort=1; + /* This broadcast could be up in the air if the victim thread exits the cond in the time between read and broadcast, but that is @@ -1892,7 +1975,15 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, (e.g. see partitioning code). 
*/ if (!thd_table->needs_reopen()) + { signalled|= mysql_lock_abort_for_thread(this, thd_table); + if (this && WSREP(this) && wsrep_thd_is_BF(this, FALSE)) + { + WSREP_DEBUG("remove_table_from_cache: %llu", + (unsigned long long) this->real_id); + wsrep_abort_thd((void *)this, (void *)in_use, FALSE); + } + } } mysql_mutex_unlock(&in_use->LOCK_thd_data); } @@ -1928,6 +2019,9 @@ int killed_errno(killed_state killed) case KILL_QUERY: case KILL_QUERY_HARD: DBUG_RETURN(ER_QUERY_INTERRUPTED); + case KILL_TIMEOUT: + case KILL_TIMEOUT_HARD: + DBUG_RETURN(ER_STATEMENT_TIMEOUT); case KILL_SERVER: case KILL_SERVER_HARD: DBUG_RETURN(ER_SERVER_SHUTDOWN); @@ -2074,6 +2168,12 @@ void THD::cleanup_after_query() /* reset table map for multi-table update */ table_map_for_update= 0; m_binlog_invoker= INVOKER_NONE; +#ifdef WITH_WSREP + if (TOTAL_ORDER == wsrep_exec_mode) + { + wsrep_exec_mode = LOCAL_STATE; + } +#endif /* WITH_WSREP */ #ifndef EMBEDDED_LIBRARY if (rgi_slave) @@ -2506,6 +2606,13 @@ bool sql_exchange::escaped_given(void) bool select_send::send_result_set_metadata(List<Item> &list, uint flags) { bool res; +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_retry_query) + { + WSREP_DEBUG("skipping select metadata"); + return FALSE; + } +#endif /* WITH_WSREP */ if (!(res= thd->protocol->send_result_set_metadata(&list, flags))) is_result_set_started= 1; return res; @@ -3634,6 +3741,23 @@ Statement_map::~Statement_map() my_hash_free(&st_hash); } +bool my_var_user::set(THD *thd, Item *item) +{ + Item_func_set_user_var *suv= new Item_func_set_user_var(name, item); + suv->save_item_result(item); + return suv->fix_fields(thd, 0) || suv->update(); +} + +bool my_var_sp::set(THD *thd, Item *item) +{ + return thd->spcont->set_variable(thd, offset, &item); +} + +bool my_var_param::set(THD *thd, Item *item) +{ + return param->set_value(thd, 0, &item); +} + int select_dumpvar::send_data(List<Item> &items) { List_iterator_fast<my_var> var_li(var_list); @@ -3654,20 +3778,8 @@ int 
select_dumpvar::send_data(List<Item> &items) } while ((mv= var_li++) && (item= it++)) { - if (mv->local) - { - if (thd->spcont->set_variable(thd, mv->offset, &item)) - DBUG_RETURN(1); - } - else - { - Item_func_set_user_var *suv= new Item_func_set_user_var(mv->s, item); - suv->save_item_result(item); - if (suv->fix_fields(thd, 0)) - DBUG_RETURN (1); - if (suv->update()) - DBUG_RETURN (1); - } + if (mv->set(thd, item)) + DBUG_RETURN(1); } DBUG_RETURN(thd->is_error()); } @@ -4251,8 +4363,9 @@ extern "C" int thd_non_transactional_update(const MYSQL_THD thd) extern "C" int thd_binlog_format(const MYSQL_THD thd) { - if (mysql_bin_log.is_open() && (thd->variables.option_bits & OPTION_BIN_LOG)) - return (int) thd->variables.binlog_format; + if (((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open()) && + thd->variables.option_bits & OPTION_BIN_LOG) + return (int) thd->wsrep_binlog_format(); else return BINLOG_FORMAT_UNSPEC; } @@ -4982,7 +5095,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) binlog by filtering rules. */ if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) && - !(variables.binlog_format == BINLOG_FORMAT_STMT && + !(wsrep_binlog_format() == BINLOG_FORMAT_STMT && !binlog_filter->db_ok(db))) { /* @@ -5192,7 +5305,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) */ my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0)); } - else if (variables.binlog_format == BINLOG_FORMAT_ROW && + else if (wsrep_binlog_format() == BINLOG_FORMAT_ROW && sqlcom_can_generate_row_events(this)) { /* @@ -5221,7 +5334,7 @@ int THD::decide_logging_format(TABLE_LIST *tables) else { /* binlog_format = STATEMENT */ - if (variables.binlog_format == BINLOG_FORMAT_STMT) + if (wsrep_binlog_format() == BINLOG_FORMAT_STMT) { if (lex->is_stmt_row_injection()) { @@ -5238,7 +5351,10 @@ int THD::decide_logging_format(TABLE_LIST *tables) 5. 
Error: Cannot modify table that uses a storage engine limited to row-logging when binlog_format = STATEMENT */ - my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), ""); + if (IF_WSREP((!WSREP(this) || wsrep_exec_mode == LOCAL_STATE),1)) + { + my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), ""); + } } else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0) { @@ -5346,11 +5462,11 @@ int THD::decide_logging_format(TABLE_LIST *tables) DBUG_PRINT("info", ("decision: no logging since " "mysql_bin_log.is_open() = %d " "and (options & OPTION_BIN_LOG) = 0x%llx " - "and binlog_format = %lu " + "and binlog_format = %u " "and binlog_filter->db_ok(db) = %d", mysql_bin_log.is_open(), (variables.option_bits & OPTION_BIN_LOG), - variables.binlog_format, + (uint) wsrep_binlog_format(), binlog_filter->db_ok(db))); #endif @@ -5586,9 +5702,10 @@ CPP_UNNAMED_NS_END int THD::binlog_write_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) -{ - DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +{ + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + ((WSREP(this) && wsrep_emulate_bin_log) || mysql_bin_log.is_open())); /* Pack records into format for transfer. We are allocating more memory than needed, but that doesn't matter. 
@@ -5620,8 +5737,9 @@ int THD::binlog_update_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, const uchar *before_record, const uchar *after_record) -{ - DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +{ + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + ((WSREP(this) && wsrep_emulate_bin_log) || mysql_bin_log.is_open())); size_t const before_maxlen = max_row_length(table, before_record); size_t const after_maxlen = max_row_length(table, after_record); @@ -5669,8 +5787,9 @@ int THD::binlog_update_row(TABLE* table, bool is_trans, int THD::binlog_delete_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) -{ - DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); +{ + DBUG_ASSERT(is_current_stmt_binlog_format_row() && + ((WSREP(this) && wsrep_emulate_bin_log) || mysql_bin_log.is_open())); /* Pack records into format for transfer. We are allocating more @@ -5705,7 +5824,7 @@ int THD::binlog_remove_pending_rows_event(bool clear_maps, { DBUG_ENTER("THD::binlog_remove_pending_rows_event"); - if (!mysql_bin_log.is_open()) + if(!WSREP_EMULATE_BINLOG(this) && !mysql_bin_log.is_open()) DBUG_RETURN(0); /* Ensure that all events in a GTID group are in the same cache */ @@ -5728,7 +5847,7 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional) mode: it might be the case that we left row-based mode before flushing anything (e.g., if we have explicitly locked tables). 
*/ - if (!mysql_bin_log.is_open()) + if(!WSREP_EMULATE_BINLOG(this) && !mysql_bin_log.is_open()) DBUG_RETURN(0); /* Ensure that all events in a GTID group are in the same cache */ @@ -5980,7 +6099,9 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg, DBUG_ENTER("THD::binlog_query"); DBUG_PRINT("enter", ("qtype: %s query: '%-.*s'", show_query_type(qtype), (int) query_len, query_arg)); - DBUG_ASSERT(query_arg && mysql_bin_log.is_open()); + + DBUG_ASSERT(query_arg && + (WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open())); /* If this is withing a BEGIN ... COMMIT group, don't log it */ if (variables.option_bits & OPTION_GTID_BEGIN) diff --git a/sql/sql_class.h b/sql/sql_class.h index c0636349b89..e1536071e08 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -37,6 +37,7 @@ #include "violite.h" /* vio_is_connected */ #include "thr_lock.h" /* thr_lock_type, THR_LOCK_DATA, THR_LOCK_INFO */ +#include "thr_timer.h" #include <mysql/psi/mysql_stage.h> #include <mysql/psi/mysql_statement.h> #include <mysql/psi/mysql_idle.h> @@ -50,12 +51,13 @@ void set_thd_stage_info(void *thd, const char *calling_func, const char *calling_file, const unsigned int calling_line); - + #define THD_STAGE_INFO(thd, stage) \ (thd)->enter_stage(& stage, NULL, __func__, __FILE__, __LINE__) #include "my_apc.h" #include "rpl_gtid.h" +#include "wsrep_mysqld.h" class Reprepare_observer; class Relay_log_info; @@ -448,17 +450,19 @@ enum killed_state */ ABORT_QUERY= 6, ABORT_QUERY_HARD= 7, + KILL_TIMEOUT= 8, + KILL_TIMEOUT_HARD= 9, /* All of the following killed states will kill the connection KILL_CONNECTION must be the first of these and it must start with an even number (becasue of HARD bit)! 
*/ - KILL_CONNECTION= 8, - KILL_CONNECTION_HARD= 9, - KILL_SYSTEM_THREAD= 10, - KILL_SYSTEM_THREAD_HARD= 11, - KILL_SERVER= 12, - KILL_SERVER_HARD= 13 + KILL_CONNECTION= 10, + KILL_CONNECTION_HARD= 11, + KILL_SYSTEM_THREAD= 12, + KILL_SYSTEM_THREAD_HARD= 13, + KILL_SERVER= 14, + KILL_SERVER_HARD= 15 }; extern int killed_errno(killed_state killed); @@ -507,6 +511,7 @@ typedef struct system_variables ulonglong max_heap_table_size; ulonglong tmp_table_size; ulonglong long_query_time; + ulonglong max_statement_time; ulonglong optimizer_switch; sql_mode_t sql_mode; ///< which non-standard SQL behaviour should be enabled sql_mode_t old_behavior; ///< which old SQL behaviour should be enabled @@ -567,9 +572,6 @@ typedef struct system_variables ulong log_slow_rate_limit; ulong binlog_format; ///< binlog format for this thd (see enum_binlog_format) ulong progress_report_time; - my_bool binlog_annotate_row_events; - my_bool binlog_direct_non_trans_update; - my_bool sql_log_bin; ulong completion_type; ulong query_cache_type; ulong tx_isolation; @@ -600,7 +602,6 @@ typedef struct system_variables my_bool tx_read_only; my_bool low_priority_updates; my_bool query_cache_wlock_invalidate; - my_bool engine_condition_pushdown; my_bool keep_files_on_create; my_bool old_mode; @@ -608,6 +609,10 @@ typedef struct system_variables my_bool old_passwords; my_bool big_tables; my_bool query_cache_strip_comments; + my_bool sql_log_slow; + my_bool sql_log_bin; + my_bool binlog_annotate_row_events; + my_bool binlog_direct_non_trans_update; plugin_ref table_plugin; plugin_ref tmp_table_plugin; @@ -638,7 +643,11 @@ typedef struct system_variables ulong wt_timeout_short, wt_deadlock_search_depth_short; ulong wt_timeout_long, wt_deadlock_search_depth_long; - double long_query_time_double; + my_bool wsrep_on; + my_bool wsrep_causal_reads; + uint wsrep_sync_wait; + ulong wsrep_retry_autocommit; + double long_query_time_double, max_statement_time_double; my_bool pseudo_slave_mode; @@ -727,6 +736,7 @@ 
typedef struct system_status_var ulong empty_queries; ulong access_denied_errors; ulong lost_connections; + ulong max_statement_time_exceeded; /* Number of statements sent from the client */ @@ -793,6 +803,10 @@ mysqld_collation_get_by_name(const char *name, return cs; } +inline bool is_supported_parser_charset(CHARSET_INFO *cs) +{ + return MY_TEST(cs->mbminlen == 1); +} #ifdef MYSQL_SERVER @@ -1355,10 +1369,9 @@ enum enum_thread_type SYSTEM_THREAD_DELAYED_INSERT= 1, SYSTEM_THREAD_SLAVE_IO= 2, SYSTEM_THREAD_SLAVE_SQL= 4, - SYSTEM_THREAD_NDBCLUSTER_BINLOG= 8, - SYSTEM_THREAD_EVENT_SCHEDULER= 16, - SYSTEM_THREAD_EVENT_WORKER= 32, - SYSTEM_THREAD_BINLOG_BACKGROUND= 64 + SYSTEM_THREAD_EVENT_SCHEDULER= 8, + SYSTEM_THREAD_EVENT_WORKER= 16, + SYSTEM_THREAD_BINLOG_BACKGROUND= 32 }; inline char const * @@ -1371,7 +1384,6 @@ show_system_thread(enum_thread_type thread) RETURN_NAME_AS_STRING(SYSTEM_THREAD_DELAYED_INSERT); RETURN_NAME_AS_STRING(SYSTEM_THREAD_SLAVE_IO); RETURN_NAME_AS_STRING(SYSTEM_THREAD_SLAVE_SQL); - RETURN_NAME_AS_STRING(SYSTEM_THREAD_NDBCLUSTER_BINLOG); RETURN_NAME_AS_STRING(SYSTEM_THREAD_EVENT_SCHEDULER); RETURN_NAME_AS_STRING(SYSTEM_THREAD_EVENT_WORKER); default: @@ -2072,7 +2084,7 @@ public: int is_current_stmt_binlog_format_row() const { DBUG_ASSERT(current_stmt_binlog_format == BINLOG_FORMAT_STMT || current_stmt_binlog_format == BINLOG_FORMAT_ROW); - return current_stmt_binlog_format == BINLOG_FORMAT_ROW; + return WSREP_FORMAT(current_stmt_binlog_format) == BINLOG_FORMAT_ROW; } enum binlog_filter_state @@ -2756,7 +2768,8 @@ public: /* Debug Sync facility. See debug_sync.cc. */ struct st_debug_sync_control *debug_sync_control; #endif /* defined(ENABLED_DEBUG_SYNC) */ - THD(); + THD(bool is_wsrep_applier= false); + ~THD(); void init(void); @@ -3271,8 +3284,7 @@ public: tests fail and so force them to propagate the lex->binlog_row_based_if_mixed upwards to the caller. 
*/ - if ((variables.binlog_format == BINLOG_FORMAT_MIXED) && - (in_sub_stmt == 0)) + if ((wsrep_binlog_format() == BINLOG_FORMAT_MIXED) && (in_sub_stmt == 0)) set_current_stmt_binlog_format_row(); DBUG_VOID_RETURN; @@ -3323,7 +3335,7 @@ public: show_system_thread(system_thread))); if (in_sub_stmt == 0) { - if (variables.binlog_format == BINLOG_FORMAT_ROW) + if (wsrep_binlog_format() == BINLOG_FORMAT_ROW) set_current_stmt_binlog_format_row(); else if (temporary_tables == NULL) set_current_stmt_binlog_format_stmt(); @@ -3711,8 +3723,8 @@ public: mysql_mutex_t LOCK_wakeup_ready; mysql_cond_t COND_wakeup_ready; /* - The GTID assigned to the last commit. If no GTID was assigned to any commit - so far, this is indicated by last_commit_gtid.seq_no == 0. + The GTID assigned to the last commit. If no GTID was assigned to any commit + so far, this is indicated by last_commit_gtid.seq_no == 0. */ rpl_gtid last_commit_gtid; @@ -3731,6 +3743,76 @@ public: return (temporary_tables || (rgi_slave && rgi_have_temporary_tables())); } + + inline ulong wsrep_binlog_format() const + { + return WSREP_FORMAT(variables.binlog_format); + } + +#ifdef WITH_WSREP + const bool wsrep_applier; /* dedicated slave applier thread */ + bool wsrep_applier_closing; /* applier marked to close */ + bool wsrep_client_thread; /* to identify client threads*/ + bool wsrep_PA_safe; + bool wsrep_converted_lock_session; + bool wsrep_apply_toi; /* applier processing in TOI */ + enum wsrep_exec_mode wsrep_exec_mode; + query_id_t wsrep_last_query_id; + enum wsrep_query_state wsrep_query_state; + enum wsrep_conflict_state wsrep_conflict_state; + mysql_mutex_t LOCK_wsrep_thd; + mysql_cond_t COND_wsrep_thd; + wsrep_trx_meta_t wsrep_trx_meta; + uint32 wsrep_rand; + Relay_log_info *wsrep_rli; + rpl_group_info *wsrep_rgi; + wsrep_ws_handle_t wsrep_ws_handle; + ulong wsrep_retry_counter; // of autocommit + char *wsrep_retry_query; + size_t wsrep_retry_query_len; + enum enum_server_command wsrep_retry_command; + enum 
wsrep_consistency_check_mode + wsrep_consistency_check; + int wsrep_mysql_replicated; + const char *wsrep_TOI_pre_query; /* a query to apply before + the actual TOI query */ + size_t wsrep_TOI_pre_query_len; + wsrep_po_handle_t wsrep_po_handle; + size_t wsrep_po_cnt; +#ifdef GTID_SUPPORT + rpl_sid wsrep_po_sid; +#endif /* GTID_SUPPORT */ + void *wsrep_apply_format; + char wsrep_info[128]; /* string for dynamic proc info */ +#endif /* WITH_WSREP */ + + /* Handling of timeouts for commands */ + thr_timer_t query_timer; +public: + void set_query_timer() + { +#ifndef EMBEDDED_LIBRARY + /* + Don't start a query timer if + - If timeouts are not set + - if we are in a stored procedure or sub statement + - If this is a slave thread + - If we already have set a timeout (happens when running prepared + statements that calls mysql_execute_command()) + */ + if (!variables.max_statement_time || spcont || in_sub_stmt || + slave_thread || query_timer.expired == 0) + return; + thr_timer_settime(&query_timer, variables.max_statement_time); +#endif + } + void reset_query_timer() + { +#ifndef EMBEDDED_LIBRARY + if (!query_timer.expired) + thr_timer_end(&query_timer); +#endif + } }; @@ -4689,21 +4771,46 @@ public: class my_var : public Sql_alloc { public: - LEX_STRING s; -#ifndef DBUG_OFF + const LEX_STRING name; + enum type { SESSION_VAR, LOCAL_VAR, PARAM_VAR }; + type scope; + my_var(const LEX_STRING& j, enum type s) : name(j), scope(s) { } + virtual ~my_var() {} + virtual bool set(THD *thd, Item *val) = 0; +}; + +class my_var_param: public my_var { +public: + Settable_routine_parameter *param; + my_var_param(Item_param *p) + : my_var(null_lex_str, PARAM_VAR), + param(p->get_settable_routine_parameter()) + { p->inout= Item_param::OUT_PARAM; } + ~my_var_param() { } + bool set(THD *thd, Item *val); +}; + +class my_var_sp: public my_var { +public: + uint offset; + enum_field_types type; /* Routine to which this Item_splocal belongs. 
Used for checking if correct runtime context is used for variable handling. */ sp_head *sp; -#endif - bool local; - uint offset; - enum_field_types type; - my_var (LEX_STRING& j, bool i, uint o, enum_field_types t) - :s(j), local(i), offset(o), type(t) - {} - ~my_var() {} + my_var_sp(const LEX_STRING& j, uint o, enum_field_types t, sp_head *s) + : my_var(j, LOCAL_VAR), offset(o), type(t), sp(s) { } + ~my_var_sp() { } + bool set(THD *thd, Item *val); +}; + +class my_var_user: public my_var { +public: + my_var_user(const LEX_STRING& j) + : my_var(j, SESSION_VAR) { } + ~my_var_user() { } + bool set(THD *thd, Item *val); }; class select_dumpvar :public select_result_interceptor { @@ -4846,6 +4953,11 @@ public: */ #define CF_SKIP_QUESTIONS (1U << 1) +/** + Do not check that wsrep snapshot is ready before allowing this command +*/ +#define CF_SKIP_WSREP_CHECK (1U << 2) + void mark_transaction_to_rollback(THD *thd, bool all); /* Inline functions */ diff --git a/sql/sql_cmd.h b/sql/sql_cmd.h index 231db2a1d8c..dcc0b3303d8 100644 --- a/sql/sql_cmd.h +++ b/sql/sql_cmd.h @@ -78,8 +78,7 @@ enum enum_sql_command { SQLCOM_ALTER_TABLESPACE, SQLCOM_INSTALL_PLUGIN, SQLCOM_UNINSTALL_PLUGIN, SQLCOM_SHOW_AUTHORS, SQLCOM_BINLOG_BASE64_EVENT, - SQLCOM_SHOW_PLUGINS, - SQLCOM_SHOW_CONTRIBUTORS, + SQLCOM_SHOW_PLUGINS, SQLCOM_SHOW_CONTRIBUTORS, SQLCOM_CREATE_SERVER, SQLCOM_DROP_SERVER, SQLCOM_ALTER_SERVER, SQLCOM_CREATE_EVENT, SQLCOM_ALTER_EVENT, SQLCOM_DROP_EVENT, SQLCOM_SHOW_CREATE_EVENT, SQLCOM_SHOW_EVENTS, @@ -89,11 +88,11 @@ enum enum_sql_command { SQLCOM_SIGNAL, SQLCOM_RESIGNAL, SQLCOM_SHOW_RELAYLOG_EVENTS, SQLCOM_GET_DIAGNOSTICS, - SQLCOM_SHOW_USER_STATS, SQLCOM_SHOW_TABLE_STATS, SQLCOM_SHOW_INDEX_STATS, - SQLCOM_SHOW_CLIENT_STATS, SQLCOM_SLAVE_ALL_START, SQLCOM_SLAVE_ALL_STOP, SQLCOM_SHOW_EXPLAIN, SQLCOM_SHUTDOWN, SQLCOM_CREATE_ROLE, SQLCOM_DROP_ROLE, SQLCOM_GRANT_ROLE, SQLCOM_REVOKE_ROLE, + SQLCOM_COMPOUND, + SQLCOM_SHOW_GENERIC, /* When a command is added here, be sure it's also 
added in mysqld.cc diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 433f3303ad7..5b36b828b8a 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -37,6 +37,7 @@ // reset_host_errors #include "sql_acl.h" // acl_getroot, NO_ACCESS, SUPER_ACL #include "sql_callback.h" +#include "wsrep_mysqld.h" HASH global_user_stats, global_client_stats, global_table_stats; HASH global_index_stats; @@ -100,7 +101,6 @@ int get_or_create_user_conn(THD *thd, const char *user, end: mysql_mutex_unlock(&LOCK_user_conn); return return_val; - } @@ -417,6 +417,7 @@ void init_user_stats(USER_STATS *user_stats, size_t user_length, const char *priv_user, uint total_connections, + uint total_ssl_connections, uint concurrent_connections, time_t connected_time, double busy_time, @@ -436,6 +437,7 @@ void init_user_stats(USER_STATS *user_stats, ulonglong rollback_trans, ulonglong denied_connections, ulonglong lost_connections, + ulonglong max_statement_time_exceeded, ulonglong access_denied_errors, ulonglong empty_queries) { @@ -449,6 +451,7 @@ void init_user_stats(USER_STATS *user_stats, strmake_buf(user_stats->priv_user, priv_user); user_stats->total_connections= total_connections; + user_stats->total_ssl_connections= total_ssl_connections; user_stats->concurrent_connections= concurrent_connections; user_stats->connected_time= connected_time; user_stats->busy_time= busy_time; @@ -457,8 +460,10 @@ void init_user_stats(USER_STATS *user_stats, user_stats->bytes_sent= bytes_sent; user_stats->binlog_bytes_written= binlog_bytes_written; user_stats->rows_sent= rows_sent; - user_stats->rows_updated= rows_updated; user_stats->rows_read= rows_read; + user_stats->rows_inserted= rows_inserted; + user_stats->rows_deleted= rows_deleted; + user_stats->rows_updated= rows_updated; user_stats->select_commands= select_commands; user_stats->update_commands= update_commands; user_stats->other_commands= other_commands; @@ -466,64 +471,13 @@ void init_user_stats(USER_STATS *user_stats, 
user_stats->rollback_trans= rollback_trans; user_stats->denied_connections= denied_connections; user_stats->lost_connections= lost_connections; + user_stats->max_statement_time_exceeded= max_statement_time_exceeded; user_stats->access_denied_errors= access_denied_errors; user_stats->empty_queries= empty_queries; DBUG_VOID_RETURN; } -#ifdef COMPLETE_PATCH_NOT_ADDED_YET - -void add_user_stats(USER_STATS *user_stats, - uint total_connections, - uint concurrent_connections, - time_t connected_time, - double busy_time, - double cpu_time, - ulonglong bytes_received, - ulonglong bytes_sent, - ulonglong binlog_bytes_written, - ha_rows rows_sent, - ha_rows rows_read, - ha_rows rows_inserted, - ha_rows rows_deleted, - ha_rows rows_updated, - ulonglong select_commands, - ulonglong update_commands, - ulonglong other_commands, - ulonglong commit_trans, - ulonglong rollback_trans, - ulonglong denied_connections, - ulonglong lost_connections, - ulonglong access_denied_errors, - ulonglong empty_queries) -{ - user_stats->total_connections+= total_connections; - user_stats->concurrent_connections+= concurrent_connections; - user_stats->connected_time+= connected_time; - user_stats->busy_time+= busy_time; - user_stats->cpu_time+= cpu_time; - user_stats->bytes_received+= bytes_received; - user_stats->bytes_sent+= bytes_sent; - user_stats->binlog_bytes_written+= binlog_bytes_written; - user_stats->rows_sent+= rows_sent; - user_stats->rows_inserted+= rows_inserted; - user_stats->rows_deleted+= rows_deleted; - user_stats->rows_updated+= rows_updated; - user_stats->rows_read+= rows_read; - user_stats->select_commands+= select_commands; - user_stats->update_commands+= update_commands; - user_stats->other_commands+= other_commands; - user_stats->commit_trans+= commit_trans; - user_stats->rollback_trans+= rollback_trans; - user_stats->denied_connections+= denied_connections; - user_stats->lost_connections+= lost_connections; - user_stats->access_denied_errors+= access_denied_errors; - 
user_stats->empty_queries+= empty_queries; -} -#endif - - void init_global_user_stats(void) { if (my_hash_init(&global_user_stats, system_charset_info, max_connections, @@ -634,15 +588,16 @@ static bool increment_count_by_name(const char *name, size_t name_length, return TRUE; // Out of memory init_user_stats(user_stats, name, name_length, role_name, - 0, 0, // connections + 0, 0, 0, // connections 0, 0, 0, // time 0, 0, 0, // bytes sent, received and written - 0, 0, // Rows sent and read + 0, 0, // rows sent and read 0, 0, 0, // rows inserted, deleted and updated 0, 0, 0, // select, update and other commands 0, 0, // commit and rollback trans thd->status_var.access_denied_errors, 0, // lost connections + 0, // max query timeouts 0, // access denied errors 0); // empty queries @@ -653,6 +608,8 @@ static bool increment_count_by_name(const char *name, size_t name_length, } } user_stats->total_connections++; + if (thd->net.vio && thd->net.vio->type == VIO_TYPE_SSL) + user_stats->total_ssl_connections++; return FALSE; } @@ -755,6 +712,7 @@ static void update_global_user_stats_with_user(THD *thd, /* The following can only contain 0 or 1 and then connection ends */ user_stats->denied_connections+= thd->status_var.access_denied_errors; user_stats->lost_connections+= thd->status_var.lost_connections; + user_stats->max_statement_time_exceeded+= thd->status_var.max_statement_time_exceeded; } @@ -1172,6 +1130,17 @@ bool login_connection(THD *thd) void end_connection(THD *thd) { NET *net= &thd->net; +#ifdef WITH_WSREP + if (WSREP(thd)) + { + wsrep_status_t rcode= wsrep->free_connection(wsrep, thd->thread_id); + if (rcode) { + WSREP_WARN("wsrep failed to free connection context: %lu, code: %d", + thd->thread_id, rcode); + } + } + thd->wsrep_client_thread= 0; +#endif plugin_thdvar_cleanup(thd); if (thd->user_connect) @@ -1307,6 +1276,9 @@ bool thd_prepare_connection(THD *thd) (char *) thd->security_ctx->host_or_ip); prepare_new_connection_state(thd); +#ifdef WITH_WSREP + 
thd->wsrep_client_thread= 1; +#endif /* WITH_WSREP */ return FALSE; } @@ -1380,7 +1352,15 @@ void do_handle_one_connection(THD *thd_arg) break; } end_connection(thd); - + +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXITING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif end_thread: close_connection(thd); diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 063b90a6780..a930cb0f12d 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -656,10 +656,6 @@ not_silent: query_length= thd->query_length(); DBUG_ASSERT(query); - ha_binlog_log_query(thd, 0, LOGCOM_CREATE_DB, - query, query_length, - db, ""); - if (mysql_bin_log.is_open()) { int errcode= query_error_code(thd, TRUE); @@ -735,10 +731,6 @@ bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info) thd->variables.collation_database= thd->db_charset; } - ha_binlog_log_query(thd, 0, LOGCOM_ALTER_DB, - thd->query(), thd->query_length(), - db, ""); - if (mysql_bin_log.is_open()) { int errcode= query_error_code(thd, TRUE); @@ -883,11 +875,6 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) should be dropped while the database is being cleaned, but in the event that a change in the code to remove other objects is made, these drops should still not be logged. - - Notice that the binary log have to be enabled over the call to - ha_drop_database(), since NDB otherwise detects the binary log - as disabled and will not log the drop database statement on any - other connected server. 
*/ ha_drop_database(path); diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 38fb897c4f8..418c1db9b21 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -642,18 +642,18 @@ cleanup: if (!transactional_table && deleted > 0) thd->transaction.stmt.modified_non_trans_table= thd->transaction.all.modified_non_trans_table= TRUE; - + /* See similar binlogging code in sql_update.cc, for comments */ if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { - if (mysql_bin_log.is_open()) + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) { int errcode= 0; if (error < 0) thd->clear_error(); else errcode= query_error_code(thd, killed_status == NOT_KILLED); - + /* [binlog]: If 'handler::delete_all_rows()' was called and the storage engine does not inject the rows itself, we replicate @@ -1107,13 +1107,13 @@ void multi_delete::abort_result_set() DBUG_ASSERT(error_handled); DBUG_VOID_RETURN; } - + if (thd->transaction.stmt.modified_non_trans_table) { - /* + /* there is only side effects; to binlog with the error */ - if (mysql_bin_log.is_open()) + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); /* possible error of writing binary log is ignored deliberately */ @@ -1289,7 +1289,7 @@ bool multi_delete::send_eof() } if ((local_error == 0) || thd->transaction.stmt.modified_non_trans_table) { - if (mysql_bin_log.is_open()) + if(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) { int errcode= 0; if (local_error == 0) diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc index 9571e5a2886..a468d16bbe4 100644 --- a/sql/sql_explain.cc +++ b/sql/sql_explain.cc @@ -1390,8 +1390,10 @@ int Explain_insert::print_explain(Explain_query *query, void delete_explain_query(LEX *lex) { + DBUG_ENTER("delete_explain_query"); delete lex->explain; lex->explain= NULL; + DBUG_VOID_RETURN; } diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index d61af758ced..2b68f7766ac 100644 --- 
a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -1014,7 +1014,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, thd->transaction.stmt.modified_non_trans_table || was_insert_delayed) { - if (mysql_bin_log.is_open()) + if(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) { int errcode= 0; if (error <= 0) @@ -1100,6 +1100,11 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, if (error) goto abort; + if (thd->lex->analyze_stmt) + { + retval= thd->lex->explain->send_explain(thd); + goto abort; + } if (values_list.elements == 1 && (!(thd->variables.option_bits & OPTION_WARNINGS) || !thd->cuted_fields)) { @@ -3195,6 +3200,11 @@ bool Delayed_insert::handle_inserts(void) mysql_cond_broadcast(&cond_client); // If waiting clients } } + + if (WSREP((&thd))) + thd_proc_info(&thd, "insert done"); + else + thd_proc_info(&thd, 0); mysql_mutex_unlock(&mutex); /* @@ -3647,8 +3657,11 @@ bool select_insert::send_eof() DBUG_PRINT("enter", ("trans_table=%d, table_type='%s'", trans_table, table->file->table_type())); - error= (thd->locked_tables_mode <= LTM_LOCK_TABLES ? - table->file->ha_end_bulk_insert() : 0); + error = IF_WSREP((thd->wsrep_conflict_state == MUST_ABORT || + thd->wsrep_conflict_state == CERT_FAILURE) ? -1 :, ) + (thd->locked_tables_mode <= LTM_LOCK_TABLES ? + table->file->ha_end_bulk_insert() : 0); + if (!error && thd->is_error()) error= thd->get_stmt_da()->sql_errno(); @@ -3676,7 +3689,7 @@ bool select_insert::send_eof() events are in the transaction cache and will be written when ha_autocommit_or_rollback() is issued below. 
*/ - if (mysql_bin_log.is_open() && + if ((WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) && (!error || thd->transaction.stmt.modified_non_trans_table)) { int errcode= 0; @@ -3761,7 +3774,7 @@ void select_insert::abort_result_set() { if (!can_rollback_data()) thd->transaction.all.modified_non_trans_table= TRUE; - if (mysql_bin_log.is_open()) + if(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); /* error of writing binary log is ignored */ @@ -4169,7 +4182,7 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) create_info->table_was_deleted); DBUG_ASSERT(result == 0); /* store_create_info() always return 0 */ - if (mysql_bin_log.is_open()) + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) { int errcode= query_error_code(thd, thd->killed == NOT_KILLED); result= thd->binlog_query(THD::STMT_QUERY_TYPE, @@ -4179,6 +4192,9 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) /* suppress_use */ FALSE, errcode); } + + ha_fake_trx_id(thd); + return result; } @@ -4208,6 +4224,21 @@ bool select_create::send_eof() trans_commit_stmt(thd); if (!(thd->variables.option_bits & OPTION_GTID_BEGIN)) trans_commit_implicit(thd); +#ifdef WITH_WSREP + if (WSREP_ON) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state != NO_CONFLICT) + { + WSREP_DEBUG("select_create commit failed, thd: %lu err: %d %s", + thd->thread_id, thd->wsrep_conflict_state, thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + abort_result_set(); + return TRUE; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ } else if (!thd->is_current_stmt_binlog_format_row()) table->s->table_creation_was_logged= 1; diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h index b83ccf4b032..fa00e309623 100644 --- a/sql/sql_join_cache.h +++ b/sql/sql_join_cache.h @@ -84,7 +84,7 @@ class EXPLAIN_BKA_TYPE; For the third algorithm the accumulation of 
records allows to optimize fetching rows of the second operand from disk for some engines (MyISAM, InnoDB), or to minimize the number of round-trips between the Server and - the engine nodes (NDB Cluster). + the engine nodes. */ class JOIN_CACHE :public Sql_alloc diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 1e095627245..4ef94d7dab2 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2009, 2013, Monty Program Ab. + Copyright (c) 2009, 2014, Monty Program Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -504,7 +504,6 @@ void lex_start(THD *thd) lex->duplicates= DUP_ERROR; lex->ignore= 0; lex->spname= NULL; - lex->sphead= NULL; lex->spcont= NULL; lex->proc_list.first= 0; lex->escape_used= FALSE; @@ -516,8 +515,7 @@ void lex_start(THD *thd) lex->check_exists= FALSE; lex->verbose= 0; - lex->name.str= 0; - lex->name.length= 0; + lex->name= null_lex_str; lex->event_parse_data= NULL; lex->profile_options= PROFILE_NONE; lex->nest_level=0 ; @@ -561,8 +559,20 @@ void lex_end(LEX *lex) } reset_dynamic(&lex->plugins); - delete lex->sphead; - lex->sphead= NULL; + if (lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_PREPARE) + { + /* + Don't delete lex->sphead, it'll be needed for EXECUTE. 
+ Note that of all statements that populate lex->sphead + only SQLCOM_COMPOUND can be PREPAREd + */ + DBUG_ASSERT(lex->sphead == 0 || lex->sql_command == SQLCOM_COMPOUND); + } + else + { + delete lex->sphead; + lex->sphead= NULL; + } lex->mi.reset(); @@ -1580,6 +1590,17 @@ int lex_one_token(void *arg, THD *thd) } else { +#ifdef WITH_WSREP + if (WSREP(thd) && version == 99997 && thd->wsrep_exec_mode == LOCAL_STATE) + { + WSREP_DEBUG("consistency check: %s", thd->query()); + thd->wsrep_consistency_check= CONSISTENCY_CHECK_DECLARED; + lip->yySkipn(5); + lip->set_echo(TRUE); + state=MY_LEX_START; + break; /* Do not treat contents as a comment. */ + } +#endif /* WITH_WSREP */ /* Patch and skip the conditional comment to avoid it being propagated infinitely (eg. to a slave). @@ -2545,14 +2566,13 @@ void Query_tables_list::destroy_query_tables_list() LEX::LEX() : explain(NULL), - result(0), option_type(OPT_DEFAULT), is_lex_started(0), - limit_rows_examined_cnt(ULONGLONG_MAX) + result(0), option_type(OPT_DEFAULT), sphead(0), + is_lex_started(0), limit_rows_examined_cnt(ULONGLONG_MAX) { - my_init_dynamic_array2(&plugins, sizeof(plugin_ref), - plugins_static_buffer, - INITIAL_LEX_PLUGIN_LIST_SIZE, - INITIAL_LEX_PLUGIN_LIST_SIZE, 0); + init_dynamic_array2(&plugins, sizeof(plugin_ref), plugins_static_buffer, + INITIAL_LEX_PLUGIN_LIST_SIZE, + INITIAL_LEX_PLUGIN_LIST_SIZE, 0); reset_query_tables_list(TRUE); mi.init(); } @@ -2783,7 +2803,7 @@ uint8 LEX::get_effective_with_check(TABLE_LIST *view) bool LEX::copy_db_to(char **p_db, size_t *p_db_length) const { - if (sphead) + if (sphead && sphead->m_name.str) { DBUG_ASSERT(sphead->m_db.str && sphead->m_db.length); /* @@ -3737,7 +3757,7 @@ bool SELECT_LEX::merge_subquery(THD *thd, TABLE_LIST *derived, { derived->wrap_into_nested_join(subq_select->top_join_list); - ftfunc_list->concat(subq_select->ftfunc_list); + ftfunc_list->append(subq_select->ftfunc_list); if (join || thd->lex->sql_command == SQLCOM_UPDATE_MULTI || 
thd->lex->sql_command == SQLCOM_DELETE_MULTI) diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 5e5f36172b6..09592dfcb8f 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -135,6 +135,9 @@ struct sys_var_with_base #ifdef MYSQL_SERVER +extern const LEX_STRING null_lex_str; +extern const LEX_STRING empty_lex_str; + enum enum_sp_suid_behaviour { SP_IS_DEFAULT_SUID= 0, @@ -242,8 +245,7 @@ struct LEX_MASTER_INFO heartbeat_period= 0; ssl= ssl_verify_server_cert= heartbeat_opt= repl_ignore_server_ids_opt= LEX_MI_UNCHANGED; - gtid_pos_str.length= 0; - gtid_pos_str.str= NULL; + gtid_pos_str= null_lex_str; use_gtid_opt= LEX_GTID_UNCHANGED; } }; @@ -1021,9 +1023,6 @@ struct st_sp_chistics enum enum_sp_data_access daccess; }; -extern const LEX_STRING null_lex_str; -extern const LEX_STRING empty_lex_str; - struct st_trg_chistics { enum trg_action_time_type action_time; @@ -1035,9 +1034,6 @@ extern sys_var *trg_new_row_fake_var; enum xa_option_words {XA_NONE, XA_JOIN, XA_RESUME, XA_ONE_PHASE, XA_SUSPEND, XA_FOR_MIGRATE}; -extern const LEX_STRING null_lex_str; -extern const LEX_STRING empty_lex_str; - class Sroutine_hash_entry; /* @@ -2892,20 +2888,8 @@ public: }; -struct st_lex_local: public LEX +struct st_lex_local: public LEX, public Sql_alloc { - static void *operator new(size_t size) throw() - { - return sql_alloc(size); - } - static void *operator new(size_t size, MEM_ROOT *mem_root) throw() - { - return (void*) alloc_root(mem_root, (uint) size); - } - static void operator delete(void *ptr,size_t size) - { TRASH(ptr, size); } - static void operator delete(void *ptr, MEM_ROOT *mem_root) - { /* Never called */ } }; extern void lex_init(void); diff --git a/sql/sql_list.h b/sql/sql_list.h index 7538f69766d..1b672e120bd 100644 --- a/sql/sql_list.h +++ b/sql/sql_list.h @@ -244,7 +244,7 @@ public: delete *prev; *prev=node; } - inline void concat(base_list *list) + inline void append(base_list *list) { if (!list->is_empty()) { @@ -290,7 +290,7 @@ public: *prev= &end_of_list; last= 
prev; } - inline void prepand(base_list *list) + inline void prepend(base_list *list) { if (!list->is_empty()) { @@ -516,9 +516,9 @@ public: inline T* head() {return (T*) base_list::head(); } inline T** head_ref() {return (T**) base_list::head_ref(); } inline T* pop() {return (T*) base_list::pop(); } - inline void concat(List<T> *list) { base_list::concat(list); } + inline void append(List<T> *list) { base_list::append(list); } + inline void prepend(List<T> *list) { base_list::prepend(list); } inline void disjoin(List<T> *list) { base_list::disjoin(list); } - inline void prepand(List<T> *list) { base_list::prepand(list); } inline bool add_unique(T *a, bool (*eq)(T *a, T *b)) { return base_list::add_unique(a, (List_eq *)eq); } void delete_elements(void) diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index ed2f76a223e..7b603a3da05 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2008, 2013, Monty Program Ab + Copyright (c) 2008, 2014, Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -105,6 +105,12 @@ #include "../storage/maria/ha_maria.h" #endif +#include "wsrep_mysqld.h" +#include "wsrep_thd.h" + +static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, + Parser_state *parser_state); + /** @defgroup Runtime_Environment Runtime Environment @{ @@ -261,11 +267,26 @@ void init_update_queries(void) /* Initialize the server command flags array. 
*/ memset(server_command_flags, 0, sizeof(server_command_flags)); - server_command_flags[COM_STATISTICS]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS; - server_command_flags[COM_PING]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS; + server_command_flags[COM_STATISTICS]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; + server_command_flags[COM_PING]= CF_SKIP_QUERY_ID | CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; server_command_flags[COM_STMT_PREPARE]= CF_SKIP_QUESTIONS; - server_command_flags[COM_STMT_CLOSE]= CF_SKIP_QUESTIONS; - server_command_flags[COM_STMT_RESET]= CF_SKIP_QUESTIONS; + server_command_flags[COM_STMT_CLOSE]= CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; + server_command_flags[COM_STMT_RESET]= CF_SKIP_QUESTIONS | CF_SKIP_WSREP_CHECK; + + server_command_flags[COM_QUIT]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_PROCESS_INFO]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_PROCESS_KILL]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_SHUTDOWN]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_SLEEP]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_TIME]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_END]= CF_SKIP_WSREP_CHECK; + + /* + COM_QUERY and COM_SET_OPTION are allowed to pass the early COM_xxx filter, + they're checked later in mysql_execute_command(). + */ + server_command_flags[COM_QUERY]= CF_SKIP_WSREP_CHECK; + server_command_flags[COM_SET_OPTION]= CF_SKIP_WSREP_CHECK; /* Initialize the sql command flags array. 
*/ memset(sql_command_flags, 0, sizeof(sql_command_flags)); @@ -371,6 +392,7 @@ void init_update_queries(void) sql_command_flags[SQLCOM_SHOW_EVENTS]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; sql_command_flags[SQLCOM_SHOW_OPEN_TABLES]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; sql_command_flags[SQLCOM_SHOW_PLUGINS]= CF_STATUS_COMMAND; + sql_command_flags[SQLCOM_SHOW_GENERIC]= CF_STATUS_COMMAND; sql_command_flags[SQLCOM_SHOW_FIELDS]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; sql_command_flags[SQLCOM_SHOW_KEYS]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; sql_command_flags[SQLCOM_SHOW_VARIABLES]= CF_STATUS_COMMAND | CF_REEXECUTION_FRAGILE; @@ -405,10 +427,6 @@ void init_update_queries(void) sql_command_flags[SQLCOM_SHOW_PROFILES]= CF_STATUS_COMMAND; sql_command_flags[SQLCOM_SHOW_PROFILE]= CF_STATUS_COMMAND; sql_command_flags[SQLCOM_BINLOG_BASE64_EVENT]= CF_STATUS_COMMAND | CF_CAN_GENERATE_ROW_EVENTS; - sql_command_flags[SQLCOM_SHOW_CLIENT_STATS]= CF_STATUS_COMMAND; - sql_command_flags[SQLCOM_SHOW_USER_STATS]= CF_STATUS_COMMAND; - sql_command_flags[SQLCOM_SHOW_TABLE_STATS]= CF_STATUS_COMMAND; - sql_command_flags[SQLCOM_SHOW_INDEX_STATS]= CF_STATUS_COMMAND; sql_command_flags[SQLCOM_SHOW_TABLES]= (CF_STATUS_COMMAND | CF_SHOW_TABLE_COMMAND | CF_REEXECUTION_FRAGILE); sql_command_flags[SQLCOM_SHOW_TABLE_STATUS]= (CF_STATUS_COMMAND | CF_SHOW_TABLE_COMMAND | CF_REEXECUTION_FRAGILE); @@ -452,6 +470,7 @@ void init_update_queries(void) CF_CAN_GENERATE_ROW_EVENTS | CF_OPTIMIZER_TRACE; // (1) sql_command_flags[SQLCOM_EXECUTE]= CF_CAN_GENERATE_ROW_EVENTS; + sql_command_flags[SQLCOM_COMPOUND]= CF_CAN_GENERATE_ROW_EVENTS; /* We don't want to change to statement based replication for these commands @@ -869,8 +888,23 @@ void cleanup_items(Item *item) DBUG_VOID_RETURN; } + #ifndef EMBEDDED_LIBRARY +static bool wsrep_node_is_ready(THD *thd) +{ +#ifdef WITH_WSREP + if (thd->variables.wsrep_on && !thd->wsrep_applier && !wsrep_ready) + { + my_message(ER_UNKNOWN_COM_ERROR, + "WSREP 
has not yet prepared node for application use", + MYF(0)); + return false; + } +#endif + return true; +} + /** Read one command from connection and execute it (query or simple command). This function is called in loop from thread function. @@ -892,6 +926,19 @@ bool do_command(THD *thd) enum enum_server_command command; DBUG_ENTER("do_command"); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_IDLE; + if (thd->wsrep_conflict_state==MUST_ABORT) + { + wsrep_client_rollback(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ + /* indicator of uninitialized lex => normal flow of errors handling (see my_message_sql) @@ -939,6 +986,29 @@ bool do_command(THD *thd) thd->m_server_idle= TRUE; packet_length= my_net_read(net); thd->m_server_idle= FALSE; +#ifdef WITH_WSREP + if (WSREP(thd)) { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + /* these THD's are aborted or are aborting during being idle */ + if (thd->wsrep_conflict_state == ABORTING) + { + while (thd->wsrep_conflict_state == ABORTING) { + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + my_sleep(1000); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + } + thd->store_globals(); + } + else if (thd->wsrep_conflict_state == ABORTED) + { + thd->store_globals(); + } + + thd->wsrep_query_state= QUERY_EXEC; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ if (packet_length == packet_error) { @@ -946,6 +1016,19 @@ bool do_command(THD *thd) net->error, vio_description(net->vio))); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) + { + DBUG_PRINT("wsrep",("aborted for wsrep rollback: %lu", thd->real_id)); + wsrep_client_rollback(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ + /* Instrument this broken statement as "statement/com/error" */ thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, 
com_statement_info[COM_END]. @@ -1000,12 +1083,50 @@ bool do_command(THD *thd) vio_description(net->vio), command, command_name[command].str)); + /* bail out if DB snapshot has not been installed. */ + if (!(server_command_flags[command] & CF_SKIP_WSREP_CHECK) && + !wsrep_node_is_ready(thd)) + { + thd->protocol->end_statement(); + return_value= FALSE; + goto out; + } /* Restore read timeout value */ my_net_set_read_timeout(net, thd->variables.net_read_timeout); DBUG_ASSERT(packet_length); DBUG_ASSERT(!thd->apc_target.is_enabled()); return_value= dispatch_command(command, thd, packet+1, (uint) (packet_length-1)); +#ifdef WITH_WSREP + if (WSREP(thd)) + { + while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + WSREP_DEBUG("Retry autocommit for: %s\n", thd->wsrep_retry_query); + CHARSET_INFO *current_charset = thd->variables.character_set_client; + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser " + "character set: %s", current_charset->csname); + thd->variables.character_set_client = &my_charset_latin1; + WSREP_WARN("For retry temporally setting character set to : %s", + my_charset_latin1.csname); + } + return_value= dispatch_command(command, thd, thd->wsrep_retry_query, + thd->wsrep_retry_query_len); + thd->variables.character_set_client = current_charset; + } + + if (thd->wsrep_retry_query && thd->wsrep_conflict_state != REPLAYING) + { + my_free(thd->wsrep_retry_query); + thd->wsrep_retry_query = NULL; + thd->wsrep_retry_query_len = 0; + thd->wsrep_retry_command = COM_CONNECT; + } + } +#endif /* WITH_WSREP */ DBUG_ASSERT(!thd->apc_target.is_enabled()); out: @@ -1083,6 +1204,7 @@ static my_bool deny_updates_if_read_only_option(THD *thd, DBUG_RETURN(FALSE); } + /** Perform one connection-level (COM_XXXX) command. 
@@ -1109,9 +1231,44 @@ bool dispatch_command(enum enum_server_command command, THD *thd, { NET *net= &thd->net; bool error= 0; + bool do_end_of_statement= true; DBUG_ENTER("dispatch_command"); DBUG_PRINT("info", ("command: %d", command)); + inc_thread_running(); + +#ifdef WITH_WSREP + if (WSREP(thd)) + { + if (!thd->in_multi_stmt_transaction_mode()) + { + thd->wsrep_PA_safe= true; + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXEC; + if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + thd->wsrep_conflict_state= NO_CONFLICT; + } + if (thd->wsrep_conflict_state== MUST_ABORT) + { + wsrep_client_rollback(thd); + } + if (thd->wsrep_conflict_state== ABORTED) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + WSREP_DEBUG("Deadlock error for: %s", thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + thd->killed = NOT_KILLED; + thd->mysys_var->abort = 0; + thd->wsrep_conflict_state = NO_CONFLICT; + thd->wsrep_retry_counter = 0; + goto dispatch_end; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ #if defined(ENABLED_PROFILING) thd->profiling.start_new_query(); #endif @@ -1133,7 +1290,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, Commands which always take a long time are logged into the slow log only if opt_log_slow_admin_statements is set. 
*/ - thd->enable_slow_log= TRUE; + thd->enable_slow_log= thd->variables.sql_log_slow; thd->query_plan_flags= QPLAN_INIT; thd->lex->sql_command= SQLCOM_END; /* to avoid confusing VIEW detectors */ @@ -1150,7 +1307,6 @@ bool dispatch_command(enum enum_server_command command, THD *thd, */ thd->set_query_id(get_query_id()); } - inc_thread_running(); if (!(server_command_flags[command] & CF_SKIP_QUESTIONS)) statistic_increment(thd->status_var.questions, &LOCK_status); @@ -1308,7 +1464,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd, if (parser_state.init(thd, thd->query(), thd->query_length())) break; - mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); + if (WSREP_ON) + wsrep_mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); + else + mysql_parse(thd, thd->query(), thd->query_length(), &parser_state); while (!thd->killed && (parser_state.m_lip.found_semicolon != NULL) && ! thd->is_error()) @@ -1382,10 +1541,18 @@ bool dispatch_command(enum enum_server_command command, THD *thd, Count each statement from the client. */ statistic_increment(thd->status_var.questions, &LOCK_status); - thd->set_time(); /* Reset the query start time. */ + + if(!WSREP(thd)) + thd->set_time(); /* Reset the query start time. 
*/ + parser_state.reset(beginning_of_next_stmt, length); /* TODO: set thd->lex->sql_command to SQLCOM_END here */ - mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); + + if (WSREP_ON) + wsrep_mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); + else + mysql_parse(thd, beginning_of_next_stmt, length, &parser_state); + } DBUG_PRINT("info",("query ready")); @@ -1518,7 +1685,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, status_var_increment(thd->status_var.com_other); - thd->enable_slow_log= opt_log_slow_admin_statements; + thd->enable_slow_log&= opt_log_slow_admin_statements; thd->query_plan_flags|= QPLAN_ADMIN; if (check_global_access(thd, REPL_SLAVE_ACL)) break; @@ -1581,8 +1748,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, else #endif { - thd->lex->relay_log_connection_name.str= (char*) ""; - thd->lex->relay_log_connection_name.length= 0; + thd->lex->relay_log_connection_name= empty_lex_str; if (reload_acl_and_cache(thd, options, (TABLE_LIST*) 0, ¬_used)) break; } @@ -1722,15 +1888,35 @@ bool dispatch_command(enum enum_server_command command, THD *thd, my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0)); break; } - DBUG_ASSERT(thd->derived_tables == NULL && - (thd->open_tables == NULL || + +#ifdef WITH_WSREP + dispatch_end: + + if (WSREP(thd)) + { + /* wsrep BF abort in query exec phase */ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + do_end_of_statement= thd->wsrep_conflict_state != REPLAYING && + thd->wsrep_conflict_state != RETRY_AUTOCOMMIT; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + else + do_end_of_statement= true; + +#endif /* WITH_WSREP */ + + if (do_end_of_statement) + { + DBUG_ASSERT(thd->derived_tables == NULL && + (thd->open_tables == NULL || (thd->locked_tables_mode == LTM_LOCK_TABLES))); - thd_proc_info(thd, "updating status"); - /* Finalize server status flags after executing a command. 
*/ - thd->update_server_status(); - thd->protocol->end_statement(); - query_cache_end_of_result(thd); + thd_proc_info(thd, "updating status"); + /* Finalize server status flags after executing a command. */ + thd->update_server_status(); + thd->protocol->end_statement(); + query_cache_end_of_result(thd); + } if (!thd->is_error() && !thd->killed_errno()) mysql_audit_general(thd, MYSQL_AUDIT_GENERAL_RESULT, 0, 0); @@ -1786,7 +1972,6 @@ void log_slow_statement(THD *thd) { DBUG_ENTER("log_slow_statement"); - /* The following should never be true with our current code base, but better to keep this here so we don't accidently try to log a @@ -1797,12 +1982,10 @@ void log_slow_statement(THD *thd) /* Follow the slow log filter configuration. */ - if (!thd->enable_slow_log || + if (!thd->enable_slow_log || !global_system_variables.sql_log_slow || (thd->variables.log_slow_filter && !(thd->variables.log_slow_filter & thd->query_plan_flags))) - { goto end; - } if (((thd->server_status & SERVER_QUERY_WAS_SLOW) || ((thd->server_status & @@ -1934,38 +2117,15 @@ int prepare_schema_table(THD *thd, LEX *lex, Table_ident *table_ident, thd->profiling.discard_current_query(); #endif break; - case SCH_USER_STATS: - case SCH_CLIENT_STATS: - if (check_global_access(thd, SUPER_ACL | PROCESS_ACL, true)) - DBUG_RETURN(1); - case SCH_TABLE_STATS: - case SCH_INDEX_STATS: - case SCH_OPEN_TABLES: - case SCH_VARIABLES: - case SCH_STATUS: - case SCH_PROCEDURES: - case SCH_CHARSETS: - case SCH_ENGINES: - case SCH_COLLATIONS: - case SCH_COLLATION_CHARACTER_SET_APPLICABILITY: - case SCH_USER_PRIVILEGES: - case SCH_SCHEMA_PRIVILEGES: - case SCH_TABLE_PRIVILEGES: - case SCH_COLUMN_PRIVILEGES: - case SCH_TABLE_CONSTRAINTS: - case SCH_KEY_COLUMN_USAGE: default: break; } SELECT_LEX *select_lex= lex->current_select; - if (make_schema_select(thd, select_lex, schema_table_idx)) - { + if (make_schema_select(thd, select_lex, get_schema_table(schema_table_idx))) DBUG_RETURN(1); - } - TABLE_LIST *table_list= 
select_lex->table_list.first; - table_list->schema_select_lex= schema_select_lex; - table_list->schema_table_reformed= 1; + + select_lex->table_list.first->schema_select_lex= schema_select_lex; DBUG_RETURN(0); } @@ -2183,6 +2343,54 @@ err: } +static bool do_execute_sp(THD *thd, sp_head *sp) +{ + /* bits that should be cleared in thd->server_status */ + uint bits_to_be_cleared= 0; + if (sp->m_flags & sp_head::MULTI_RESULTS) + { + if (!(thd->client_capabilities & CLIENT_MULTI_RESULTS)) + { + /* The client does not support multiple result sets being sent back */ + my_error(ER_SP_BADSELECT, MYF(0), sp->m_qname.str); + return 1; + } + /* + If SERVER_MORE_RESULTS_EXISTS is not set, + then remember that it should be cleared + */ + bits_to_be_cleared= (~thd->server_status & + SERVER_MORE_RESULTS_EXISTS); + thd->server_status|= SERVER_MORE_RESULTS_EXISTS; + } + + ha_rows select_limit= thd->variables.select_limit; + thd->variables.select_limit= HA_POS_ERROR; + + /* + We never write CALL statements into binlog: + - If the mode is non-prelocked, each statement will be logged + separately. + - If the mode is prelocked, the invoking statement will care + about writing into binlog. + So just execute the statement. + */ + int res= sp->execute_procedure(thd, &thd->lex->value_list); + + thd->variables.select_limit= select_limit; + thd->server_status&= ~bits_to_be_cleared; + + if (res) + { + DBUG_ASSERT(thd->is_error() || thd->killed); + return 1; // Substatement should already have sent error + } + + my_ok(thd, (thd->get_row_count_func() < 0) ? 0 : thd->get_row_count_func()); + return 0; +} + + /** Execute command saved in thd and lex->sql_command. 
@@ -2383,7 +2591,42 @@ mysql_execute_command(THD *thd) #ifdef HAVE_REPLICATION } /* endif unlikely slave */ #endif +#ifdef WITH_WSREP + if (WSREP(thd)) + { + /* + change LOCK TABLE WRITE to transaction + */ + if (lex->sql_command== SQLCOM_LOCK_TABLES && wsrep_convert_LOCK_to_trx) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (table->lock_type >= TL_WRITE_ALLOW_WRITE) + { + lex->sql_command= SQLCOM_BEGIN; + thd->wsrep_converted_lock_session= true; + break; + } + } + } + if (lex->sql_command== SQLCOM_UNLOCK_TABLES && + thd->wsrep_converted_lock_session) + { + thd->wsrep_converted_lock_session= false; + lex->sql_command= SQLCOM_COMMIT; + lex->tx_release= TVL_NO; + } + /* + * bail out if DB snapshot has not been installed. We however, + * allow SET and SHOW queries + */ + if (lex->sql_command != SQLCOM_SET_OPTION && + !wsrep_is_show_query(lex->sql_command) && + !wsrep_node_is_ready(thd)) + goto error; + } +#endif /* WITH_WSREP */ status_var_increment(thd->status_var.com_stat[lex->sql_command]); thd->progress.report_to_client= MY_TEST(sql_command_flags[lex->sql_command] & CF_REPORT_PROGRESS); @@ -2424,13 +2667,17 @@ mysql_execute_command(THD *thd) if (!(thd->variables.option_bits & OPTION_GTID_BEGIN)) { /* Commit the normal transaction if one is active. */ - if (trans_commit_implicit(thd)) - goto error; + bool commit_failed= trans_commit_implicit(thd); /* Release metadata locks acquired in this transaction. 
*/ thd->mdl_context.release_transactional_locks(); + if (commit_failed) + { + WSREP_DEBUG("implicit commit failed, MDL released: %lu", thd->thread_id); + goto error; + } } } - + #ifndef DBUG_OFF if (lex->sql_command != SQLCOM_SET_OPTION) DEBUG_SYNC(thd,"before_execute_sql_command"); @@ -2474,6 +2721,9 @@ mysql_execute_command(THD *thd) goto error; } + /* Start timeouts */ + thd->set_query_timer(); + switch (lex->sql_command) { case SQLCOM_SHOW_EVENTS: @@ -2483,6 +2733,8 @@ mysql_execute_command(THD *thd) #endif case SQLCOM_SHOW_STATUS: { + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + goto error; execute_show_status(thd, all_tables); break; } @@ -2520,19 +2772,19 @@ mysql_execute_command(THD *thd) case SQLCOM_SHOW_TRIGGERS: case SQLCOM_SHOW_TABLE_STATUS: case SQLCOM_SHOW_OPEN_TABLES: - case SQLCOM_SHOW_PLUGINS: + case SQLCOM_SHOW_GENERIC: case SQLCOM_SHOW_FIELDS: case SQLCOM_SHOW_KEYS: + case SQLCOM_SELECT: + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + goto error; + + case SQLCOM_SHOW_PLUGINS: case SQLCOM_SHOW_VARIABLES: case SQLCOM_SHOW_CHARSETS: case SQLCOM_SHOW_COLLATIONS: case SQLCOM_SHOW_STORAGE_ENGINES: case SQLCOM_SHOW_PROFILE: - case SQLCOM_SHOW_CLIENT_STATS: - case SQLCOM_SHOW_USER_STATS: - case SQLCOM_SHOW_TABLE_STATS: - case SQLCOM_SHOW_INDEX_STATS: - case SQLCOM_SELECT: { thd->status_var.last_query_cost= 0.0; @@ -2550,13 +2802,12 @@ mysql_execute_command(THD *thd) else res= check_access(thd, privileges_requested, any_db, NULL, NULL, 0, 0); - if (res) - break; + if (!res) + res= execute_sqlcom_select(thd, all_tables); - res= execute_sqlcom_select(thd, all_tables); break; } -case SQLCOM_PREPARE: + case SQLCOM_PREPARE: { mysql_sql_stmt_prepare(thd); break; @@ -2890,7 +3141,7 @@ case SQLCOM_PREPARE: */ if(lex->ignore) lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_CREATE_IGNORE_SELECT); - + if(lex->duplicates == DUP_REPLACE) lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_CREATE_REPLACE_SELECT); @@ -2902,9 +3153,8 @@ case SQLCOM_PREPARE: raise a warning, 
as it may cause problems (see 'NAME_CONST issues' in 'Binary Logging of Stored Programs') */ - if (thd->query_name_consts && - mysql_bin_log.is_open() && - thd->variables.binlog_format == BINLOG_FORMAT_STMT && + if (thd->query_name_consts && mysql_bin_log.is_open() && + thd->wsrep_binlog_format() == BINLOG_FORMAT_STMT && !mysql_bin_log.is_query_in_union(thd, thd->query_id)) { List_iterator_fast<Item> it(select_lex->item_list); @@ -2913,7 +3163,7 @@ case SQLCOM_PREPARE: /* Count SP local vars in the top-level SELECT list */ while ((item= it++)) { - if (item->is_splocal()) + if (item->get_item_splocal()) splocal_refs++; } /* @@ -3019,6 +3269,14 @@ case SQLCOM_PREPARE: } else { + /* in STATEMENT format, we probably have to replicate also temporary + tables, like mysql replication does + */ + if (WSREP(thd) && (!thd->is_current_stmt_binlog_format_row() || + !(create_info.options & HA_LEX_CREATE_TMP_TABLE))) + { + WSREP_TO_ISOLATION_BEGIN(create_table->db, create_table->table_name, NULL) + } /* Regular CREATE TABLE */ res= mysql_create_table(thd, create_table, &create_info, &alter_info); @@ -3057,12 +3315,13 @@ end_with_restore_list: DBUG_ASSERT(first_table == all_tables && first_table != 0); if (check_one_table_access(thd, INDEX_ACL, all_tables)) goto error; /* purecov: inspected */ + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL) /* Currently CREATE INDEX or DROP INDEX cause a full table rebuild and thus classify as slow administrative statements just like ALTER TABLE. 
*/ - thd->enable_slow_log= opt_log_slow_admin_statements; + thd->enable_slow_log&= opt_log_slow_admin_statements; thd->query_plan_flags|= QPLAN_ADMIN; bzero((char*) &create_info, sizeof(create_info)); @@ -3174,6 +3433,7 @@ end_with_restore_list: #endif /* HAVE_REPLICATION */ case SQLCOM_RENAME_TABLE: { + WSREP_TO_ISOLATION_BEGIN(0, 0, first_table) if (execute_rename_table(thd, first_table, all_tables)) goto error; break; @@ -3194,13 +3454,17 @@ end_with_restore_list: #endif #endif /* EMBEDDED_LIBRARY */ case SQLCOM_SHOW_CREATE: + { DBUG_ASSERT(first_table == all_tables && first_table != 0); #ifdef DONT_ALLOW_SHOW_COMMANDS my_message(ER_NOT_ALLOWED_COMMAND, ER(ER_NOT_ALLOWED_COMMAND), MYF(0)); /* purecov: inspected */ goto error; #else - { + + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + goto error; + /* Access check: SHOW CREATE TABLE require any privileges on the table level (ie @@ -3258,11 +3522,14 @@ end_with_restore_list: /* Access is granted. Execute the command. */ res= mysqld_show_create(thd, first_table); break; - } #endif + } case SQLCOM_CHECKSUM: { DBUG_ASSERT(first_table == all_tables && first_table != 0); + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + goto error; + if (check_table_access(thd, SELECT_ACL, all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* purecov: inspected */ @@ -3274,6 +3541,10 @@ end_with_restore_list: { ha_rows found= 0, updated= 0; DBUG_ASSERT(first_table == all_tables && first_table != 0); + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) + goto error; + if (update_precheck(thd, all_tables)) break; @@ -3310,6 +3581,9 @@ end_with_restore_list: /* if we switched from normal update, rights are checked */ if (up_result != 2) { + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) + goto error; if ((res= multi_update_precheck(thd, all_tables))) break; } @@ -3379,6 +3653,10 @@ end_with_restore_list: break; } case SQLCOM_REPLACE: + { + if (WSREP_CLIENT(thd) 
&& + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) + goto error; #ifndef DBUG_OFF if (mysql_bin_log.is_open()) { @@ -3413,10 +3691,15 @@ end_with_restore_list: DBUG_PRINT("debug", ("Just after generate_incident()")); } #endif + } case SQLCOM_INSERT: { DBUG_ASSERT(first_table == all_tables && first_table != 0); + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) + goto error; + /* Since INSERT DELAYED doesn't support temporary tables, we could not pre-open temporary tables for SQLCOM_INSERT / SQLCOM_REPLACE. @@ -3471,8 +3754,20 @@ end_with_restore_list: select_result *sel_result; bool explain= MY_TEST(lex->describe); DBUG_ASSERT(first_table == all_tables && first_table != 0); + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE)) + goto error; + if ((res= insert_precheck(thd, all_tables))) break; +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_consistency_check == CONSISTENCY_CHECK_DECLARED) + { + thd->wsrep_consistency_check = CONSISTENCY_CHECK_RUNNING; + WSREP_TO_ISOLATION_BEGIN(first_table->db, first_table->table_name, NULL); + } +#endif /* WITH_WSREP */ + /* INSERT...SELECT...ON DUPLICATE KEY UPDATE/REPLACE SELECT/ INSERT...IGNORE...SELECT can be unsafe, unless ORDER BY PRIMARY KEY @@ -3563,6 +3858,10 @@ end_with_restore_list: { select_result *sel_result=lex->result; DBUG_ASSERT(first_table == all_tables && first_table != 0); + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) + goto error; + if ((res= delete_precheck(thd, all_tables))) break; DBUG_ASSERT(select_lex->offset_limit == 0); @@ -3619,6 +3918,9 @@ end_with_restore_list: DBUG_ASSERT(first_table == all_tables && first_table != 0); TABLE_LIST *aux_tables= thd->lex->auxiliary_table_list.first; multi_delete *result; + if (WSREP_CLIENT(thd) && + wsrep_sync_wait(thd, WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE)) + goto error; if ((res= multi_delete_precheck(thd, all_tables))) break; @@ 
-3689,6 +3991,19 @@ end_with_restore_list: /* So that DROP TEMPORARY TABLE gets to binlog at commit/rollback */ thd->variables.option_bits|= OPTION_KEEP_LOG; } + if (WSREP(thd)) + { + for (TABLE_LIST *table= all_tables; table; table= table->next_global) + { + if (!lex->drop_temporary && + (!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, table))) + { + WSREP_TO_ISOLATION_BEGIN(NULL, NULL, all_tables); + break; + } + } + } /* If we are a slave, we should add IF EXISTS if the query executed on the master without an error. This will help a slave to @@ -3702,8 +4017,8 @@ end_with_restore_list: /* DDL and binlog write order are protected by metadata locks. */ res= mysql_rm_table(thd, first_table, lex->check_exists, lex->drop_temporary); + break; } - break; case SQLCOM_SHOW_PROCESSLIST: if (!thd->security_ctx->priv_user[0] && check_global_access(thd,PROCESS_ACL)) @@ -3893,6 +4208,7 @@ end_with_restore_list: #endif if (check_access(thd, CREATE_ACL, lex->name.str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL) res= mysql_create_db(thd, lex->name.str, &create_info, 0); break; } @@ -3924,6 +4240,7 @@ end_with_restore_list: #endif if (check_access(thd, DROP_ACL, lex->name.str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(lex->name.str, NULL, NULL) res= mysql_rm_db(thd, lex->name.str, lex->check_exists, 0); break; } @@ -3955,6 +4272,7 @@ end_with_restore_list: res= 1; break; } + WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL) res= mysql_upgrade_db(thd, db); if (!res) my_ok(thd); @@ -3990,6 +4308,7 @@ end_with_restore_list: #endif if (check_access(thd, ALTER_ACL, db->str, NULL, NULL, 1, 0)) break; + WSREP_TO_ISOLATION_BEGIN(db->str, NULL, NULL) res= mysql_alter_db(thd, db->str, &create_info); break; } @@ -4028,6 +4347,7 @@ end_with_restore_list: if (res) break; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) switch (lex->sql_command) { case SQLCOM_CREATE_EVENT: { @@ -4062,6 +4382,7 @@ end_with_restore_list: 
lex->spname->m_name); break; case SQLCOM_DROP_EVENT: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= Events::drop_event(thd, lex->spname->m_db, lex->spname->m_name, lex->check_exists))) @@ -4076,6 +4397,7 @@ end_with_restore_list: if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 0)) break; #ifdef HAVE_DLOPEN + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res = mysql_create_function(thd, &lex->udf))) my_ok(thd); #else @@ -4091,6 +4413,7 @@ end_with_restore_list: if (check_access(thd, INSERT_ACL, "mysql", NULL, NULL, 1, 1) && check_global_access(thd,CREATE_USER_ACL)) break; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) /* Conditionally writes to binlog */ if (!(res= mysql_create_user(thd, lex->users_list, lex->sql_command == SQLCOM_CREATE_ROLE))) @@ -4104,6 +4427,7 @@ end_with_restore_list: check_global_access(thd,CREATE_USER_ACL)) break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= mysql_drop_user(thd, lex->users_list, lex->sql_command == SQLCOM_DROP_ROLE))) my_ok(thd); @@ -4115,6 +4439,7 @@ end_with_restore_list: check_global_access(thd,CREATE_USER_ACL)) break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res= mysql_rename_user(thd, lex->users_list))) my_ok(thd); break; @@ -4126,6 +4451,7 @@ end_with_restore_list: break; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (!(res = mysql_revoke_all(thd, lex->users_list))) my_ok(thd); break; @@ -4208,6 +4534,7 @@ end_with_restore_list: lex->type == TYPE_ENUM_PROCEDURE, 0)) goto error; /* Conditionally writes to binlog */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_routine_grant(thd, all_tables, lex->type == TYPE_ENUM_PROCEDURE, lex->users_list, grants, @@ -4221,6 +4548,7 @@ end_with_restore_list: all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* Conditionally writes to binlog */ + 
WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_table_grant(thd, all_tables, lex->users_list, lex->columns, lex->grant, lex->sql_command == SQLCOM_REVOKE); @@ -4236,6 +4564,7 @@ end_with_restore_list: } else { + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) /* Conditionally writes to binlog */ res= mysql_grant(thd, select_lex->db, lex->users_list, lex->grant, lex->sql_command == SQLCOM_REVOKE, @@ -4404,6 +4733,7 @@ end_with_restore_list: able to open it (with SQLCOM_HA_OPEN) in the first place. */ unit->set_limit(select_lex); + res= mysql_ha_read(thd, first_table, lex->ha_read_mode, lex->ident.str, lex->insert_list, lex->ha_rkey_mode, select_lex->where, unit->select_limit_cnt, unit->offset_limit_cnt); @@ -4412,7 +4742,11 @@ end_with_restore_list: case SQLCOM_BEGIN: DBUG_PRINT("info", ("Executing SQLCOM_BEGIN thd: %p", thd)); if (trans_begin(thd, lex->start_transaction_opt)) + { + thd->mdl_context.release_transactional_locks(); + WSREP_DEBUG("BEGIN failed, MDL released: %lu", thd->thread_id); goto error; + } my_ok(thd); break; case SQLCOM_COMMIT: @@ -4425,9 +4759,13 @@ end_with_restore_list: bool tx_release= (lex->tx_release == TVL_YES || (thd->variables.completion_type == 2 && lex->tx_release != TVL_NO)); - if (trans_commit(thd)) - goto error; + bool commit_failed= trans_commit(thd); thd->mdl_context.release_transactional_locks(); + if (commit_failed) + { + WSREP_DEBUG("COMMIT failed, MDL released: %lu", thd->thread_id); + goto error; + } /* Begin transaction with the same isolation level. 
*/ if (tx_chain) { @@ -4446,7 +4784,15 @@ end_with_restore_list: thd->killed= KILL_CONNECTION; thd->print_aborted_warning(3, "RELEASE"); } - my_ok(thd); +#ifdef WITH_WSREP + if (WSREP(thd) && (thd->wsrep_conflict_state != NO_CONFLICT && + thd->wsrep_conflict_state != REPLAYING)) + { + DBUG_ASSERT(thd->is_error()); // the error is already issued + } + else +#endif /* WITH_WSREP */ + my_ok(thd); break; } case SQLCOM_ROLLBACK: @@ -4459,10 +4805,14 @@ end_with_restore_list: bool tx_release= (lex->tx_release == TVL_YES || (thd->variables.completion_type == 2 && lex->tx_release != TVL_NO)); + bool rollback_failed= trans_rollback(thd); + thd->mdl_context.release_transactional_locks(); - if (trans_rollback(thd)) + if (rollback_failed) + { + WSREP_DEBUG("rollback failed, MDL released: %lu", thd->thread_id); goto error; - thd->mdl_context.release_transactional_locks(); + } /* Begin transaction with the same isolation level. */ if (tx_chain) { @@ -4478,8 +4828,15 @@ end_with_restore_list: /* Disconnect the current client connection. 
*/ if (tx_release) thd->killed= KILL_CONNECTION; - my_ok(thd); - break; +#ifdef WITH_WSREP + if (WSREP(thd) && thd->wsrep_conflict_state != NO_CONFLICT) + { + DBUG_ASSERT(thd->is_error()); // the error is already issued + } + else +#endif /* WITH_WSREP */ + my_ok(thd); + break; } case SQLCOM_RELEASE_SAVEPOINT: if (trans_release_savepoint(thd, lex->ident)) @@ -4547,6 +4904,7 @@ end_with_restore_list: if (sp_process_definer(thd)) goto create_sp_error; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= (sp_result= sp_create_routine(thd, lex->sphead->m_type, lex->sphead)); switch (sp_result) { case SP_OK: { @@ -4672,9 +5030,6 @@ create_sp_error: } else { - ha_rows select_limit; - /* bits that should be cleared in thd->server_status */ - uint bits_to_be_cleared= 0; /* Check that the stored procedure doesn't contain Dynamic SQL and doesn't return result sets: such stored procedures can't @@ -4688,55 +5043,19 @@ create_sp_error: goto error; } - if (sp->m_flags & sp_head::MULTI_RESULTS) - { - if (! (thd->client_capabilities & CLIENT_MULTI_RESULTS)) - { - /* - The client does not support multiple result sets being sent - back - */ - my_error(ER_SP_BADSELECT, MYF(0), sp->m_qname.str); - goto error; - } - /* - If SERVER_MORE_RESULTS_EXISTS is not set, - then remember that it should be cleared - */ - bits_to_be_cleared= (~thd->server_status & - SERVER_MORE_RESULTS_EXISTS); - thd->server_status|= SERVER_MORE_RESULTS_EXISTS; - } - - select_limit= thd->variables.select_limit; - thd->variables.select_limit= HA_POS_ERROR; - - /* - We never write CALL statements into binlog: - - If the mode is non-prelocked, each statement will be logged - separately. - - If the mode is prelocked, the invoking statement will care - about writing into binlog. - So just execute the statement. 
- */ - res= sp->execute_procedure(thd, &lex->value_list); - - thd->variables.select_limit= select_limit; - - thd->server_status&= ~bits_to_be_cleared; - - if (!res) - { - my_ok(thd, (thd->get_row_count_func() < 0) ? 0 : thd->get_row_count_func()); - } - else - { - DBUG_ASSERT(thd->is_error() || thd->killed); - goto error; // Substatement should already have sent error - } + if (do_execute_sp(thd, sp)) + goto error; } break; } + + case SQLCOM_COMPOUND: + DBUG_ASSERT(all_tables == 0); + DBUG_ASSERT(thd->in_sub_stmt == 0); + if (do_execute_sp(thd, lex->sphead)) + goto error; + break; + case SQLCOM_ALTER_PROCEDURE: case SQLCOM_ALTER_FUNCTION: { @@ -4828,6 +5147,7 @@ create_sp_error: if (check_routine_access(thd, ALTER_PROC_ACL, db, name, lex->sql_command == SQLCOM_DROP_PROCEDURE, 0)) goto error; + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) /* Conditionally writes to binlog */ sp_result= sp_drop_routine(thd, type, lex->spname); @@ -4945,6 +5265,7 @@ create_sp_error: Note: SQLCOM_CREATE_VIEW also handles 'ALTER VIEW' commands as specified through the thd->lex->create_view_mode flag. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_view(thd, first_table, thd->lex->create_view_mode); break; } @@ -4953,12 +5274,14 @@ create_sp_error: if (check_table_access(thd, DROP_ACL, all_tables, FALSE, UINT_MAX, FALSE)) goto error; /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_drop_view(thd, first_table, thd->lex->drop_mode); break; } case SQLCOM_CREATE_TRIGGER: { /* Conditionally writes to binlog. */ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 1); break; @@ -4966,6 +5289,7 @@ create_sp_error: case SQLCOM_DROP_TRIGGER: { /* Conditionally writes to binlog. 
*/ + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) res= mysql_create_or_drop_trigger(thd, all_tables, 0); break; } @@ -4985,9 +5309,14 @@ create_sp_error: my_ok(thd); break; case SQLCOM_XA_COMMIT: - if (trans_xa_commit(thd)) - goto error; + { + bool commit_failed= trans_xa_commit(thd); thd->mdl_context.release_transactional_locks(); + if (commit_failed) + { + WSREP_DEBUG("XA commit failed, MDL released: %lu", thd->thread_id); + goto error; + } /* We've just done a commit, reset transaction isolation level and access mode to the session default. @@ -4996,10 +5325,16 @@ create_sp_error: thd->tx_read_only= thd->variables.tx_read_only; my_ok(thd); break; + } case SQLCOM_XA_ROLLBACK: - if (trans_xa_rollback(thd)) - goto error; + { + bool rollback_failed= trans_xa_rollback(thd); thd->mdl_context.release_transactional_locks(); + if (rollback_failed) + { + WSREP_DEBUG("XA rollback failed, MDL released: %lu", thd->thread_id); + goto error; + } /* We've just done a rollback, reset transaction isolation level and access mode to the session default. @@ -5008,6 +5343,7 @@ create_sp_error: thd->tx_read_only= thd->variables.tx_read_only; my_ok(thd); break; + } case SQLCOM_XA_RECOVER: res= mysql_xa_recover(thd); break; @@ -5018,11 +5354,13 @@ create_sp_error: my_ok(thd); break; case SQLCOM_INSTALL_PLUGIN: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (! (res= mysql_install_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); break; case SQLCOM_UNINSTALL_PLUGIN: + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL) if (! 
(res= mysql_uninstall_plugin(thd, &thd->lex->comment, &thd->lex->ident))) my_ok(thd); @@ -5133,6 +5471,7 @@ error: finish: + thd->reset_query_timer(); DBUG_ASSERT(!thd->in_active_multi_stmt_transaction() || thd->in_multi_stmt_transaction_mode()); @@ -5171,6 +5510,9 @@ finish: /* Free tables */ close_thread_tables(thd); +#ifdef WITH_WSREP + thd->wsrep_consistency_check= NO_CONSISTENCY_CHECK; +#endif /* WITH_WSREP */ #ifndef DBUG_OFF if (lex->sql_command != SQLCOM_SET_OPTION && ! thd->in_sub_stmt) @@ -5224,6 +5566,22 @@ finish: { thd->mdl_context.release_statement_locks(); } + WSREP_TO_ISOLATION_END; + +#ifdef WITH_WSREP + /* + Force release of transactional locks if not in active MST and wsrep is on. + */ + if (WSREP(thd) && + ! thd->in_sub_stmt && + ! thd->in_active_multi_stmt_transaction() && + thd->mdl_context.has_transactional_locks()) + { + WSREP_DEBUG("Forcing release of transactional locks for thd %lu", + thd->thread_id); + thd->mdl_context.release_transactional_locks(); + } +#endif /* WITH_WSREP */ DBUG_RETURN(res || thd->is_error()); } @@ -5332,6 +5690,7 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables) status_var_increment(thd->status_var.empty_queries); else status_var_add(thd->status_var.rows_sent, thd->get_sent_row_count()); + return res; } @@ -6170,6 +6529,21 @@ void THD::reset_for_next_command() thd->auto_inc_intervals_in_cur_stmt_for_binlog.empty(); thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0; +#ifdef WITH_WSREP + /* + Autoinc variables should be adjusted only for locally executed + transactions. Appliers and replayers are either processing ROW + events or get autoinc variable values from Query_log_event. 
+ */ + if (WSREP(thd) && thd->wsrep_exec_mode == LOCAL_STATE && + wsrep_auto_increment_control) + { + thd->variables.auto_increment_offset= + global_system_variables.auto_increment_offset; + thd->variables.auto_increment_increment= + global_system_variables.auto_increment_increment; + } +#endif /* WITH_WSREP */ thd->query_start_used= 0; thd->query_start_sec_part_used= 0; thd->is_fatal_error= thd->time_zone_used= 0; @@ -6336,7 +6710,7 @@ void create_select_for_variable(const char *var_name) { THD *thd; LEX *lex; - LEX_STRING tmp, null_lex_string; + LEX_STRING tmp; Item *var; char buff[MAX_SYS_VAR_LENGTH*2+4+8], *end; DBUG_ENTER("create_select_for_variable"); @@ -6347,12 +6721,11 @@ void create_select_for_variable(const char *var_name) lex->sql_command= SQLCOM_SELECT; tmp.str= (char*) var_name; tmp.length=strlen(var_name); - bzero((char*) &null_lex_string.str, sizeof(null_lex_string)); /* We set the name of Item to @@session.var_name because that then is used as the column name in the output. 
*/ - if ((var= get_system_var(thd, OPT_SESSION, tmp, null_lex_string))) + if ((var= get_system_var(thd, OPT_SESSION, tmp, null_lex_str))) { end= strxmov(buff, "@@session.", var_name, NullS); var->set_name(buff, end-buff, system_charset_info); @@ -6373,6 +6746,108 @@ void mysql_init_multi_delete(LEX *lex) lex->query_tables_last= &lex->query_tables; } +static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length, + Parser_state *parser_state) +{ +#ifdef WITH_WSREP + bool is_autocommit= + !thd->in_multi_stmt_transaction_mode() && + thd->wsrep_conflict_state == NO_CONFLICT && + !thd->wsrep_applier; + + do + { + if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT) + { + thd->wsrep_conflict_state= NO_CONFLICT; + /* Performance Schema Interface instrumentation, begin */ + thd->m_statement_psi= MYSQL_REFINE_STATEMENT(thd->m_statement_psi, + com_statement_info[thd->get_command()].m_key); + MYSQL_SET_STATEMENT_TEXT(thd->m_statement_psi, thd->query(), + thd->query_length()); + } + mysql_parse(thd, rawbuf, length, parser_state); + + if (WSREP(thd)) { + /* wsrep BF abort in query exec phase */ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + wsrep_client_rollback(thd); + + WSREP_DEBUG("abort in exec query state, avoiding autocommit"); + } + + if (thd->wsrep_conflict_state== MUST_REPLAY) + { + wsrep_replay_transaction(thd); + } + + /* setting error code for BF aborted trxs */ + if (thd->wsrep_conflict_state == ABORTED || + thd->wsrep_conflict_state == CERT_FAILURE) + { + mysql_reset_thd_for_next_command(thd); + thd->killed= NOT_KILLED; + if (is_autocommit && + thd->lex->sql_command != SQLCOM_SELECT && + (thd->wsrep_retry_counter < thd->variables.wsrep_retry_autocommit)) + { + WSREP_DEBUG("wsrep retrying AC query: %s", + (thd->query()) ? 
thd->query() : "void"); + + /* Performance Schema Interface instrumentation, end */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + close_thread_tables(thd); + + thd->wsrep_conflict_state= RETRY_AUTOCOMMIT; + thd->wsrep_retry_counter++; // grow + wsrep_copy_query(thd); + thd->set_time(); + parser_state->reset(rawbuf, length); + } + else + { + WSREP_DEBUG("%s, thd: %lu is_AC: %d, retry: %lu - %lu SQL: %s", + (thd->wsrep_conflict_state == ABORTED) ? + "BF Aborted" : "cert failure", + thd->thread_id, is_autocommit, thd->wsrep_retry_counter, + thd->variables.wsrep_retry_autocommit, thd->query()); + my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + thd->killed= NOT_KILLED; + thd->wsrep_conflict_state= NO_CONFLICT; + if (thd->wsrep_conflict_state != REPLAYING) + thd->wsrep_retry_counter= 0; // reset + } + } + else + { + set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + + /* If retry is requested clean up explain structure */ + if (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT && thd->lex->explain) + delete_explain_query(thd->lex); + + } while (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT); + + if (thd->wsrep_retry_query) + { + WSREP_DEBUG("releasing retry_query: conf %d sent %d kill %d errno %d SQL %s", + thd->wsrep_conflict_state, + thd->get_stmt_da()->is_sent(), + thd->killed, + thd->get_stmt_da()->is_error() ? 
thd->get_stmt_da()->sql_errno() : 0, + thd->wsrep_retry_query); + my_free(thd->wsrep_retry_query); + thd->wsrep_retry_query = NULL; + thd->wsrep_retry_query_len = 0; + thd->wsrep_retry_command = COM_CONNECT; + } +#endif /* WITH_WSREP */ +} /* When you modify mysql_parse(), you may need to mofify @@ -7402,8 +7877,9 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ faster and do a harder kill than KILL_SYSTEM_THREAD; */ - if ((thd->security_ctx->master_access & SUPER_ACL) || - thd->security_ctx->user_matches(tmp->security_ctx)) + if (((thd->security_ctx->master_access & SUPER_ACL) || + thd->security_ctx->user_matches(tmp->security_ctx)) && + !wsrep_thd_is_BF(tmp, true)) { tmp->awake(kill_signal); error=0; @@ -8274,8 +8750,7 @@ LEX_USER *create_definer(THD *thd, LEX_STRING *user_name, LEX_STRING *host_name) definer->user= *user_name; definer->host= *host_name; - definer->password.str= NULL; - definer->password.length= 0; + definer->password= null_lex_str; return definer; } diff --git a/sql/sql_parse.h b/sql/sql_parse.h index 926a4d800ad..da024a5e746 100644 --- a/sql/sql_parse.h +++ b/sql/sql_parse.h @@ -148,15 +148,6 @@ inline bool check_identifier_name(LEX_STRING *str) return check_identifier_name(str, NAME_CHAR_LEN, 0, ""); } - -/* - check_access() is needed for the connect engine. - It cannot be inlined - it must be exported. 
-*/ -bool check_access(THD *thd, ulong want_access, const char *db, ulong *save_priv, - GRANT_INTERNAL_INFO *grant_internal_info, - bool dont_check_global_grants, bool no_errors); - #ifndef NO_EMBEDDED_ACCESS_CHECKS bool check_one_table_access(THD *thd, ulong privilege, TABLE_LIST *tables); bool check_single_table_access(THD *thd, ulong privilege, @@ -194,14 +185,4 @@ check_table_access(THD *thd, ulong requirements,TABLE_LIST *tables, { return false; } #endif /*NO_EMBEDDED_ACCESS_CHECKS*/ -/* These were under the INNODB_COMPATIBILITY_HOOKS */ - -bool check_global_access(THD *thd, ulong want_access, bool no_errors= false); - -inline bool is_supported_parser_charset(CHARSET_INFO *cs) -{ - return MY_TEST(cs->mbminlen == 1); -} - - #endif /* SQL_PARSE_INCLUDED */ diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index 1ce952b9030..9bc8147c75f 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -6887,7 +6887,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, 1) Write the new frm, pack it and then delete it 2) Perform the change within the handler */ - if (mysql_write_frm(lpt, WFRM_WRITE_SHADOW | WFRM_PACK_FRM) || + if (mysql_write_frm(lpt, WFRM_WRITE_SHADOW) || mysql_change_partitions(lpt)) { goto err; diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc index 8c59febeb77..8755ec47c54 100644 --- a/sql/sql_partition_admin.cc +++ b/sql/sql_partition_admin.cc @@ -1,4 +1,5 @@ /* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2014, SkySQL Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -128,7 +129,7 @@ static bool check_exchange_partition(TABLE *table, TABLE *part_table) { /* Only allowed on partitioned tables throught the generic ha_partition - handler, i.e not yet for native partitioning (NDB). + handler, i.e not yet for native partitioning. 
*/ my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0)); DBUG_RETURN(TRUE); @@ -763,14 +764,26 @@ bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) if (check_one_table_access(thd, DROP_ACL, first_table)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + if (WSREP_ON) + { + TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl); + + if ((!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, first_table)) && + wsrep_to_isolation_begin( + thd, first_table->db, first_table->table_name, NULL) + ) + { + WSREP_WARN("ALTER TABLE isolation failure"); + DBUG_RETURN(TRUE); + } + } +#endif /* WITH_WSREP */ + if (open_tables(thd, &first_table, &table_counter, 0)) DBUG_RETURN(true); - /* - TODO: Add support for TRUNCATE PARTITION for NDB and other - engines supporting native partitioning. - */ - if (!first_table->table || first_table->view || first_table->table->s->db_type() != partition_hton) { diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index b2dd38ad720..9ae3d792744 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -175,7 +175,6 @@ static struct { "performance_schema", PLUGIN_FORCE }, /* we disable few other plugins by default */ - { "ndbcluster", PLUGIN_OFF }, { "feedback", PLUGIN_OFF } }; @@ -247,41 +246,27 @@ struct st_mysql_sys_var MYSQL_PLUGIN_VAR_HEADER; }; -static SHOW_TYPE pluginvar_show_type(st_mysql_sys_var *plugin_var); - - /* sys_var class for access to all plugin variables visible to the user */ -class sys_var_pluginvar: public sys_var +class sys_var_pluginvar: public sys_var, public Sql_alloc { public: struct st_plugin_int *plugin; struct st_mysql_sys_var *plugin_var; - static void *operator new(size_t size, MEM_ROOT *mem_root) - { return (void*) alloc_root(mem_root, size); } - static void operator delete(void *ptr_arg,size_t size) - { TRASH(ptr_arg, size); } sys_var_pluginvar(sys_var_chain *chain, const char *name_arg, - struct st_mysql_sys_var *plugin_var_arg) - :sys_var(chain, name_arg, plugin_var_arg->comment, - 
(plugin_var_arg->flags & PLUGIN_VAR_THDLOCAL ? SESSION : GLOBAL) | - (plugin_var_arg->flags & PLUGIN_VAR_READONLY ? READONLY : 0), - 0, -1, NO_ARG, pluginvar_show_type(plugin_var_arg), 0, 0, - VARIABLE_NOT_IN_BINLOG, NULL, NULL, NULL), - plugin_var(plugin_var_arg) - { plugin_var->name= name_arg; } + st_plugin_int *p, st_mysql_sys_var *plugin_var_arg); sys_var_pluginvar *cast_pluginvar() { return this; } - bool check_update_type(Item_result type); - SHOW_TYPE show_type(); uchar* real_value_ptr(THD *thd, enum_var_type type); TYPELIB* plugin_var_typelib(void); - uchar* do_value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); - uchar* session_value_ptr(THD *thd, LEX_STRING *base) + uchar* do_value_ptr(THD *thd, enum_var_type type, const LEX_STRING *base); + uchar* session_value_ptr(THD *thd, const LEX_STRING *base) { return do_value_ptr(thd, OPT_SESSION, base); } - uchar* global_value_ptr(THD *thd, LEX_STRING *base) + uchar* global_value_ptr(THD *thd, const LEX_STRING *base) { return do_value_ptr(thd, OPT_GLOBAL, base); } + uchar *default_value_ptr(THD *thd) + { return do_value_ptr(thd, OPT_DEFAULT, 0); } bool do_check(THD *thd, set_var *var); virtual void session_save_default(THD *thd, set_var *var) {} virtual void global_save_default(THD *thd, set_var *var) {} @@ -1151,7 +1136,7 @@ static void plugin_deinitialize(struct st_plugin_int *plugin, bool ref_check) historical ndb behavior caused MySQL plugins to specify status var names in full, with the plugin name prefix. this was never fixed in MySQL. - MariaDB fixes that but support MySQL style too. + MariaDB fixes that but supports MySQL style too. */ SHOW_VAR *show_vars= plugin->plugin->status_vars; SHOW_VAR tmp_array[2]= { @@ -1183,10 +1168,6 @@ static void plugin_deinitialize(struct st_plugin_int *plugin, bool ref_check) } plugin->state= PLUGIN_IS_UNINITIALIZED; - /* - We do the check here because NDB has a worker THD which doesn't - exit until NDB is shut down. 
- */ if (ref_check && plugin->ref_count) sql_print_error("Plugin '%s' has ref_count=%d after deinitialization.", plugin->name.str, plugin->ref_count); @@ -1393,7 +1374,7 @@ static int plugin_initialize(MEM_ROOT *tmp_root, struct st_plugin_int *plugin, historical ndb behavior caused MySQL plugins to specify status var names in full, with the plugin name prefix. this was never fixed in MySQL. - MariaDB fixes that, but supports MySQL style too. + MariaDB fixes that but supports MySQL style too. */ SHOW_VAR *show_vars= plugin->plugin->status_vars; SHOW_VAR tmp_array[2]= { @@ -1407,22 +1388,6 @@ static int plugin_initialize(MEM_ROOT *tmp_root, struct st_plugin_int *plugin, goto err; } - /* - set the plugin attribute of plugin's sys vars so they are pointing - to the active plugin - */ - if (plugin->system_vars) - { - sys_var_pluginvar *var= plugin->system_vars->cast_pluginvar(); - for (;;) - { - var->plugin= plugin; - if (!var->next) - break; - var= var->next->cast_pluginvar(); - } - } - ret= 0; err: @@ -3074,12 +3039,12 @@ void plugin_thdvar_init(THD *thd) plugin_ref old_table_plugin= thd->variables.table_plugin; plugin_ref old_tmp_table_plugin= thd->variables.tmp_table_plugin; DBUG_ENTER("plugin_thdvar_init"); - + // This function may be called many times per THD (e.g. 
on COM_CHANGE_USER) thd->variables.table_plugin= NULL; thd->variables.tmp_table_plugin= NULL; cleanup_variables(thd, &thd->variables); - + thd->variables= global_system_variables; /* we are going to allocate these lazily */ @@ -3087,15 +3052,22 @@ void plugin_thdvar_init(THD *thd) thd->variables.dynamic_variables_size= 0; thd->variables.dynamic_variables_ptr= 0; - mysql_mutex_lock(&LOCK_plugin); - thd->variables.table_plugin= + if (IF_WSREP((!WSREP(thd) || !thd->wsrep_applier),1)) + { + mysql_mutex_lock(&LOCK_plugin); + thd->variables.table_plugin= intern_plugin_lock(NULL, global_system_variables.table_plugin); - if (global_system_variables.tmp_table_plugin) - thd->variables.tmp_table_plugin= + if (global_system_variables.tmp_table_plugin) + thd->variables.tmp_table_plugin= intern_plugin_lock(NULL, global_system_variables.tmp_table_plugin); - intern_plugin_unlock(NULL, old_table_plugin); - intern_plugin_unlock(NULL, old_tmp_table_plugin); - mysql_mutex_unlock(&LOCK_plugin); + intern_plugin_unlock(NULL, old_table_plugin); + intern_plugin_unlock(NULL, old_tmp_table_plugin); + mysql_mutex_unlock(&LOCK_plugin); + } else { + thd->variables.table_plugin= NULL; + thd->variables.tmp_table_plugin= NULL; + } + DBUG_VOID_RETURN; } @@ -3213,7 +3185,7 @@ static void plugin_vars_free_values(sys_var *vars) DBUG_VOID_RETURN; } -static SHOW_TYPE pluginvar_show_type(st_mysql_sys_var *plugin_var) +static SHOW_TYPE pluginvar_show_type(const st_mysql_sys_var *plugin_var) { switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_UNSIGNED)) { case PLUGIN_VAR_BOOL: @@ -3244,29 +3216,29 @@ static SHOW_TYPE pluginvar_show_type(st_mysql_sys_var *plugin_var) } -bool sys_var_pluginvar::check_update_type(Item_result type) +static int pluginvar_sysvar_flags(const st_mysql_sys_var *p) { - switch (plugin_var->flags & PLUGIN_VAR_TYPEMASK) { - case PLUGIN_VAR_INT: - case PLUGIN_VAR_LONG: - case PLUGIN_VAR_LONGLONG: - return type != INT_RESULT; - case PLUGIN_VAR_STR: - return type != 
STRING_RESULT; - case PLUGIN_VAR_ENUM: - case PLUGIN_VAR_BOOL: - case PLUGIN_VAR_SET: - return type != STRING_RESULT && type != INT_RESULT; - case PLUGIN_VAR_DOUBLE: - return type != INT_RESULT && type != REAL_RESULT && type != DECIMAL_RESULT; - default: - return true; - } + return (p->flags & PLUGIN_VAR_THDLOCAL ? sys_var::SESSION : sys_var::GLOBAL) + | (p->flags & PLUGIN_VAR_READONLY ? sys_var::READONLY : 0); } +sys_var_pluginvar::sys_var_pluginvar(sys_var_chain *chain, const char *name_arg, + st_plugin_int *p, st_mysql_sys_var *pv) + : sys_var(chain, name_arg, pv->comment, pluginvar_sysvar_flags(pv), + 0, pv->flags & PLUGIN_VAR_NOCMDOPT ? -1 : 0, NO_ARG, + pluginvar_show_type(pv), 0, + NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, NULL), + plugin(p), plugin_var(pv) +{ + plugin_var->name= name_arg; + plugin_opt_set_limits(&option, pv); +} uchar* sys_var_pluginvar::real_value_ptr(THD *thd, enum_var_type type) { + if (type == OPT_DEFAULT) + return (uchar*)&option.def_value; + DBUG_ASSERT(thd || (type == OPT_GLOBAL)); if (plugin_var->flags & PLUGIN_VAR_THDLOCAL) { @@ -3298,7 +3270,7 @@ TYPELIB* sys_var_pluginvar::plugin_var_typelib(void) uchar* sys_var_pluginvar::do_value_ptr(THD *thd, enum_var_type type, - LEX_STRING *base) + const LEX_STRING *base) { uchar* result; @@ -3335,7 +3307,7 @@ bool sys_var_pluginvar::session_update(THD *thd, set_var *var) DBUG_ASSERT(thd == current_thd); mysql_mutex_lock(&LOCK_global_system_variables); - void *tgt= real_value_ptr(thd, var->type); + void *tgt= real_value_ptr(thd, OPT_SESSION); const void *src= var->value ? 
(void*)&var->save_result : (void*)real_value_ptr(thd, OPT_GLOBAL); mysql_mutex_unlock(&LOCK_global_system_variables); @@ -3350,7 +3322,7 @@ bool sys_var_pluginvar::global_update(THD *thd, set_var *var) DBUG_ASSERT(!is_readonly()); mysql_mutex_assert_owner(&LOCK_global_system_variables); - void *tgt= real_value_ptr(thd, var->type); + void *tgt= real_value_ptr(thd, OPT_GLOBAL); const void *src= &var->save_result; if (!var->value) @@ -3613,12 +3585,6 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp, options+= 2; } - if (!my_strcasecmp(&my_charset_latin1, plugin_name_ptr, "NDBCLUSTER")) - { - plugin_name_ptr= const_cast<char*>("ndb"); // Use legacy "ndb" prefix - plugin_name_len= 3; - } - /* Two passes as the 2nd pass will take pointer addresses for use by my_getopt and register_var() in the first pass uses realloc @@ -3829,6 +3795,17 @@ static my_option *construct_help_options(MEM_ROOT *mem_root, DBUG_RETURN(opts); } +extern "C" my_bool mark_changed(int, const struct my_option *, char *); +my_bool mark_changed(int, const struct my_option *opt, char *) +{ + if (opt->app_type) + { + sys_var *var= (sys_var*) opt->app_type; + var->value_origin= sys_var::CONFIG; + } + return 0; +} + /** Create and register system variables supplied from the plugin and assigns initial values from corresponding command line arguments. 
@@ -3860,21 +3837,22 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, &tmp->mem_root : &plugin_vars_mem_root; st_mysql_sys_var **opt; my_option *opts= NULL; - LEX_STRING plugin_name; - char *varname; int error; - sys_var *v __attribute__((unused)); struct st_bookmark *var; - uint len, count= EXTRA_OPTIONS; + uint len=0, count= EXTRA_OPTIONS; st_ptr_backup *tmp_backup= 0; DBUG_ENTER("test_plugin_options"); DBUG_ASSERT(tmp->plugin && tmp->name.str); - for (opt= tmp->plugin->system_vars; opt && *opt; opt++) - count+= 2; /* --{plugin}-{optname} and --plugin-{plugin}-{optname} */ - - if (count > EXTRA_OPTIONS || (*argc > 1)) + if (tmp->plugin->system_vars || (*argc > 1)) { + for (opt= tmp->plugin->system_vars; opt && *opt; opt++) + { + len++; + if (!((*opt)->flags & PLUGIN_VAR_NOCMDOPT)) + count+= 2; /* --{plugin}-{optname} and --plugin-{plugin}-{optname} */ + } + if (!(opts= (my_option*) alloc_root(tmp_root, sizeof(my_option) * count))) { sql_print_error("Out of memory for plugin '%s'.", tmp->name.str); @@ -3888,15 +3866,61 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, DBUG_RETURN(-1); } - /* - We adjust the default value to account for the hardcoded exceptions - we have set for the federated and ndbcluster storage engines. 
- */ + if (tmp->plugin->system_vars) + { + tmp_backup= (st_ptr_backup *)my_alloca(len * sizeof(tmp_backup[0])); + DBUG_ASSERT(tmp->nbackups == 0); + DBUG_ASSERT(tmp->ptr_backup == 0); + + for (opt= tmp->plugin->system_vars; *opt; opt++) + { + st_mysql_sys_var *o= *opt; + char *varname; + sys_var *v; + + if (o->flags & PLUGIN_VAR_NOSYSVAR) + continue; + + tmp_backup[tmp->nbackups++].save(&o->name); + if ((var= find_bookmark(tmp->name.str, o->name, o->flags))) + varname= var->key + 1; + else + { + len= tmp->name.length + strlen(o->name) + 2; + varname= (char*) alloc_root(mem_root, len); + strxmov(varname, tmp->name.str, "-", o->name, NullS); + my_casedn_str(&my_charset_latin1, varname); + convert_dash_to_underscore(varname, len-1); + } + v= new (mem_root) sys_var_pluginvar(&chain, varname, tmp, o); + if (!(o->flags & PLUGIN_VAR_NOCMDOPT)) + { + for (my_option *mo=opts; mo->name; mo++) + if (mo->app_type == o) + mo->app_type= v; + } + } + + if (tmp->nbackups) + { + size_t bytes= tmp->nbackups * sizeof(tmp->ptr_backup[0]); + tmp->ptr_backup= (st_ptr_backup *)alloc_root(mem_root, bytes); + if (!tmp->ptr_backup) + { + restore_ptr_backup(tmp->nbackups, tmp_backup); + my_afree(tmp_backup); + goto err; + } + memcpy(tmp->ptr_backup, tmp_backup, bytes); + } + my_afree(tmp_backup); + } + if (tmp->load_option != PLUGIN_FORCE && tmp->load_option != PLUGIN_FORCE_PLUS_PERMANENT) opts[0].def_value= opts[1].def_value= plugin_load_option; - error= handle_options(argc, &argv, opts, NULL); + error= handle_options(argc, &argv, opts, mark_changed); (*argc)++; /* add back one for the program name */ if (error) @@ -3917,6 +3941,8 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, disable_plugin= (plugin_load_option == PLUGIN_OFF); tmp->load_option= plugin_load_option; + error= 1; + /* If the plugin is disabled it should not be initialized. 
*/ @@ -3925,79 +3951,32 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, if (global_system_variables.log_warnings) sql_print_information("Plugin '%s' is disabled.", tmp->name.str); - if (opts) - my_cleanup_options(opts); - DBUG_RETURN(1); - } - - if (!my_strcasecmp(&my_charset_latin1, tmp->name.str, "NDBCLUSTER")) - { - plugin_name.str= const_cast<char*>("ndb"); // Use legacy "ndb" prefix - plugin_name.length= 3; + goto err; } - else - plugin_name= tmp->name; - - error= 1; if (tmp->plugin->system_vars) { - for (len=0, opt= tmp->plugin->system_vars; *opt; len++, opt++) /* no-op */; - tmp_backup= (st_ptr_backup *)my_alloca(len * sizeof(tmp_backup[0])); - DBUG_ASSERT(tmp->nbackups == 0); - DBUG_ASSERT(tmp->ptr_backup == 0); - for (opt= tmp->plugin->system_vars; *opt; opt++) { - st_mysql_sys_var *o= *opt; - /* PLUGIN_VAR_STR command-line options without PLUGIN_VAR_MEMALLOC, point directly to values in the argv[] array. For plugins started at the server startup, argv[] array is allocated with load_defaults(), and freed when the server is shut down. But for plugins loaded with INSTALL PLUGIN, the memory allocated with load_defaults() is freed with - freed() at the end of mysql_install_plugin(). Which means we cannot + free() at the end of mysql_install_plugin(). Which means we cannot allow any pointers into that area. Thus, for all plugins loaded after the server was started, we copy string values to a plugin's memroot. 
*/ if (mysqld_server_started && - ((o->flags & (PLUGIN_VAR_STR | PLUGIN_VAR_NOCMDOPT | - PLUGIN_VAR_MEMALLOC)) == PLUGIN_VAR_STR)) + (((*opt)->flags & (PLUGIN_VAR_STR | PLUGIN_VAR_NOCMDOPT | + PLUGIN_VAR_MEMALLOC)) == PLUGIN_VAR_STR)) { - sysvar_str_t* str= (sysvar_str_t *)o; + sysvar_str_t* str= (sysvar_str_t *)*opt; if (*str->value) *str->value= strdup_root(mem_root, *str->value); } - - if (o->flags & PLUGIN_VAR_NOSYSVAR) - continue; - tmp_backup[tmp->nbackups++].save(&o->name); - if ((var= find_bookmark(plugin_name.str, o->name, o->flags))) - v= new (mem_root) sys_var_pluginvar(&chain, var->key + 1, o); - else - { - len= plugin_name.length + strlen(o->name) + 2; - varname= (char*) alloc_root(mem_root, len); - strxmov(varname, plugin_name.str, "-", o->name, NullS); - my_casedn_str(&my_charset_latin1, varname); - convert_dash_to_underscore(varname, len-1); - v= new (mem_root) sys_var_pluginvar(&chain, varname, o); - } - DBUG_ASSERT(v); /* check that an object was actually constructed */ - } /* end for */ - - if (tmp->nbackups) - { - size_t bytes= tmp->nbackups * sizeof(tmp->ptr_backup[0]); - tmp->ptr_backup= (st_ptr_backup *)alloc_root(mem_root, bytes); - if (!tmp->ptr_backup) - { - restore_ptr_backup(tmp->nbackups, tmp_backup); - goto err; - } - memcpy(tmp->ptr_backup, tmp_backup, bytes); } if (chain.first) @@ -4011,14 +3990,11 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, } tmp->system_vars= chain.first; } - my_afree(tmp_backup); } DBUG_RETURN(0); err: - if (tmp_backup) - my_afree(tmp_backup); if (opts) my_cleanup_options(opts); DBUG_RETURN(error); diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h index a0225f4a071..6b310865bba 100644 --- a/sql/sql_plugin.h +++ b/sql/sql_plugin.h @@ -17,7 +17,6 @@ #ifndef _sql_plugin_h #define _sql_plugin_h - /* the following #define adds server-only members to enum_mysql_show_type, that is defined in plugin.h diff --git a/sql/sql_plugin_services.h b/sql/sql_plugin_services.h index 
38b4c4074be..399de854218 100644 --- a/sql/sql_plugin_services.h +++ b/sql/sql_plugin_services.h @@ -1,5 +1,5 @@ /* Copyright (c) 2009, 2010, Oracle and/or its affiliates. - Copyright (c) 2012, 2013, Monty Program Ab + Copyright (c) 2012, 2014, Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,6 +16,7 @@ /* support for Services */ #include <service_versions.h> +#include <mysql/service_wsrep.h> struct st_service_ref { const char *name; @@ -61,7 +62,20 @@ static struct thd_timezone_service_st thd_timezone_handler= { static struct my_sha1_service_st my_sha1_handler = { my_sha1, - my_sha1_multi + my_sha1_multi, + my_sha1_context_size, + my_sha1_init, + my_sha1_input, + my_sha1_result +}; + +static struct my_md5_service_st my_md5_handler = { + my_md5, + my_md5_multi, + my_md5_context_size, + my_md5_init, + my_md5_input, + my_md5_result }; static struct logger_service_st logger_service_handler= { @@ -86,6 +100,45 @@ static struct thd_error_context_service_st thd_error_conext_handler= { thd_get_error_context_description }; +static struct wsrep_service_st wsrep_handler = { + get_wsrep, + get_wsrep_certify_nonPK, + get_wsrep_debug, + get_wsrep_drupal_282555_workaround, + get_wsrep_load_data_splitting, + get_wsrep_log_conflicts, + get_wsrep_protocol_version, + wsrep_aborting_thd_contains, + wsrep_aborting_thd_enqueue, + wsrep_consistency_check, + wsrep_is_wsrep_xid, + wsrep_lock_rollback, + wsrep_on, + wsrep_post_commit, + wsrep_prepare_key, + wsrep_run_wsrep_commit, + wsrep_thd_LOCK, + wsrep_thd_UNLOCK, + wsrep_thd_awake, + wsrep_thd_conflict_state, + wsrep_thd_conflict_state_str, + wsrep_thd_exec_mode, + wsrep_thd_exec_mode_str, + wsrep_thd_get_conflict_state, + wsrep_thd_is_BF, + wsrep_thd_is_wsrep, + wsrep_thd_query, + wsrep_thd_query_state, + wsrep_thd_query_state_str, + wsrep_thd_retry_counter, + wsrep_thd_set_conflict_state, + wsrep_thd_trx_seqno, + 
wsrep_thd_ws_handle, + wsrep_trx_is_aborting, + wsrep_trx_order_before, + wsrep_unlock_rollback +}; + static struct st_service_ref list_of_services[]= { { "my_snprintf_service", VERSION_my_snprintf, &my_snprintf_handler }, @@ -96,8 +149,10 @@ static struct st_service_ref list_of_services[]= { "thd_kill_statement_service", VERSION_kill_statement, &thd_kill_statement_handler }, { "thd_timezone_service", VERSION_thd_timezone, &thd_timezone_handler }, { "my_sha1_service", VERSION_my_sha1, &my_sha1_handler}, + { "my_md5_service", VERSION_my_md5, &my_md5_handler}, { "logger_service", VERSION_logger, &logger_service_handler }, { "thd_autoinc_service", VERSION_thd_autoinc, &thd_autoinc_handler }, + { "wsrep_service", VERSION_wsrep, &wsrep_handler }, { "thd_error_context_service", VERSION_thd_error_context, &thd_error_conext_handler }, }; diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index ebceae70ee5..b1765cdda04 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2002, 2013, Oracle and/or its affiliates. - Copyright (c) 2008, 2013, Monty Program Ab + Copyright (c) 2008, 2014, Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -117,6 +117,7 @@ When one supplies long data for a placeholder: #include "lock.h" // MYSQL_OPEN_FORCE_SHARED_MDL #include "sql_handler.h" #include "transaction.h" // trans_rollback_implicit +#include "wsrep_mysqld.h" /** A result class used to send cursor rows using the binary protocol. @@ -160,6 +161,20 @@ public: uint param_count; uint last_errno; uint flags; + /* + The value of thd->select_number at the end of the PREPARE phase. + + The issue is: each statement execution opens VIEWs, which may cause + select_lex objects to be created, and select_number values to be assigned. + + On the other hand, PREPARE assigns select_number values for triggers and + subqueries. 
+ + In order for select_number values from EXECUTE not to conflict with + select_number values from PREPARE, we keep the number and set it at each + execution. + */ + uint select_number_after_prepare; char last_error[MYSQL_ERRMSG_SIZE]; #ifndef EMBEDDED_LIBRARY bool (*set_params)(Prepared_statement *st, uchar *data, uchar *data_end, @@ -858,14 +873,9 @@ static bool insert_params_with_log(Prepared_statement *stmt, uchar *null_array, THD *thd= stmt->thd; Item_param **begin= stmt->param_array; Item_param **end= begin + stmt->param_count; - uint32 length= 0; - String str; - const String *res; + Copy_query_with_rewrite acc(thd, stmt->query(), stmt->query_length(), query); DBUG_ENTER("insert_params_with_log"); - if (query->copy(stmt->query(), stmt->query_length(), default_charset_info)) - DBUG_RETURN(1); - for (Item_param **it= begin; it < end; ++it) { Item_param *param= *it; @@ -898,15 +908,16 @@ static bool insert_params_with_log(Prepared_statement *stmt, uchar *null_array, */ else if (! is_param_long_data_type(param)) DBUG_RETURN(1); - res= param->query_val_str(thd, &str); - if (param->convert_str_value(thd)) - DBUG_RETURN(1); /* out of memory */ - if (query->replace(param->pos_in_query+length, 1, *res)) + if (acc.append(param)) DBUG_RETURN(1); - length+= res->length()-1; + if (param->convert_str_value(thd)) + DBUG_RETURN(1); /* out of memory */ } + if (acc.finalize()) + DBUG_RETURN(1); + DBUG_RETURN(0); } @@ -1035,23 +1046,15 @@ static bool emb_insert_params(Prepared_statement *stmt, String *expanded_query) } -static bool emb_insert_params_with_log(Prepared_statement *stmt, - String *query) +static bool emb_insert_params_with_log(Prepared_statement *stmt, String *query) { THD *thd= stmt->thd; Item_param **it= stmt->param_array; Item_param **end= it + stmt->param_count; MYSQL_BIND *client_param= thd->client_params; - - String str; - const String *res; - uint32 length= 0; - + Copy_query_with_rewrite acc(thd, stmt->query(), stmt->query_length(), query); 
DBUG_ENTER("emb_insert_params_with_log"); - if (query->copy(stmt->query(), stmt->query_length(), default_charset_info)) - DBUG_RETURN(1); - for (; it < end; ++it, ++client_param) { Item_param *param= *it; @@ -1072,15 +1075,15 @@ static bool emb_insert_params_with_log(Prepared_statement *stmt, DBUG_RETURN(1); } } - res= param->query_val_str(thd, &str); - if (param->convert_str_value(thd)) - DBUG_RETURN(1); /* out of memory */ - - if (query->replace(param->pos_in_query+length, 1, *res)) + if (acc.append(param)) DBUG_RETURN(1); - length+= res->length()-1; + if (param->convert_str_value(thd)) + DBUG_RETURN(1); /* out of memory */ } + if (acc.finalize()) + DBUG_RETURN(1); + DBUG_RETURN(0); } @@ -1171,6 +1174,30 @@ static bool insert_params_from_vars(Prepared_statement *stmt, DBUG_RETURN(0); } +static bool update_vars_from_params(Prepared_statement *stmt, + List<LEX_STRING>& varnames) +{ + Item_param **begin= stmt->param_array; + Item_param **end= begin + stmt->param_count; + LEX_STRING *varname; + List_iterator<LEX_STRING> var_it(varnames); + DBUG_ENTER("update_vars_from_params"); + + for (Item_param **it= begin; it < end; ++it) + { + Item_param *param= *it; + varname= var_it++; + if (param->inout != Item_param::OUT_PARAM) + continue; + Item_func_set_user_var *suv= new Item_func_set_user_var(*varname, param); + if (suv->fix_fields(stmt->thd, 0)) + DBUG_RETURN(1); + suv->save_item_result(param); + if (suv->update()) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} /** Do the same as insert_params_from_vars but also construct query text for @@ -1193,16 +1220,11 @@ static bool insert_params_from_vars_with_log(Prepared_statement *stmt, user_var_entry *entry; LEX_STRING *varname; List_iterator<LEX_STRING> var_it(varnames); - String buf; - const String *val; - uint32 length= 0; THD *thd= stmt->thd; + Copy_query_with_rewrite acc(thd, stmt->query(), stmt->query_length(), query); DBUG_ENTER("insert_params_from_vars_with_log"); - if (query->copy(stmt->query(), stmt->query_length(), 
default_charset_info)) - DBUG_RETURN(1); - for (Item_param **it= begin; it < end; ++it) { Item_param *param= *it; @@ -1218,15 +1240,16 @@ static bool insert_params_from_vars_with_log(Prepared_statement *stmt, setup_one_conversion_function(thd, param, param->param_type); if (param->set_from_user_var(thd, entry)) DBUG_RETURN(1); - val= param->query_val_str(thd, &buf); - if (param->convert_str_value(thd)) - DBUG_RETURN(1); /* out of memory */ + if (acc.append(param)) + DBUG_RETURN(1); - if (query->replace(param->pos_in_query+length, 1, *val)) + if (param->convert_str_value(thd)) DBUG_RETURN(1); - length+= val->length()-1; } + if (acc.finalize()) + DBUG_RETURN(1); + DBUG_RETURN(0); } @@ -2213,6 +2236,7 @@ static bool check_prepared_statement(Prepared_statement *stmt) case SQLCOM_GRANT: case SQLCOM_REVOKE: case SQLCOM_KILL: + case SQLCOM_COMPOUND: case SQLCOM_SHUTDOWN: break; @@ -3204,9 +3228,17 @@ void Prepared_statement::setup_set_params() Decide if we have to expand the query (because we must write it to logs or because we want to look it up in the query cache) or not. 
*/ - if ((mysql_bin_log.is_open() && is_update_query(lex->sql_command)) || - opt_log || opt_slow_log || - query_cache_is_cacheable_query(lex)) + bool replace_params_with_values= false; + // binlog + replace_params_with_values|= mysql_bin_log.is_open() && is_update_query(lex->sql_command); + // general or slow log + replace_params_with_values|= opt_log || thd->variables.sql_log_slow; + // query cache + replace_params_with_values|= query_cache_is_cacheable_query(lex); + // but never for compound statements + replace_params_with_values&= lex->sql_command != SQLCOM_COMPOUND; + + if (replace_params_with_values) { set_params_from_vars= insert_params_from_vars_with_log; #ifndef EMBEDDED_LIBRARY @@ -3248,6 +3280,7 @@ Prepared_statement::~Prepared_statement() free_items(); if (lex) { + delete lex->sphead; delete lex->result; delete (st_lex_local *) lex; } @@ -3426,12 +3459,15 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) if (error == 0) error= check_prepared_statement(this); - /* - Currently CREATE PROCEDURE/TRIGGER/EVENT are prohibited in prepared - statements: ensure we have no memory leak here if by someone tries - to PREPARE stmt FROM "CREATE PROCEDURE ..." 
- */ - DBUG_ASSERT(lex->sphead == NULL || error != 0); + if (error) + { + /* + let the following code know we're not in PS anymore, + the won't be any EXECUTE, so we need a full cleanup + */ + lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_PREPARE; + } + /* The order is important */ lex->unit.cleanup(); @@ -3455,6 +3491,8 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) trans_rollback_implicit(thd); thd->mdl_context.release_transactional_locks(); } + + select_number_after_prepare= thd->select_number; lex_end(lex); cleanup_stmt(); @@ -3581,7 +3619,8 @@ Prepared_statement::execute_loop(String *expanded_query, Reprepare_observer reprepare_observer; bool error; int reprepare_attempt= 0; - + + thd->select_number= select_number_after_prepare; /* Check if we got an error when sending long data */ if (state == Query_arena::STMT_ERROR) { @@ -3626,6 +3665,29 @@ reexecute: error= execute(expanded_query, open_cursor) || thd->is_error(); thd->m_reprepare_observer= NULL; +#ifdef WITH_WSREP + + if (WSREP_ON) + { + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + switch (thd->wsrep_conflict_state) + { + case CERT_FAILURE: + WSREP_DEBUG("PS execute fail for CERT_FAILURE: thd: %ld err: %d", + thd->thread_id, thd->get_stmt_da()->sql_errno() ); + thd->wsrep_conflict_state = NO_CONFLICT; + break; + + case MUST_REPLAY: + (void)wsrep_replay_transaction(thd); + break; + + default: + break; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } +#endif /* WITH_WSREP */ if ((sql_command_flags[lex->sql_command] & CF_REEXECUTION_FRAGILE) && error && !thd->is_fatal_error && !thd->killed && @@ -3640,6 +3702,11 @@ reexecute: if (! 
error) /* Success */ goto reexecute; } + + /* Assign values for OUT-parameters (SELECT INTO) in the SQL PS */ + if (!packet) + update_vars_from_params(this, thd->lex->prepared_stmt_params); + reset_stmt_params(this); return error; diff --git a/sql/sql_priv.h b/sql/sql_priv.h index 5dc19181e9b..9578c2fe8df 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -199,7 +199,7 @@ template <class T> bool valid_buffer_range(T jump, #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION (1ULL << 2) #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT (1ULL << 3) #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_INTERSECT (1ULL << 4) -#define OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN (1ULL << 5) +#define deprecated_ENGINE_CONDITION_PUSHDOWN (1ULL << 5) #define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN (1ULL << 6) #define OPTIMIZER_SWITCH_DERIVED_MERGE (1ULL << 7) #define OPTIMIZER_SWITCH_DERIVED_WITH_KEYS (1ULL << 8) diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc index bb3d5bb899a..24e5d053145 100644 --- a/sql/sql_reload.cc +++ b/sql/sql_reload.cc @@ -25,6 +25,7 @@ #include "hostname.h" // hostname_cache_refresh #include "sql_repl.h" // reset_master, reset_slave #include "rpl_mi.h" // Master_info::data_lock +#include "sql_show.h" #include "debug_sync.h" #include "rpl_mi.h" @@ -130,7 +131,7 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, result= 1; } - if ((options & REFRESH_SLOW_LOG) && opt_slow_log) + if ((options & REFRESH_SLOW_LOG) && global_system_variables.sql_log_slow) logger.flush_slow_log(); if ((options & REFRESH_GENERAL_LOG) && opt_log) @@ -253,7 +254,16 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, } if (options & REFRESH_CHECKPOINT) disable_checkpoints(thd); - } + /* + We need to do it second time after wsrep appliers were blocked in + make_global_read_lock_block_commit(thd) above since they could have + modified the tables too. + */ + if (WSREP(thd) && + close_cached_tables(thd, tables, (options & REFRESH_FAST) ? 
+ FALSE : TRUE, TRUE)) + result= 1; + } else { if (thd && thd->locked_tables_mode) @@ -368,35 +378,17 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options, #endif if (options & REFRESH_USER_RESOURCES) reset_mqh((LEX_USER *) NULL, 0); /* purecov: inspected */ - if (options & REFRESH_TABLE_STATS) - { - mysql_mutex_lock(&LOCK_global_table_stats); - free_global_table_stats(); - init_global_table_stats(); - mysql_mutex_unlock(&LOCK_global_table_stats); - } - if (options & REFRESH_INDEX_STATS) - { - mysql_mutex_lock(&LOCK_global_index_stats); - free_global_index_stats(); - init_global_index_stats(); - mysql_mutex_unlock(&LOCK_global_index_stats); - } - if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS)) - { - mysql_mutex_lock(&LOCK_global_user_client_stats); - if (options & REFRESH_USER_STATS) - { - free_global_user_stats(); - init_global_user_stats(); - } - if (options & REFRESH_CLIENT_STATS) - { - free_global_client_stats(); - init_global_client_stats(); - } - mysql_mutex_unlock(&LOCK_global_user_client_stats); - } + if (options & REFRESH_GENERIC) + { + List_iterator_fast<LEX_STRING> li(thd->lex->view_list); + LEX_STRING *ls; + while ((ls= li++)) + { + ST_SCHEMA_TABLE *table= find_schema_table(thd, ls->str); + if (table->reset_table()) + result= 1; + } + } if (*write_to_binlog != -1) *write_to_binlog= tmp_write_to_binlog; /* diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index c7bd28259ae..e91b3b0a2ed 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. 
- Copyright (c) 2008, 2013, Monty Program Ab + Copyright (c) 2008, 2014, Monty Program Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2946,6 +2946,7 @@ int start_slave(THD* thd , Master_info* mi, bool net_report) err: unlock_slave_threads(mi); + thd_proc_info(thd, 0); if (slave_errno) { @@ -3054,8 +3055,6 @@ int reset_slave(THD *thd, Master_info* mi) DBUG_RETURN(ER_SLAVE_MUST_STOP); } - ha_reset_slave(thd); - // delete relay logs, clear relay log coordinates if ((error= purge_relay_logs(&mi->rli, thd, 1 /* just reset */, @@ -3618,13 +3617,6 @@ bool mysql_show_binlog_events(THD* thd) /* select wich binary log to use: binlog or relay */ if ( thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ) { - /* - Wait for handlers to insert any pending information - into the binlog. For e.g. ndb which updates the binlog asynchronously - this is needed so that the uses sees all its own commands in the binlog - */ - ha_binlog_wait(thd); - binary_log= &mysql_bin_log; } else /* showing relay log contents */ diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 8ef3f64016e..7c1a9810db0 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -69,7 +69,7 @@ struct st_sargable_param; static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array); static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &leaves, - COND *conds, DYNAMIC_ARRAY *keyuse); + DYNAMIC_ARRAY *keyuse); static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse, JOIN_TAB *join_tab, uint tables, COND *conds, @@ -1338,7 +1338,7 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S /* Calculate how to do the join */ THD_STAGE_INFO(thd, stage_statistics); - if (make_join_statistics(this, select_lex->leaf_tables, conds, &keyuse) || + if (make_join_statistics(this, select_lex->leaf_tables, &keyuse) || thd->is_fatal_error) { DBUG_PRINT("error",("Error: make_join_statistics() 
failed")); @@ -3355,7 +3355,8 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select, select->head=table; table->reginfo.impossible_range=0; if ((error= select->test_quick_select(thd, *(key_map *)keys,(table_map) 0, - limit, 0, FALSE)) == 1) + limit, 0, FALSE, + TRUE /* remove_where_parts*/)) == 1) DBUG_RETURN(select->quick->records); if (error == -1) { @@ -3393,7 +3394,7 @@ typedef struct st_sargable_param static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, - COND *conds, DYNAMIC_ARRAY *keyuse_array) + DYNAMIC_ARRAY *keyuse_array) { int error= 0; TABLE *table; @@ -3597,10 +3598,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, } } - if (conds || outer_join) + if (join->conds || outer_join) { if (update_ref_and_keys(join->thd, keyuse_array, stat, join->table_count, - conds, ~outer_join, join->select_lex, &sargables)) + join->conds, ~outer_join, join->select_lex, &sargables)) goto error; /* Keyparts without prefixes may be useful if this JOIN is a subquery, and @@ -3844,8 +3845,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, } join->impossible_where= false; - if (conds && const_count) - { + if (join->conds && const_count) + { + Item* &conds= join->conds; conds->update_used_tables(); conds= remove_eq_conds(join->thd, conds, &join->cond_value); if (conds && conds->type() == Item::COND_ITEM && @@ -3857,7 +3859,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, join->impossible_where= true; conds=new Item_int((longlong) 0,1); } - join->conds= conds; + join->cond_equal= NULL; if (conds) { @@ -3942,12 +3944,18 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, { select= make_select(s->table, found_const_table_map, found_const_table_map, - *s->on_expr_ref ? *s->on_expr_ref : conds, + *s->on_expr_ref ? 
*s->on_expr_ref : join->conds, 1, &error); if (!select) goto error; records= get_quick_record_count(join->thd, select, s->table, &s->const_keys, join->row_limit); + /* Range analyzer could modify the condition. */ + if (*s->on_expr_ref) + *s->on_expr_ref= select->cond; + else + join->conds= select->cond; + s->quick=select->quick; s->needed_reg=select->needed_reg; select->quick=0; @@ -3958,7 +3966,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, if (join->thd->variables.optimizer_use_condition_selectivity > 1) calculate_cond_selectivity_for_table(join->thd, s->table, *s->on_expr_ref ? - *s->on_expr_ref : conds); + s->on_expr_ref : &join->conds); if (s->table->reginfo.impossible_range) { impossible_range= TRUE; @@ -9561,10 +9569,8 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) if (tab->table) { tab->table->file->pushed_cond= NULL; - if (((thd->variables.optimizer_switch & - OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) || - (tab->table->file->ha_table_flags() & - HA_MUST_USE_TABLE_CONDITION_PUSHDOWN)) && + if ((tab->table->file->ha_table_flags() & + HA_CAN_TABLE_CONDITION_PUSHDOWN) && !first_inner_tab) { COND *push_cond= @@ -9658,7 +9664,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) OPTION_FOUND_ROWS ? HA_POS_ERROR : join->unit->select_limit_cnt), 0, - FALSE) < 0) + FALSE, FALSE) < 0) { /* Before reporting "Impossible WHERE" for the whole query @@ -9672,7 +9678,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) OPTION_FOUND_ROWS ? 
HA_POS_ERROR : join->unit->select_limit_cnt),0, - FALSE) < 0) + FALSE, FALSE) < 0) DBUG_RETURN(1); // Impossible WHERE } else @@ -12703,8 +12709,8 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond, } if (and_level) { - args->concat(&eq_list); - args->concat((List<Item> *)&cond_equal.current_level); + args->append(&eq_list); + args->append((List<Item> *)&cond_equal.current_level); } } else if (cond->type() == Item::FUNC_ITEM || @@ -12759,7 +12765,7 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond, } and_cond->cond_equal.copy(cond_equal); cond_equal.current_level= and_cond->cond_equal.current_level; - args->concat((List<Item> *)&cond_equal.current_level); + args->append((List<Item> *)&cond_equal.current_level); return and_cond; } @@ -13413,7 +13419,7 @@ static COND* substitute_for_best_equal_field(JOIN_TAB *context_tab, This is a fatal error now. However we bail out by returning the original condition that we had before we started the transformation. */ - cond_list->concat((List<Item> *) &cond_equal->current_level); + cond_list->append((List<Item> *) &cond_equal->current_level); } } } @@ -14684,7 +14690,7 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value) if (eq_item->const_item() && eq_item->val_int()) it.remove(); } - cond_arg_list->concat((List<Item> *) cond_equalities); + cond_arg_list->append((List<Item> *) cond_equalities); } List<Item_equal> new_equalities; @@ -14741,7 +14747,7 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value) of cond_arg_list all together. 
*/ new_item_arg_list->disjoin((List<Item> *) new_item_equalities); - new_equalities.concat(new_item_equalities); + new_equalities.append(new_item_equalities); } } if (new_item_arg_list->is_empty()) @@ -14836,7 +14842,7 @@ internal_remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value) } } } - cond_arg_list->concat((List<Item> *) cond_equalities); + cond_arg_list->append((List<Item> *) cond_equalities); /* Propagate the newly formed multiple equalities to the all AND/OR levels of cond @@ -15036,6 +15042,11 @@ remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value) @retval true can be used @retval false cannot be used */ + +/* + psergey-todo: this returns false for int_column='1234' (here '1234' is a + constant. Need to discuss this with Bar). +*/ static bool test_if_equality_guarantees_uniqueness(Item *l, Item *r) { @@ -18496,7 +18507,7 @@ test_if_quick_select(JOIN_TAB *tab) tab->select->quick=0; return tab->select->test_quick_select(tab->join->thd, tab->keys, (table_map) 0, HA_POS_ERROR, 0, - FALSE); + FALSE, /*remove where parts*/FALSE); } @@ -19668,12 +19679,21 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx, { KEY_PART_INFO *key_part,*key_part_end; key_part=table->key_info[idx].key_part; - key_part_end=key_part+table->key_info[idx].user_defined_key_parts; + key_part_end=key_part + table->key_info[idx].ext_key_parts; key_part_map const_key_parts=table->const_key_parts[idx]; + uint user_defined_kp= table->key_info[idx].user_defined_key_parts; int reverse=0; uint key_parts; - my_bool on_pk_suffix= FALSE; + bool have_pk_suffix= false; + uint pk= table->s->primary_key; DBUG_ENTER("test_if_order_by_key"); + + if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && + table->key_info[idx].ext_key_part_map && + pk != MAX_KEY && pk != idx) + { + have_pk_suffix= true; + } for (; order ; order=order->next, const_key_parts>>=1) { @@ -19686,58 +19706,37 @@ static int test_if_order_by_key(ORDER *order, TABLE 
*table, uint idx, */ for (; const_key_parts & 1 ; const_key_parts>>= 1) key_part++; + + /* + This check was in this function historically (although I think it's + better to check it outside of this function): - if (key_part >= key_part_end) - { - /* - We are at the end of the key. Check if the engine has the primary - key as a suffix to the secondary keys. If it has continue to check - the primary key as a suffix. - */ - if (!on_pk_suffix && (table->key_info[idx].ext_key_part_map & 1) && - (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) && - table->s->primary_key != MAX_KEY && - table->s->primary_key != idx) - { - KEY_PART_INFO *start,*end; - uint pk_part_idx= 0; - on_pk_suffix= TRUE; - start= key_part= table->key_info[table->s->primary_key].key_part; - const_key_parts=table->const_key_parts[table->s->primary_key]; - - /* - Calculate true key_part_end and const_key_parts - (we have to stop as first not continous primary key part) - */ - for (key_part_end= key_part, - end= key_part+table->key_info[table->s->primary_key].user_defined_key_parts; - key_part_end < end; key_part_end++, pk_part_idx++) - { - /* Found hole in the pk_parts; Abort */ - if (!(table->key_info[idx].ext_key_part_map & - (((key_part_map) 1) << pk_part_idx))) - break; - } + "Test if the primary key parts were all const (i.e. there's one row). + The sorting doesn't matter" - /* Adjust const_key_parts */ - const_key_parts&= (((key_part_map) 1) << pk_part_idx) -1; + So, we're checking that + (1) this is an extended key + (2) we've reached its end + */ + key_parts= (key_part - table->key_info[idx].key_part); + if (have_pk_suffix && + reverse == 0 && // all were =const so far + key_parts == table->key_info[idx].ext_key_parts && + table->const_key_parts[pk] == PREV_BITS(uint, + table->key_info[pk]. 
+ user_defined_key_parts)) + { + key_parts= 0; + reverse= 1; // Key is ok to use + goto ok; + } - for (; const_key_parts & 1 ; const_key_parts>>= 1) - key_part++; - /* - Test if the primary key parts were all const (i.e. there's one row). - The sorting doesn't matter. - */ - if (key_part == start+table->key_info[table->s->primary_key].user_defined_key_parts && - reverse == 0) - { - key_parts= 0; - reverse= 1; // Key is ok to use - goto ok; - } - } - else - DBUG_RETURN(0); + if (key_part == key_part_end) + { + /* + There are some items left in ORDER BY that we don't have key parts for + */ + DBUG_RETURN(0); } if (key_part->field != field || !field->part_of_sortkey.is_set(idx)) @@ -19752,27 +19751,20 @@ static int test_if_order_by_key(ORDER *order, TABLE *table, uint idx, if (key_part < key_part_end) key_part++; } - if (on_pk_suffix) - { - uint used_key_parts_secondary= table->key_info[idx].user_defined_key_parts; - uint used_key_parts_pk= - (uint) (key_part - table->key_info[table->s->primary_key].key_part); - key_parts= used_key_parts_pk + used_key_parts_secondary; - if (reverse == -1 && - (!(table->file->index_flags(idx, used_key_parts_secondary - 1, 1) & - HA_READ_PREV) || - !(table->file->index_flags(table->s->primary_key, - used_key_parts_pk - 1, 1) & HA_READ_PREV))) - reverse= 0; // Index can't be used - } - else + key_parts= (uint) (key_part - table->key_info[idx].key_part); + + if (reverse == -1 && + !(table->file->index_flags(idx, user_defined_kp, 1) & HA_READ_PREV)) + reverse= 0; // Index can't be used + + if (have_pk_suffix && reverse == -1) { - key_parts= (uint) (key_part - table->key_info[idx].key_part); - if (reverse == -1 && - !(table->file->index_flags(idx, key_parts-1, 1) & HA_READ_PREV)) + uint pk_parts= table->key_info[pk].user_defined_key_parts; + if (!table->file->index_flags(pk, pk_parts, 1) & HA_READ_PREV) reverse= 0; // Index can't be used } + ok: if (used_key_parts != NULL) *used_key_parts= key_parts; @@ -19860,7 +19852,12 @@ test_if_subkey(ORDER *order, TABLE 
*table, uint ref, uint ref_key_parts, uint best= MAX_KEY; KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part; KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts; - + + /* + Find the shortest key that + - produces the required ordering + - has key #ref (up to ref_key_parts) as its subkey. + */ for (nr= 0 ; nr < table->s->keys ; nr++) { if (usable_keys->is_set(nr) && @@ -20053,7 +20050,8 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, been taken into account. */ usable_keys= *map; - + + /* Find indexes that cover all ORDER/GROUP BY fields */ for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next) { Item *item= (*tmp_order->item)->real_item(); @@ -20073,6 +20071,10 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, { ref_key= tab->ref.key; ref_key_parts= tab->ref.key_parts; + /* + todo: why does JT_REF_OR_NULL mean filesort? We could find another index + that satisfies the ordering. I would just set ref_key=MAX_KEY here... + */ if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT) goto use_filesort; } @@ -20099,15 +20101,12 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, if (ref_key >= 0 && ref_key != MAX_KEY) { - /* - We come here when there is a REF key. - */ + /* Current access method uses index ref_key with ref_key_parts parts */ if (!usable_keys.is_set(ref_key)) { - /* - We come here when ref_key is not among usable_keys - */ + /* However, ref_key doesn't match the needed ordering */ uint new_ref_key; + /* If using index only read, only consider other possible index only keys @@ -20123,27 +20122,23 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts, &usable_keys)) < MAX_KEY) { - if (tab->ref.key >= 0) - { - /* - We'll use ref access method on key new_ref_key. In general case - the index search tuple for new_ref_key will be different (e.g. 
- when one index is defined as (part1, part2, ...) and another as - (part1, part2(N), ...) and the WHERE clause contains - "part1 = const1 AND part2=const2". - So we build tab->ref from scratch here. - */ - KEYUSE *keyuse= tab->keyuse; - while (keyuse->key != new_ref_key && keyuse->table == tab->table) - keyuse++; - if (create_ref_for_key(tab->join, tab, keyuse, FALSE, - (tab->join->const_table_map | - OUTER_REF_TABLE_BIT))) - goto use_filesort; + /* + Index new_ref_key + - produces the required ordering, + - also has the same columns as ref_key for #ref_key_parts (this + means we will read the same number of rows as with ref_key). + */ - pick_table_access_method(tab); - } - else + /* + If new_ref_key allows to construct a quick select which uses more key + parts than ref(new_ref_key) would, do that. + + Otherwise, construct a ref access (todo: it's not clear what is the + win in using ref access when we could use quick select also?) + */ + if ((table->quick_keys.is_set(new_ref_key) && + table->quick_key_parts[new_ref_key] > ref_key_parts) || + !(tab->ref.key >= 0)) { /* The range optimizer constructed QUICK_RANGE for ref_key, and @@ -20168,19 +20163,47 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, (tab->join->select_options & OPTION_FOUND_ROWS) ? HA_POS_ERROR : - tab->join->unit->select_limit_cnt,0, - TRUE) <= 0; + tab->join->unit->select_limit_cnt,TRUE, + TRUE, FALSE) <= 0; if (res) { select->cond= save_cond; goto use_filesort; } + DBUG_ASSERT(tab->select->quick); + tab->type= JT_ALL; + tab->ref.key= -1; + tab->ref.key_parts= 0; + tab->use_quick= 1; + best_key= new_ref_key; /* We don't restore select->cond as we want to use the original condition as index condition pushdown is not active for the new index. + todo: why not perform index condition pushdown for the new index? */ } + else + { + /* + We'll use ref access method on key new_ref_key. In general case + the index search tuple for new_ref_key will be different (e.g. 
+ when one index is defined as (part1, part2, ...) and another as + (part1, part2(N), ...) and the WHERE clause contains + "part1 = const1 AND part2=const2". + So we build tab->ref from scratch here. + */ + KEYUSE *keyuse= tab->keyuse; + while (keyuse->key != new_ref_key && keyuse->table == tab->table) + keyuse++; + if (create_ref_for_key(tab->join, tab, keyuse, FALSE, + (tab->join->const_table_map | + OUTER_REF_TABLE_BIT))) + goto use_filesort; + + pick_table_access_method(tab); + } + ref_key= new_ref_key; changed_key= true; } @@ -20216,7 +20239,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, !(table->file->index_flags(best_key, 0, 1) & HA_CLUSTERED_INDEX))) goto use_filesort; - if (select && + if (select && // psergey: why doesn't this use a quick? table->quick_keys.is_set(best_key) && best_key != ref_key) { key_map map; @@ -20227,7 +20250,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, join->select_options & OPTION_FOUND_ROWS ? 
HA_POS_ERROR : join->unit->select_limit_cnt, - TRUE, FALSE); + TRUE, FALSE, FALSE); } order_direction= best_key_direction; /* @@ -20286,6 +20309,12 @@ check_reverse_order: */ if (!table->covering_keys.is_set(best_key)) table->disable_keyread(); + else + { + if (!table->key_read) + table->enable_keyread(); + } + if (!quick_created) { if (select) // Throw any existing quick select @@ -21978,7 +22007,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, Put elements from HAVING, ORDER BY and GROUP BY last to ensure that any reference used in these will resolve to a item that is already calculated */ - param->copy_funcs.concat(&extra_funcs); + param->copy_funcs.append(&extra_funcs); DBUG_RETURN(0); @@ -22455,8 +22484,7 @@ static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab) } if (join_tab->select) { - Item *cond_copy; - UNINIT_VAR(cond_copy); // used when pre_idx_push_select_cond!=NULL + Item *UNINIT_VAR(cond_copy); if (join_tab->select->pre_idx_push_select_cond) cond_copy= cond->copy_andor_structure(thd); if (join_tab->select->cond) @@ -23474,16 +23502,19 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab const char *tmp_buff; int f_idx; StringBuffer<64> key_name_buf; - if (is_table_read_plan->has_db_lookup_value()) + if (is_table_read_plan->trivial_show_command || + is_table_read_plan->has_db_lookup_value()) { /* The "key" has the name of the column referring to the database */ f_idx= table_list->schema_table->idx_field1; tmp_buff= table_list->schema_table->fields_info[f_idx].field_name; key_name_buf.append(tmp_buff, strlen(tmp_buff), cs); } - if (is_table_read_plan->has_table_lookup_value()) + if (is_table_read_plan->trivial_show_command || + is_table_read_plan->has_table_lookup_value()) { - if (is_table_read_plan->has_db_lookup_value()) + if (is_table_read_plan->trivial_show_command || + is_table_read_plan->has_db_lookup_value()) key_name_buf.append(','); f_idx= table_list->schema_table->idx_field2; @@ -23584,22 
+23615,11 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab { const COND *pushed_cond= tab->table->file->pushed_cond; - if (((thd->variables.optimizer_switch & - OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN) || - (tab->table->file->ha_table_flags() & - HA_MUST_USE_TABLE_CONDITION_PUSHDOWN)) && + if ((tab->table->file->ha_table_flags() & + HA_CAN_TABLE_CONDITION_PUSHDOWN) && pushed_cond) { eta->push_extra(ET_USING_WHERE_WITH_PUSHED_CONDITION); - /* - psergey-todo: what to do? This was useful with NDB only. - - if (explain_flags & DESCRIBE_EXTENDED) - { - extra.append(STRING_WITH_LEN(": ")); - ((COND *)pushed_cond)->print(&extra, QT_ORDINARY); - } - */ } else { @@ -23620,8 +23640,9 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab else eta->push_extra(ET_OPEN_FULL_TABLE); /* psergey-note: the following has a bug.*/ - if (table_list->is_table_read_plan->has_db_lookup_value() && - table_list->is_table_read_plan->has_table_lookup_value()) + if (table_list->is_table_read_plan->trivial_show_command || + (table_list->is_table_read_plan->has_db_lookup_value() && + table_list->is_table_read_plan->has_table_lookup_value())) eta->push_extra(ET_SCANNED_0_DATABASES); else if (table_list->is_table_read_plan->has_db_lookup_value() || table_list->is_table_read_plan->has_table_lookup_value()) @@ -24675,6 +24696,114 @@ void JOIN::cache_const_exprs() } } + +/* + Get a cost of reading rows_limit rows through index keynr. + + @detail + - If there is a quick select, we try to use it. + - if there is a ref(const) access, we try to use it, too. + - quick and ref(const) use different cost formulas, so if both are possible + we should make a cost-based choice. + + @param tab JOIN_TAB with table access (is NULL for single-table + UPDATE/DELETE) + @param read_time OUT Cost of reading using quick or ref(const) access. + + + @return + true There was a possible quick or ref access, its cost is in the OUT + parameters. 
+ false No quick or ref(const) possible (and so, the caller will attempt + to use a full index scan on this index). +*/ + +static bool get_range_limit_read_cost(const JOIN_TAB *tab, + const TABLE *table, + uint keynr, + ha_rows rows_limit, + double *read_time) +{ + bool res= false; + /* + We need to adjust the estimates if we had a quick select (or ref(const)) on + index keynr. + */ + if (table->quick_keys.is_set(keynr)) + { + /* + Start from quick select's rows and cost. These are always cheaper than + full index scan/cost. + */ + double best_rows= table->quick_rows[keynr]; + double best_cost= table->quick_costs[keynr]; + + /* + Check if ref(const) access was possible on this index. + */ + if (tab) + { + key_part_map const_parts= 0; + key_part_map map= 1; + uint kp; + /* Find how many key parts would be used by ref(const) */ + for (kp=0; kp < MAX_REF_PARTS; map=map << 1, kp++) + { + if (!(table->const_key_parts[keynr] & map)) + break; + const_parts |= map; + } + + if (kp > 0) + { + ha_rows ref_rows; + /* + Two possible cases: + 1. ref(const) uses the same #key parts as range access. + 2. ref(const) uses fewer key parts, because there is a + range_cond(key_part+1). + */ + if (kp == table->quick_key_parts[keynr]) + ref_rows= table->quick_rows[keynr]; + else + ref_rows= table->key_info[keynr].actual_rec_per_key(kp-1); + + if (ref_rows > 0) + { + double tmp= ref_rows; + /* Reuse the cost formula from best_access_path: */ + set_if_smaller(tmp, (double) tab->join->thd->variables.max_seeks_for_key); + if (table->covering_keys.is_set(keynr)) + tmp= table->file->keyread_time(keynr, 1, (ha_rows) tmp); + else + tmp= table->file->read_time(keynr, 1, + (ha_rows) MY_MIN(tmp,tab->worst_seeks)); + if (tmp < best_cost) + { + best_cost= tmp; + best_rows= ref_rows; + } + } + } + } + + if (best_rows > rows_limit) + { + /* + LIMIT clause specifies that we will need to read fewer records than + quick select will return. 
Assume that quick select's cost is + proportional to the number of records we need to return (e.g. if we + only need 1/3rd of records, it will cost us 1/3rd of quick select's + read time) + */ + best_cost *= rows_limit / best_rows; + } + *read_time= best_cost; + res= true; + } + return res; +} + /** Find a cheaper access key than a given @a key @@ -24768,6 +24897,11 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, } else read_time= table->file->scan_time(); + + /* + TODO: add cost of sorting here. + */ + read_time += COST_EPS; /* Calculate the selectivity of the ref_key for REF_ACCESS. For @@ -24927,6 +25061,14 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, */ index_scan_time= select_limit/rec_per_key * MY_MIN(rec_per_key, table->file->scan_time()); + double range_scan_time; + if (get_range_limit_read_cost(tab, table, nr, select_limit, + &range_scan_time)) + { + if (range_scan_time < index_scan_time) + index_scan_time= range_scan_time; + } + if ((ref_key < 0 && (group || table->force_index || is_covering)) || index_scan_time < read_time) { @@ -25081,15 +25223,18 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select, return MAX_KEY; } + /* - Count how much times conditions are true for several first rows of the table + Count how many times the specified conditions are true for the first rows_to_read + rows of the table. - @param thd thread handle - @param rows_to_read how much rows to check - @param table table which should be checked - @conds conds list of conditions and countars for them + @param thd Thread handle + @param rows_to_read How many rows to sample + @param table Table to use + @param conds INOUT List of conditions and counters for them - @return number of really checked rows or 0 in case of error or empty table + @return Number of rows we've checked. It can be equal to or less than rows_to_read. + 0 is returned for error or when the table had no rows. 
*/ ulong check_selectivity(THD *thd, diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 79c444ea442..fcca91c456b 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -116,10 +116,7 @@ static void get_cs_converted_string_value(THD *thd, bool use_hex); #endif -static void -append_algorithm(TABLE_LIST *table, String *buff); - -static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table); +static void append_algorithm(TABLE_LIST *table, String *buff); bool get_lookup_field_values(THD *, COND *, TABLE_LIST *, LOOKUP_FIELD_VALUES *); @@ -2900,10 +2897,9 @@ void remove_status_vars(SHOW_VAR *list) } - static bool show_status_array(THD *thd, const char *wild, SHOW_VAR *variables, - enum enum_var_type value_type, + enum enum_var_type scope, struct system_status_var *status_var, const char *prefix, TABLE *table, bool ucase_names, @@ -2912,10 +2908,8 @@ static bool show_status_array(THD *thd, const char *wild, my_aligned_storage<SHOW_VAR_FUNC_BUFF_SIZE, MY_ALIGNOF(long)> buffer; char * const buff= buffer.data; char *prefix_end; - /* the variable name should not be longer than 64 characters */ - char name_buffer[64]; + char name_buffer[NAME_CHAR_LEN]; int len; - LEX_STRING null_lex_str; SHOW_VAR tmp, *var; enum_check_fields save_count_cuted_fields= thd->count_cuted_fields; bool res= FALSE; @@ -2923,19 +2917,45 @@ static bool show_status_array(THD *thd, const char *wild, DBUG_ENTER("show_status_array"); thd->count_cuted_fields= CHECK_FIELD_WARN; - null_lex_str.str= 0; // For sys_var->value_ptr() - null_lex_str.length= 0; prefix_end=strnmov(name_buffer, prefix, sizeof(name_buffer)-1); if (*prefix) *prefix_end++= '_'; len=name_buffer + sizeof(name_buffer) - prefix_end; +#ifdef WITH_WSREP + bool is_wsrep_var= FALSE; + /* + This is a workaround for lp:1306875 (PBX) to skip switching of wsrep + status variable name's first letter to uppercase. This is an optimization + for status variables defined under wsrep plugin. + TODO: remove once lp:1306875 has been addressed. 
+ */ + if (*prefix && !my_strcasecmp(system_charset_info, prefix, "wsrep")) + { + is_wsrep_var= TRUE; + } +#endif /* WITH_WSREP */ + for (; variables->name; variables++) { bool wild_checked; strnmov(prefix_end, variables->name, len); name_buffer[sizeof(name_buffer)-1]=0; /* Safety */ + +#ifdef WITH_WSREP + /* + If the prefix is NULL, that means we are looking into the status variables + defined directly under mysqld.cc. Do not capitalize wsrep status variable + names until lp:1306875 has been fixed. + TODO: remove once lp:1306875 has been addressed. + */ + if (!(*prefix) && !strncasecmp(name_buffer, "wsrep", strlen("wsrep"))) + { + is_wsrep_var= TRUE; + } +#endif /* WITH_WSREP */ + if (ucase_names) my_caseup_str(system_charset_info, name_buffer); else @@ -2944,8 +2964,9 @@ static bool show_status_array(THD *thd, const char *wild, DBUG_ASSERT(name_buffer[0] >= 'a'); DBUG_ASSERT(name_buffer[0] <= 'z'); - /* traditionally status variables have a first letter uppercased */ - if (status_var) + // WSREP_TODO: remove once lp:1306875 has been addressed. 
+ if (IF_WSREP(is_wsrep_var == FALSE, 1) && + status_var) name_buffer[0]-= 'a' - 'A'; } @@ -2977,7 +2998,7 @@ static bool show_status_array(THD *thd, const char *wild, SHOW_TYPE show_type=var->type; if (show_type == SHOW_ARRAY) { - show_status_array(thd, wild, (SHOW_VAR *) var->value, value_type, + show_status_array(thd, wild, (SHOW_VAR *) var->value, scope, status_var, name_buffer, table, ucase_names, cond); } else @@ -2996,7 +3017,7 @@ static bool show_status_array(THD *thd, const char *wild, { sys_var *var= ((sys_var *) value); show_type= var->show_type(); - value= (char*) var->value_ptr(thd, value_type, &null_lex_str); + value= (char*) var->value_ptr(thd, scope, &null_lex_str); charset= var->charset(thd); } @@ -3066,13 +3087,6 @@ static bool show_status_array(THD *thd, const char *wild, if (!(pos= *(char**) value)) pos= ""; - DBUG_EXECUTE_IF("alter_server_version_str", - if (!my_strcasecmp(system_charset_info, - variables->name, - "version")) { - pos= "some-other-version"; - }); - end= strend(pos); break; } @@ -3093,7 +3107,6 @@ static bool show_status_array(THD *thd, const char *wild, break; } table->field[1]->store(pos, (uint32) (end - pos), charset); - thd->count_cuted_fields= CHECK_FIELD_IGNORE; table->field[1]->set_notnull(); mysql_mutex_unlock(&LOCK_global_system_variables); @@ -3103,6 +3116,7 @@ static bool show_status_array(THD *thd, const char *wild, res= TRUE; goto end; } + thd->get_stmt_da()->inc_current_row_for_warning(); } } } @@ -3111,323 +3125,6 @@ end: DBUG_RETURN(res); } -#ifdef COMPLETE_PATCH_NOT_ADDED_YET -/* - Aggregate values for mapped_user entries by their role. 
- - SYNOPSIS - aggregate_user_stats - all_user_stats - input to aggregate - agg_user_stats - returns aggregated values - - RETURN - 0 - OK - 1 - error -*/ - -static int aggregate_user_stats(HASH *all_user_stats, HASH *agg_user_stats) -{ - DBUG_ENTER("aggregate_user_stats"); - if (my_hash_init(agg_user_stats, system_charset_info, - MY_MAX(all_user_stats->records, 1), - 0, 0, (my_hash_get_key)get_key_user_stats, - (my_hash_free_key)free_user_stats, 0)) - { - sql_print_error("Malloc in aggregate_user_stats failed"); - DBUG_RETURN(1); - } - - for (uint i= 0; i < all_user_stats->records; i++) - { - USER_STATS *user= (USER_STATS*)my_hash_element(all_user_stats, i); - USER_STATS *agg_user; - uint name_length= strlen(user->priv_user); - - if (!(agg_user= (USER_STATS*) my_hash_search(agg_user_stats, - (uchar*)user->priv_user, - name_length))) - { - // First entry for this role. - if (!(agg_user= (USER_STATS*) my_malloc(sizeof(USER_STATS), - MYF(MY_WME | MY_ZEROFILL| - MY_THREAD_SPECIFIC)))) - { - sql_print_error("Malloc in aggregate_user_stats failed"); - DBUG_RETURN(1); - } - - init_user_stats(agg_user, user->priv_user, name_length, - user->priv_user, - user->total_connections, user->concurrent_connections, - user->connected_time, user->busy_time, user->cpu_time, - user->bytes_received, user->bytes_sent, - user->binlog_bytes_written, - user->rows_sent, user->rows_read, - user->rows_inserted, user->rows_deleted, - user->rows_updated, - user->select_commands, user->update_commands, - user->other_commands, - user->commit_trans, user->rollback_trans, - user->denied_connections, user->lost_connections, - user->access_denied_errors, user->empty_queries); - - if (my_hash_insert(agg_user_stats, (uchar*) agg_user)) - { - /* Out of memory */ - my_free(agg_user, 0); - sql_print_error("Malloc in aggregate_user_stats failed"); - DBUG_RETURN(1); - } - } - else - { - /* Aggregate with existing values for this role. 
*/ - add_user_stats(agg_user, - user->total_connections, user->concurrent_connections, - user->connected_time, user->busy_time, user->cpu_time, - user->bytes_received, user->bytes_sent, - user->binlog_bytes_written, - user->rows_sent, user->rows_read, - user->rows_inserted, user->rows_deleted, - user->rows_updated, - user->select_commands, user->update_commands, - user->other_commands, - user->commit_trans, user->rollback_trans, - user->denied_connections, user->lost_connections, - user->access_denied_errors, user->empty_queries); - } - } - DBUG_PRINT("exit", ("aggregated %lu input into %lu output entries", - all_user_stats->records, agg_user_stats->records)); - DBUG_RETURN(0); -} -#endif - -/* - Write result to network for SHOW USER_STATISTICS - - SYNOPSIS - send_user_stats - all_user_stats - values to return - table - I_S table - - RETURN - 0 - OK - 1 - error -*/ - -int send_user_stats(THD* thd, HASH *all_user_stats, TABLE *table) -{ - DBUG_ENTER("send_user_stats"); - - for (uint i= 0; i < all_user_stats->records; i++) - { - uint j= 0; - USER_STATS *user_stats= (USER_STATS*) my_hash_element(all_user_stats, i); - - table->field[j++]->store(user_stats->user, user_stats->user_name_length, - system_charset_info); - table->field[j++]->store((longlong)user_stats->total_connections,TRUE); - table->field[j++]->store((longlong)user_stats->concurrent_connections, TRUE); - table->field[j++]->store((longlong)user_stats->connected_time, TRUE); - table->field[j++]->store((double)user_stats->busy_time); - table->field[j++]->store((double)user_stats->cpu_time); - table->field[j++]->store((longlong)user_stats->bytes_received, TRUE); - table->field[j++]->store((longlong)user_stats->bytes_sent, TRUE); - table->field[j++]->store((longlong)user_stats->binlog_bytes_written, TRUE); - table->field[j++]->store((longlong)user_stats->rows_read, TRUE); - table->field[j++]->store((longlong)user_stats->rows_sent, TRUE); - table->field[j++]->store((longlong)user_stats->rows_deleted, TRUE); - 
table->field[j++]->store((longlong)user_stats->rows_inserted, TRUE); - table->field[j++]->store((longlong)user_stats->rows_updated, TRUE); - table->field[j++]->store((longlong)user_stats->select_commands, TRUE); - table->field[j++]->store((longlong)user_stats->update_commands, TRUE); - table->field[j++]->store((longlong)user_stats->other_commands, TRUE); - table->field[j++]->store((longlong)user_stats->commit_trans, TRUE); - table->field[j++]->store((longlong)user_stats->rollback_trans, TRUE); - table->field[j++]->store((longlong)user_stats->denied_connections, TRUE); - table->field[j++]->store((longlong)user_stats->lost_connections, TRUE); - table->field[j++]->store((longlong)user_stats->access_denied_errors, TRUE); - table->field[j++]->store((longlong)user_stats->empty_queries, TRUE); - if (schema_table_store_record(thd, table)) - { - DBUG_PRINT("error", ("store record error")); - DBUG_RETURN(1); - } - } - DBUG_RETURN(0); -} - -/* - Process SHOW USER_STATISTICS - - SYNOPSIS - mysqld_show_user_stats - thd - current thread - wild - limit results to the entry for this user - with_roles - when true, display role for mapped users - - RETURN - 0 - OK - 1 - error -*/ - -int fill_schema_user_stats(THD* thd, TABLE_LIST* tables, COND* cond) -{ - TABLE *table= tables->table; - int result; - DBUG_ENTER("fill_schema_user_stats"); - - if (check_global_access(thd, SUPER_ACL | PROCESS_ACL, true)) - DBUG_RETURN(0); - - /* - Iterates through all the global stats and sends them to the client. - Pattern matching on the client IP is supported. 
- */ - - mysql_mutex_lock(&LOCK_global_user_client_stats); - result= send_user_stats(thd, &global_user_stats, table) != 0; - mysql_mutex_unlock(&LOCK_global_user_client_stats); - - DBUG_PRINT("exit", ("result: %d", result)); - DBUG_RETURN(result); -} - -/* - Process SHOW CLIENT_STATISTICS - - SYNOPSIS - mysqld_show_client_stats - thd - current thread - wild - limit results to the entry for this client - - RETURN - 0 - OK - 1 - error -*/ - -int fill_schema_client_stats(THD* thd, TABLE_LIST* tables, COND* cond) -{ - TABLE *table= tables->table; - int result; - DBUG_ENTER("fill_schema_client_stats"); - - if (check_global_access(thd, SUPER_ACL | PROCESS_ACL, true)) - DBUG_RETURN(0); - - /* - Iterates through all the global stats and sends them to the client. - Pattern matching on the client IP is supported. - */ - - mysql_mutex_lock(&LOCK_global_user_client_stats); - result= send_user_stats(thd, &global_client_stats, table) != 0; - mysql_mutex_unlock(&LOCK_global_user_client_stats); - - DBUG_PRINT("exit", ("result: %d", result)); - DBUG_RETURN(result); -} - - -/* Fill information schema table with table statistics */ - -int fill_schema_table_stats(THD *thd, TABLE_LIST *tables, COND *cond) -{ - TABLE *table= tables->table; - DBUG_ENTER("fill_schema_table_stats"); - - mysql_mutex_lock(&LOCK_global_table_stats); - for (uint i= 0; i < global_table_stats.records; i++) - { - char *end_of_schema; - TABLE_STATS *table_stats= - (TABLE_STATS*)my_hash_element(&global_table_stats, i); - TABLE_LIST tmp_table; - size_t schema_length, table_name_length; - - end_of_schema= strend(table_stats->table); - schema_length= (size_t) (end_of_schema - table_stats->table); - table_name_length= strlen(table_stats->table + schema_length + 1); - - bzero((char*) &tmp_table,sizeof(tmp_table)); - tmp_table.db= table_stats->table; - tmp_table.table_name= end_of_schema+1; - tmp_table.grant.privilege= 0; - if (check_access(thd, SELECT_ACL, tmp_table.db, - &tmp_table.grant.privilege, NULL, 0, 1) || - 
check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, - 1)) - continue; - - table->field[0]->store(table_stats->table, schema_length, - system_charset_info); - table->field[1]->store(table_stats->table + schema_length+1, - table_name_length, system_charset_info); - table->field[2]->store((longlong)table_stats->rows_read, TRUE); - table->field[3]->store((longlong)table_stats->rows_changed, TRUE); - table->field[4]->store((longlong)table_stats->rows_changed_x_indexes, - TRUE); - if (schema_table_store_record(thd, table)) - { - mysql_mutex_unlock(&LOCK_global_table_stats); - DBUG_RETURN(1); - } - } - mysql_mutex_unlock(&LOCK_global_table_stats); - DBUG_RETURN(0); -} - - -/* Fill information schema table with index statistics */ - -int fill_schema_index_stats(THD *thd, TABLE_LIST *tables, COND *cond) -{ - TABLE *table= tables->table; - DBUG_ENTER("fill_schema_index_stats"); - - mysql_mutex_lock(&LOCK_global_index_stats); - for (uint i= 0; i < global_index_stats.records; i++) - { - INDEX_STATS *index_stats = - (INDEX_STATS*) my_hash_element(&global_index_stats, i); - TABLE_LIST tmp_table; - char *index_name; - size_t schema_name_length, table_name_length, index_name_length; - - bzero((char*) &tmp_table,sizeof(tmp_table)); - tmp_table.db= index_stats->index; - tmp_table.table_name= strend(index_stats->index)+1; - tmp_table.grant.privilege= 0; - if (check_access(thd, SELECT_ACL, tmp_table.db, - &tmp_table.grant.privilege, NULL, 0, 1) || - check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1)) - continue; - - index_name= strend(tmp_table.table_name)+1; - schema_name_length= (tmp_table.table_name - index_stats->index) -1; - table_name_length= (index_name - tmp_table.table_name)-1; - index_name_length= (index_stats->index_name_length - schema_name_length - - table_name_length - 3); - - table->field[0]->store(tmp_table.db, schema_name_length, - system_charset_info); - table->field[1]->store(tmp_table.table_name, table_name_length, - system_charset_info); - 
table->field[2]->store(index_name, index_name_length, system_charset_info); - table->field[3]->store((longlong)index_stats->rows_read, TRUE); - - if (schema_table_store_record(thd, table)) - { - mysql_mutex_unlock(&LOCK_global_index_stats); - DBUG_RETURN(1); - } - } - mysql_mutex_unlock(&LOCK_global_index_stats); - DBUG_RETURN(0); -} - - /* collect status for all running threads */ void calc_sum_of_all_status(STATUS_VAR *to) @@ -3671,7 +3368,7 @@ bool uses_only_table_name_fields(Item *item, TABLE_LIST *table) } -static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table) +COND *make_cond_for_info_schema(COND *cond, TABLE_LIST *table) { if (!cond) return (COND*) 0; @@ -3754,6 +3451,15 @@ bool get_lookup_field_values(THD *thd, COND *cond, TABLE_LIST *tables, bzero((char*) lookup_field_values, sizeof(LOOKUP_FIELD_VALUES)); switch (lex->sql_command) { + case SQLCOM_SHOW_PLUGINS: + if (lex->ident.str) + { + thd->make_lex_string(&lookup_field_values->db_value, + lex->ident.str, lex->ident.length); + break; + } + /* fall through */ + case SQLCOM_SHOW_GENERIC: case SQLCOM_SHOW_DATABASES: if (wild) { @@ -3775,17 +3481,6 @@ bool get_lookup_field_values(THD *thd, COND *cond, TABLE_LIST *tables, lookup_field_values->wild_table_value= 1; } break; - case SQLCOM_SHOW_PLUGINS: - if (lex->ident.str) - thd->make_lex_string(&lookup_field_values->db_value, - lex->ident.str, lex->ident.length); - else if (lex->wild) - { - thd->make_lex_string(&lookup_field_values->db_value, - lex->wild->ptr(), lex->wild->length()); - lookup_field_values->wild_db_value= 1; - } - break; default: /* The "default" is for queries over I_S. @@ -7204,19 +6899,19 @@ int fill_variables(THD *thd, TABLE_LIST *tables, COND *cond) const char *wild= lex->wild ? 
lex->wild->ptr() : NullS; enum enum_schema_tables schema_table_idx= get_schema_table_idx(tables->schema_table); - enum enum_var_type option_type= OPT_SESSION; - bool upper_case_names= (schema_table_idx != SCH_VARIABLES); - bool sorted_vars= (schema_table_idx == SCH_VARIABLES); + enum enum_var_type scope= OPT_SESSION; + bool upper_case_names= lex->sql_command != SQLCOM_SHOW_VARIABLES; + bool sorted_vars= lex->sql_command == SQLCOM_SHOW_VARIABLES; if (lex->option_type == OPT_GLOBAL || schema_table_idx == SCH_GLOBAL_VARIABLES) - option_type= OPT_GLOBAL; + scope= OPT_GLOBAL; COND *partial_cond= make_cond_for_info_schema(cond, tables); mysql_rwlock_rdlock(&LOCK_system_variables_hash); - res= show_status_array(thd, wild, enumerate_sys_vars(thd, sorted_vars, option_type), - option_type, NULL, "", tables->table, + res= show_status_array(thd, wild, enumerate_sys_vars(thd, sorted_vars, scope), + scope, NULL, "", tables->table, upper_case_names, partial_cond); mysql_rwlock_unlock(&LOCK_system_variables_hash); DBUG_RETURN(res); @@ -7232,25 +6927,25 @@ int fill_status(THD *thd, TABLE_LIST *tables, COND *cond) STATUS_VAR *tmp1, tmp; enum enum_schema_tables schema_table_idx= get_schema_table_idx(tables->schema_table); - enum enum_var_type option_type; - bool upper_case_names= (schema_table_idx != SCH_STATUS); + enum enum_var_type scope; + bool upper_case_names= lex->sql_command != SQLCOM_SHOW_STATUS; - if (schema_table_idx == SCH_STATUS) + if (lex->sql_command == SQLCOM_SHOW_STATUS) { - option_type= lex->option_type; - if (option_type == OPT_GLOBAL) + scope= lex->option_type; + if (scope == OPT_GLOBAL) tmp1= &tmp; else tmp1= thd->initial_status_var; } else if (schema_table_idx == SCH_GLOBAL_STATUS) { - option_type= OPT_GLOBAL; + scope= OPT_GLOBAL; tmp1= &tmp; } else { - option_type= OPT_SESSION; + scope= OPT_SESSION; tmp1= &thd->status_var; } @@ -7260,11 +6955,11 @@ int fill_status(THD *thd, TABLE_LIST *tables, COND *cond) partial_cond->val_int(); mysql_mutex_lock(&LOCK_status); 
- if (option_type == OPT_GLOBAL) + if (scope == OPT_GLOBAL) calc_sum_of_all_status(&tmp); res= show_status_array(thd, wild, (SHOW_VAR *)all_status_vars.buffer, - option_type, tmp1, "", tables->table, + scope, tmp1, "", tables->table, upper_case_names, partial_cond); mysql_mutex_unlock(&LOCK_status); DBUG_RETURN(res); @@ -7356,82 +7051,6 @@ struct schema_table_ref ST_SCHEMA_TABLE *schema_table; }; -ST_FIELD_INFO user_stats_fields_info[]= -{ - {"USER", USERNAME_CHAR_LENGTH, MYSQL_TYPE_STRING, 0, 0, "User", SKIP_OPEN_TABLE}, - {"TOTAL_CONNECTIONS", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Total_connections",SKIP_OPEN_TABLE}, - {"CONCURRENT_CONNECTIONS", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Concurrent_connections",SKIP_OPEN_TABLE}, - {"CONNECTED_TIME", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, 0, "Connected_time",SKIP_OPEN_TABLE}, - {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Busy_time",SKIP_OPEN_TABLE}, - {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Cpu_time",SKIP_OPEN_TABLE}, - {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received",SKIP_OPEN_TABLE}, - {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent",SKIP_OPEN_TABLE}, - {"BINLOG_BYTES_WRITTEN", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written",SKIP_OPEN_TABLE}, - {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE}, - {"ROWS_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_sent",SKIP_OPEN_TABLE}, - {"ROWS_DELETED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_deleted",SKIP_OPEN_TABLE}, - {"ROWS_INSERTED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_inserted",SKIP_OPEN_TABLE}, - {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated",SKIP_OPEN_TABLE}, - {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, 
MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands",SKIP_OPEN_TABLE}, - {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands",SKIP_OPEN_TABLE}, - {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands",SKIP_OPEN_TABLE}, - {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions",SKIP_OPEN_TABLE}, - {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions",SKIP_OPEN_TABLE}, - {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections",SKIP_OPEN_TABLE}, - {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections",SKIP_OPEN_TABLE}, - {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied",SKIP_OPEN_TABLE}, - {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries",SKIP_OPEN_TABLE}, - {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0} -}; - -ST_FIELD_INFO client_stats_fields_info[]= -{ - {"CLIENT", LIST_PROCESS_HOST_LEN, MYSQL_TYPE_STRING, 0, 0, "Client",SKIP_OPEN_TABLE}, - {"TOTAL_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Total_connections",SKIP_OPEN_TABLE}, - {"CONCURRENT_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Concurrent_connections",SKIP_OPEN_TABLE}, - {"CONNECTED_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Connected_time",SKIP_OPEN_TABLE}, - {"BUSY_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Busy_time",SKIP_OPEN_TABLE}, - {"CPU_TIME", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_DOUBLE, 0, 0, "Cpu_time",SKIP_OPEN_TABLE}, - {"BYTES_RECEIVED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_received",SKIP_OPEN_TABLE}, - {"BYTES_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Bytes_sent",SKIP_OPEN_TABLE}, - {"BINLOG_BYTES_WRITTEN", 
MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Binlog_bytes_written",SKIP_OPEN_TABLE}, - {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE}, - {"ROWS_SENT", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_sent",SKIP_OPEN_TABLE}, - {"ROWS_DELETED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_deleted",SKIP_OPEN_TABLE}, - {"ROWS_INSERTED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_inserted",SKIP_OPEN_TABLE}, - {"ROWS_UPDATED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_updated",SKIP_OPEN_TABLE}, - {"SELECT_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Select_commands",SKIP_OPEN_TABLE}, - {"UPDATE_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Update_commands",SKIP_OPEN_TABLE}, - {"OTHER_COMMANDS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Other_commands",SKIP_OPEN_TABLE}, - {"COMMIT_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Commit_transactions",SKIP_OPEN_TABLE}, - {"ROLLBACK_TRANSACTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rollback_transactions",SKIP_OPEN_TABLE}, - {"DENIED_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Denied_connections",SKIP_OPEN_TABLE}, - {"LOST_CONNECTIONS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Lost_connections",SKIP_OPEN_TABLE}, - {"ACCESS_DENIED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Access_denied",SKIP_OPEN_TABLE}, - {"EMPTY_QUERIES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Empty_queries",SKIP_OPEN_TABLE}, - {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0} -}; - - -ST_FIELD_INFO table_stats_fields_info[]= -{ - {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema",SKIP_OPEN_TABLE}, - {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name",SKIP_OPEN_TABLE}, - {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 
0, 0, "Rows_read",SKIP_OPEN_TABLE}, - {"ROWS_CHANGED", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed",SKIP_OPEN_TABLE}, - {"ROWS_CHANGED_X_INDEXES", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_changed_x_#indexes",SKIP_OPEN_TABLE}, - {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0} -}; - -ST_FIELD_INFO index_stats_fields_info[]= -{ - {"TABLE_SCHEMA", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_schema",SKIP_OPEN_TABLE}, - {"TABLE_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Table_name",SKIP_OPEN_TABLE}, - {"INDEX_NAME", NAME_LEN, MYSQL_TYPE_STRING, 0, 0, "Index_name",SKIP_OPEN_TABLE}, - {"ROWS_READ", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0, 0, "Rows_read",SKIP_OPEN_TABLE}, - {0, 0, MYSQL_TYPE_STRING, 0, 0, 0,0} -}; - /* Find schema_tables elment by name @@ -7668,7 +7287,7 @@ TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list) 0 success */ -int make_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table) +static int make_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table) { ST_FIELD_INFO *field_info= schema_table->fields_info; Name_resolution_context *context= &thd->lex->select_lex.context; @@ -7854,7 +7473,7 @@ int mysql_schema_table(THD *thd, LEX *lex, TABLE_LIST *table_list) { TABLE *table; DBUG_ENTER("mysql_schema_table"); - if (!(table= table_list->schema_table->create_table(thd, table_list))) + if (!(table= create_schema_table(thd, table_list))) DBUG_RETURN(1); table->s->tmp_table= SYSTEM_TMP_TABLE; table->grant.privilege= SELECT_ACL; @@ -7934,9 +7553,8 @@ int mysql_schema_table(THD *thd, LEX *lex, TABLE_LIST *table_list) */ int make_schema_select(THD *thd, SELECT_LEX *sel, - enum enum_schema_tables schema_table_idx) + ST_SCHEMA_TABLE *schema_table) { - ST_SCHEMA_TABLE *schema_table= get_schema_table(schema_table_idx); LEX_STRING db, table; DBUG_ENTER("make_schema_select"); DBUG_PRINT("enter", ("mysql_schema_select: %s", schema_table->table_name)); @@ -7953,13 +7571,13 @@ int make_schema_select(THD *thd, SELECT_LEX 
*sel, DBUG_RETURN(1); if (schema_table->old_format(thd, schema_table)) - DBUG_RETURN(1); if (!sel->add_table_to_list(thd, new Table_ident(thd, db, table, 0), 0, 0, TL_READ, MDL_SHARED_READ)) DBUG_RETURN(1); + sel->table_list.first->schema_table_reformed= 1; DBUG_RETURN(0); } @@ -8006,6 +7624,7 @@ static bool optimize_for_get_all_tables(THD *thd, TABLE_LIST *tables, COND *cond if (lsel && lsel->table_list.first) { /* These do not need to have a query plan */ + plan->trivial_show_command= true; goto end; } @@ -8827,11 +8446,31 @@ ST_FIELD_INFO variables_fields_info[]= { {"VARIABLE_NAME", 64, MYSQL_TYPE_STRING, 0, 0, "Variable_name", SKIP_OPEN_TABLE}, - {"VARIABLE_VALUE", 1024, MYSQL_TYPE_STRING, 0, 1, "Value", SKIP_OPEN_TABLE}, + {"VARIABLE_VALUE", 1024, MYSQL_TYPE_STRING, 0, 0, "Value", SKIP_OPEN_TABLE}, {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE} }; +ST_FIELD_INFO sysvars_fields_info[]= +{ + {"VARIABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, 0, 0}, + {"SESSION_VALUE", 1024, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, 0, 0}, + {"GLOBAL_VALUE", 1024, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, 0, 0}, + {"GLOBAL_VALUE_ORIGIN", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, 0, 0}, + {"DEFAULT_VALUE", 1024, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, 0, 0}, + {"VARIABLE_SCOPE", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, 0, 0}, + {"VARIABLE_TYPE", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, 0, 0}, + {"VARIABLE_COMMENT", TABLE_COMMENT_MAXLEN, MYSQL_TYPE_STRING, 0, 0, 0, 0}, + {"NUMERIC_MIN_VALUE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, 0, 0}, + {"NUMERIC_MAX_VALUE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, 0, 0}, + {"NUMERIC_BLOCK_SIZE", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, 0, 0}, + {"ENUM_VALUE_LIST", 65535, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, 0, 0}, + {"READ_ONLY", 3, MYSQL_TYPE_STRING, 0, 0, 0, 0}, + {"COMMAND_LINE_ARGUMENT", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, 
0, 0}, + {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, 0} +}; + + ST_FIELD_INFO processlist_fields_info[]= { {"ID", 4, MYSQL_TYPE_LONGLONG, 0, 0, "Id", SKIP_OPEN_TABLE}, @@ -9078,105 +8717,95 @@ ST_FIELD_INFO show_explain_fields_info[]= ST_SCHEMA_TABLE schema_tables[]= { - {"ALL_PLUGINS", plugin_fields_info, create_schema_table, + {"ALL_PLUGINS", plugin_fields_info, 0, fill_all_plugins, make_old_format, 0, 5, -1, 0, 0}, - {"APPLICABLE_ROLES", applicable_roles_fields_info, create_schema_table, + {"APPLICABLE_ROLES", applicable_roles_fields_info, 0, fill_schema_applicable_roles, 0, 0, -1, -1, 0, 0}, - {"CHARACTER_SETS", charsets_fields_info, create_schema_table, + {"CHARACTER_SETS", charsets_fields_info, 0, fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0, 0}, - {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table, - fill_schema_client_stats, make_old_format, 0, -1, -1, 0, 0}, - {"COLLATIONS", collation_fields_info, create_schema_table, + {"COLLATIONS", collation_fields_info, 0, fill_schema_collation, make_old_format, 0, -1, -1, 0, 0}, {"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info, - create_schema_table, fill_schema_coll_charset_app, 0, 0, -1, -1, 0, 0}, - {"COLUMNS", columns_fields_info, create_schema_table, + 0, fill_schema_coll_charset_app, 0, 0, -1, -1, 0, 0}, + {"COLUMNS", columns_fields_info, 0, get_all_tables, make_columns_old_format, get_schema_column_record, 1, 2, 0, OPTIMIZE_I_S_TABLE|OPEN_VIEW_FULL}, - {"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table, + {"COLUMN_PRIVILEGES", column_privileges_fields_info, 0, fill_schema_column_privileges, 0, 0, -1, -1, 0, 0}, - {"ENABLED_ROLES", enabled_roles_fields_info, create_schema_table, + {"ENABLED_ROLES", enabled_roles_fields_info, 0, fill_schema_enabled_roles, 0, 0, -1, -1, 0, 0}, - {"ENGINES", engines_fields_info, create_schema_table, + {"ENGINES", engines_fields_info, 0, fill_schema_engines, make_old_format, 0, -1, -1, 0, 0}, #ifdef 
HAVE_EVENT_SCHEDULER - {"EVENTS", events_fields_info, create_schema_table, + {"EVENTS", events_fields_info, 0, Events::fill_schema_events, make_old_format, 0, -1, -1, 0, 0}, #else - {"EVENTS", events_fields_info, create_schema_table, + {"EVENTS", events_fields_info, 0, 0, make_old_format, 0, -1, -1, 0, 0}, #endif - {"EXPLAIN", show_explain_fields_info, create_schema_table, fill_show_explain, + {"EXPLAIN", show_explain_fields_info, 0, fill_show_explain, make_old_format, 0, -1, -1, TRUE /*hidden*/ , 0}, - {"FILES", files_fields_info, create_schema_table, + {"FILES", files_fields_info, 0, hton_fill_schema_table, 0, 0, -1, -1, 0, 0}, - {"GLOBAL_STATUS", variables_fields_info, create_schema_table, + {"GLOBAL_STATUS", variables_fields_info, 0, fill_status, make_old_format, 0, 0, -1, 0, 0}, - {"GLOBAL_VARIABLES", variables_fields_info, create_schema_table, + {"GLOBAL_VARIABLES", variables_fields_info, 0, fill_variables, make_old_format, 0, 0, -1, 0, 0}, - {"INDEX_STATISTICS", index_stats_fields_info, create_schema_table, - fill_schema_index_stats, make_old_format, 0, -1, -1, 0, 0}, - {"KEY_CACHES", keycache_fields_info, create_schema_table, - fill_key_cache_tables, make_old_format, 0, -1,-1, 0, 0}, - {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table, + {"KEY_CACHES", keycache_fields_info, 0, + fill_key_cache_tables, 0, 0, -1,-1, 0, 0}, + {"KEY_COLUMN_USAGE", key_column_usage_fields_info, 0, get_all_tables, 0, get_schema_key_column_usage_record, 4, 5, 0, OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, - {"OPEN_TABLES", open_tables_fields_info, create_schema_table, + {"OPEN_TABLES", open_tables_fields_info, 0, fill_open_tables, make_old_format, 0, -1, -1, 1, 0}, - {"PARAMETERS", parameters_fields_info, create_schema_table, + {"PARAMETERS", parameters_fields_info, 0, fill_schema_proc, 0, 0, -1, -1, 0, 0}, - {"PARTITIONS", partitions_fields_info, create_schema_table, + {"PARTITIONS", partitions_fields_info, 0, get_all_tables, 0, get_schema_partitions_record, 1, 2, 
0, OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, - {"PLUGINS", plugin_fields_info, create_schema_table, + {"PLUGINS", plugin_fields_info, 0, fill_plugins, make_old_format, 0, -1, -1, 0, 0}, - {"PROCESSLIST", processlist_fields_info, create_schema_table, + {"PROCESSLIST", processlist_fields_info, 0, fill_schema_processlist, make_old_format, 0, -1, -1, 0, 0}, - {"PROFILING", query_profile_statistics_info, create_schema_table, + {"PROFILING", query_profile_statistics_info, 0, fill_query_profile_statistics_info, make_profile_table_for_show, NULL, -1, -1, false, 0}, {"REFERENTIAL_CONSTRAINTS", referential_constraints_fields_info, - create_schema_table, get_all_tables, 0, get_referential_constraints_record, + 0, get_all_tables, 0, get_referential_constraints_record, 1, 9, 0, OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, - {"ROUTINES", proc_fields_info, create_schema_table, + {"ROUTINES", proc_fields_info, 0, fill_schema_proc, make_proc_old_format, 0, -1, -1, 0, 0}, - {"SCHEMATA", schema_fields_info, create_schema_table, + {"SCHEMATA", schema_fields_info, 0, fill_schema_schemata, make_schemata_old_format, 0, 1, -1, 0, 0}, - {"SCHEMA_PRIVILEGES", schema_privileges_fields_info, create_schema_table, + {"SCHEMA_PRIVILEGES", schema_privileges_fields_info, 0, fill_schema_schema_privileges, 0, 0, -1, -1, 0, 0}, - {"SESSION_STATUS", variables_fields_info, create_schema_table, + {"SESSION_STATUS", variables_fields_info, 0, fill_status, make_old_format, 0, 0, -1, 0, 0}, - {"SESSION_VARIABLES", variables_fields_info, create_schema_table, + {"SESSION_VARIABLES", variables_fields_info, 0, fill_variables, make_old_format, 0, 0, -1, 0, 0}, - {"STATISTICS", stat_fields_info, create_schema_table, + {"STATISTICS", stat_fields_info, 0, get_all_tables, make_old_format, get_schema_stat_record, 1, 2, 0, OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE}, - {"STATUS", variables_fields_info, create_schema_table, fill_status, - make_old_format, 0, 0, -1, 1, 0}, - {"TABLES", tables_fields_info, create_schema_table, + 
{"SYSTEM_VARIABLES", sysvars_fields_info, 0, + fill_sysvars, make_old_format, 0, 0, -1, 0, 0}, + {"TABLES", tables_fields_info, 0, get_all_tables, make_old_format, get_schema_tables_record, 1, 2, 0, OPTIMIZE_I_S_TABLE}, - {"TABLESPACES", tablespaces_fields_info, create_schema_table, + {"TABLESPACES", tablespaces_fields_info, 0, hton_fill_schema_table, 0, 0, -1, -1, 0, 0}, - {"TABLE_CONSTRAINTS", table_constraints_fields_info, create_schema_table, + {"TABLE_CONSTRAINTS", table_constraints_fields_info, 0, get_all_tables, 0, get_schema_constraints_record, 3, 4, 0, OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, - {"TABLE_NAMES", table_names_fields_info, create_schema_table, + {"TABLE_NAMES", table_names_fields_info, 0, get_all_tables, make_table_names_old_format, 0, 1, 2, 1, OPTIMIZE_I_S_TABLE}, - {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table, + {"TABLE_PRIVILEGES", table_privileges_fields_info, 0, fill_schema_table_privileges, 0, 0, -1, -1, 0, 0}, - {"TABLE_STATISTICS", table_stats_fields_info, create_schema_table, - fill_schema_table_stats, make_old_format, 0, -1, -1, 0, 0}, - {"TRIGGERS", triggers_fields_info, create_schema_table, + {"TRIGGERS", triggers_fields_info, 0, get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0, OPEN_TRIGGER_ONLY|OPTIMIZE_I_S_TABLE}, - {"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table, + {"USER_PRIVILEGES", user_privileges_fields_info, 0, fill_schema_user_privileges, 0, 0, -1, -1, 0, 0}, - {"USER_STATISTICS", user_stats_fields_info, create_schema_table, - fill_schema_user_stats, make_old_format, 0, -1, -1, 0, 0}, - {"VARIABLES", variables_fields_info, create_schema_table, fill_variables, - make_old_format, 0, 0, -1, 1, 0}, - {"VIEWS", view_fields_info, create_schema_table, + {"VIEWS", view_fields_info, 0, get_all_tables, 0, get_schema_views_record, 1, 2, 0, OPEN_VIEW_ONLY|OPTIMIZE_I_S_TABLE}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} @@ -9195,8 +8824,6 @@ int initialize_schema_table(st_plugin_int 
*plugin) plugin->data= schema_table; // shortcut for the future if (plugin->plugin->init) { - schema_table->create_table= create_schema_table; - schema_table->old_format= make_old_format; schema_table->idx_field1= -1, schema_table->idx_field2= -1; @@ -9212,6 +8839,14 @@ int initialize_schema_table(st_plugin_int *plugin) DBUG_RETURN(1); } + if (!schema_table->old_format) + for (ST_FIELD_INFO *f= schema_table->fields_info; f->field_name; f++) + if (f->old_name && f->old_name[0]) + { + schema_table->old_format= make_old_format; + break; + } + /* Make sure the plugin name is not set inside the init() function. */ schema_table->table_name= plugin->name.str; } diff --git a/sql/sql_show.h b/sql/sql_show.h index 2f1cb26d17a..a759c8d94f5 100644 --- a/sql/sql_show.h +++ b/sql/sql_show.h @@ -112,15 +112,17 @@ void view_store_options(THD *thd, TABLE_LIST *table, String *buff); void init_fill_schema_files_row(TABLE* table); bool schema_table_store_record(THD *thd, TABLE *table); void initialize_information_schema_acl(); +COND *make_cond_for_info_schema(COND *cond, TABLE_LIST *table); ST_SCHEMA_TABLE *find_schema_table(THD *thd, const char* table_name); ST_SCHEMA_TABLE *get_schema_table(enum enum_schema_tables schema_table_idx); int make_schema_select(THD *thd, SELECT_LEX *sel, - enum enum_schema_tables schema_table_idx); + ST_SCHEMA_TABLE *schema_table); int mysql_schema_table(THD *thd, LEX *lex, TABLE_LIST *table_list); bool get_schema_tables_result(JOIN *join, enum enum_schema_table_state executed_place); enum enum_schema_tables get_schema_table_idx(ST_SCHEMA_TABLE *schema_table); +TABLE *create_schema_table(THD *thd, TABLE_LIST *table_list); /* These functions were under INNODB_COMPATIBILITY_HOOKS */ int get_quote_char_for_identifier(THD *thd, const char *name, uint length); @@ -192,9 +194,17 @@ typedef struct st_lookup_field_values class IS_table_read_plan : public Sql_alloc { public: - IS_table_read_plan() : no_rows(false) {} + IS_table_read_plan() : no_rows(false), 
trivial_show_command(FALSE) {} bool no_rows; + /* + For EXPLAIN only: For SHOW KEYS and SHOW COLUMNS, we know which + db_name.table_name will be read, however for some reason we don't + set the fields in this->lookup_field_vals. + In order to not have JOIN::save_explain_data() walking over uninitialized + data, we set trivial_show_command=true. + */ + bool trivial_show_command; LOOKUP_FIELD_VALUES lookup_field_vals; Item *partial_cond; diff --git a/sql/sql_string.h b/sql/sql_string.h index 95c82518f9e..0b7e949392d 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -211,10 +211,12 @@ public: str_charset=cs; } bool set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs); - bool set(longlong num, CHARSET_INFO *cs) - { return set_int(num, false, cs); } - bool set(ulonglong num, CHARSET_INFO *cs) - { return set_int((longlong)num, true, cs); } + bool set(int num, CHARSET_INFO *cs) { return set_int(num, false, cs); } + bool set(uint num, CHARSET_INFO *cs) { return set_int(num, true, cs); } + bool set(long num, CHARSET_INFO *cs) { return set_int(num, false, cs); } + bool set(ulong num, CHARSET_INFO *cs) { return set_int(num, true, cs); } + bool set(longlong num, CHARSET_INFO *cs) { return set_int(num, false, cs); } + bool set(ulonglong num, CHARSET_INFO *cs) { return set_int((longlong)num, true, cs); } bool set_real(double num,uint decimals, CHARSET_INFO *cs); /* Move handling of buffer from some other object to String */ diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 80ac0978834..68c25438f0c 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -1853,27 +1853,6 @@ bool mysql_write_frm(ALTER_PARTITION_PARAM_TYPE *lpt, uint flags) goto end; } } - if (flags & WFRM_PACK_FRM) - { - /* - We need to pack the frm file and after packing it we delete the - frm file to ensure it doesn't get used. This is only used for - handlers that have the main version of the frm file stored in the - handler. 
- */ - const uchar *data; - size_t length; - if (readfrm(shadow_path, &data, &length) || - packfrm(data, length, &lpt->pack_frm_data, &lpt->pack_frm_len)) - { - my_free(const_cast<uchar*>(data)); - my_free(lpt->pack_frm_data); - mem_alloc_error(length); - error= 1; - goto end; - } - error= mysql_file_delete(key_file_frm, shadow_frm_name, MYF(MY_WME)); - } if (flags & WFRM_INSTALL_SHADOW) { #ifdef WITH_PARTITION_STORAGE_ENGINE @@ -5264,6 +5243,12 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, int create_res; DBUG_ENTER("mysql_create_like_table"); +#ifdef WITH_WSREP + if (WSREP_ON && !thd->wsrep_applier && + wsrep_create_like_table(thd, table, src_table, create_info)) + DBUG_RETURN(res); +#endif + /* We the open source table to get its description in HA_CREATE_INFO and Alter_info objects. This also acquires a shared metadata lock @@ -5525,6 +5510,7 @@ err: thd->query_length(), is_trans)) res= 1; } + DBUG_RETURN(res); } @@ -8117,6 +8103,7 @@ simple_rename_or_index_change(THD *thd, TABLE_LIST *table_list, if (!error) { error= write_bin_log(thd, TRUE, thd->query(), thd->query_length()); + if (!error) my_ok(thd); } diff --git a/sql/sql_table.h b/sql/sql_table.h index 444626e0363..6a7fddb96ab 100644 --- a/sql/sql_table.h +++ b/sql/sql_table.h @@ -122,8 +122,7 @@ enum enum_explain_filename_mode #define WFRM_WRITE_SHADOW 1 #define WFRM_INSTALL_SHADOW 2 -#define WFRM_PACK_FRM 4 -#define WFRM_KEEP_SHARE 8 +#define WFRM_KEEP_SHARE 4 /* Flags for conversion functions. */ static const uint FN_FROM_IS_TMP= 1 << 0; diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index 70ac6265046..443a82a9df2 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -434,7 +434,8 @@ bool mysql_create_or_drop_trigger(THD *thd, TABLE_LIST *tables, bool create) binlogged, so they share the same danger, so trust_function_creators applies to them too. 
*/ - if (!trust_function_creators && mysql_bin_log.is_open() && + if (!trust_function_creators && + (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) && !(thd->security_ctx->master_access & SUPER_ACL)) { my_error(ER_BINLOG_CREATE_ROUTINE_NEED_SUPER, MYF(0)); @@ -2437,3 +2438,4 @@ bool load_table_name_for_trigger(THD *thd, DBUG_RETURN(FALSE); } + diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index e98679b1d51..bac691e35ea 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -24,6 +24,7 @@ #include "sql_acl.h" // DROP_ACL #include "sql_parse.h" // check_one_table_access() #include "sql_truncate.h" +#include "wsrep_mysqld.h" #include "sql_show.h" //append_identifier() @@ -411,6 +412,9 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) { bool hton_can_recreate; + if (WSREP(thd) && + wsrep_to_isolation_begin(thd, table_ref->db, table_ref->table_name, 0)) + DBUG_RETURN(TRUE); if (lock_table(thd, table_ref, &hton_can_recreate)) DBUG_RETURN(TRUE); diff --git a/sql/sql_update.cc b/sql/sql_update.cc index faf8e4c61d2..aa290c91569 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2011, 2013, Monty Program Ab. + Copyright (c) 2011, 2014, Monty Program Ab. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -976,7 +976,7 @@ int mysql_update(THD *thd, */ if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) { - if (mysql_bin_log.is_open()) + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) { int errcode= 0; if (error < 0) @@ -1965,7 +1965,7 @@ loop_end: DBUG_RETURN(1); } while ((tbl= tbl_it++)); - temp_fields.concat(fields_for_table[cnt]); + temp_fields.append(fields_for_table[cnt]); /* Make an unique key over the first field to avoid duplicated updates */ bzero((char*) &group, sizeof(group)); @@ -2219,7 +2219,7 @@ void multi_update::abort_result_set() The query has to binlog because there's a modified non-transactional table either from the query's list or via a stored routine: bug#13270,23333 */ - if (mysql_bin_log.is_open()) + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) { /* THD::killed status might not have been set ON at time of an error @@ -2488,7 +2488,7 @@ bool multi_update::send_eof() if (local_error == 0 || thd->transaction.stmt.modified_non_trans_table) { - if (mysql_bin_log.is_open()) + if (WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open()) { int errcode= 0; if (local_error == 0) diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index eb0d359f608..d5c8b7c479e 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -48,6 +48,7 @@ #include "sp_pcontext.h" #include "sp_rcontext.h" #include "sp.h" +#include "sql_show.h" #include "sql_alter.h" // Sql_cmd_alter_table* #include "sql_truncate.h" // Sql_cmd_truncate_table #include "sql_admin.h" // Sql_cmd_analyze/Check..._table @@ -139,12 +140,13 @@ int yylex(void *yylval, void *yythd); parser. 
*/ -void my_parse_error(const char *s) +void my_parse_error(const char *s, const char *yytext=0) { THD *thd= current_thd; Lex_input_stream *lip= & thd->m_parser_state->m_lip; - const char *yytext= lip->get_tok_start(); + if (!yytext) + yytext= lip->get_tok_start(); if (!yytext) yytext= ""; @@ -224,59 +226,39 @@ static bool is_native_function(THD *thd, const LEX_STRING *name) } -/** - Helper action for a case statement (entering the CASE). - This helper is used for both 'simple' and 'searched' cases. - This helper, with the other case_stmt_action_..., is executed when - the following SQL code is parsed: -<pre> -CREATE PROCEDURE proc_19194_simple(i int) -BEGIN - DECLARE str CHAR(10); - - CASE i - WHEN 1 THEN SET str="1"; - WHEN 2 THEN SET str="2"; - WHEN 3 THEN SET str="3"; - ELSE SET str="unknown"; - END CASE; - - SELECT str; -END -</pre> - The actions are used to generate the following code: -<pre> -SHOW PROCEDURE CODE proc_19194_simple; -Pos Instruction -0 set str@1 NULL -1 set_case_expr (12) 0 i@0 -2 jump_if_not 5(12) (case_expr@0 = 1) -3 set str@1 _latin1'1' -4 jump 12 -5 jump_if_not 8(12) (case_expr@0 = 2) -6 set str@1 _latin1'2' -7 jump 12 -8 jump_if_not 11(12) (case_expr@0 = 3) -9 set str@1 _latin1'3' -10 jump 12 -11 set str@1 _latin1'unknown' -12 stmt 0 "SELECT str" -</pre> +static sp_head *make_sp_head(THD *thd, sp_name *name, + enum stored_procedure_type type) +{ + LEX *lex= thd->lex; + sp_head *sp; - @param lex the parser lex context -*/ + /* Order is important here: new - reset - init */ + if ((sp= new sp_head())) + { + sp->reset_thd_mem_root(thd); + sp->init(lex); + sp->m_type= type; + if (name) + sp->init_sp_name(thd, name); + sp->m_chistics= &lex->sp_chistics; + lex->sphead= sp; + } + bzero(&lex->sp_chistics, sizeof(lex->sp_chistics)); + return sp; +} -void case_stmt_action_case(LEX *lex) +static bool maybe_start_compound_statement(THD *thd) { - lex->sphead->new_cont_backpatch(NULL); - - /* - BACKPATCH: Creating target label for the jump to - 
"case_stmt_action_end_case" - (Instruction 12 in the example) - */ + if (!thd->lex->sphead) + { + if (!make_sp_head(thd, NULL, TYPE_ENUM_PROCEDURE)) + return 1; - lex->spcont->push_label(current_thd, empty_lex_str, lex->sphead->instructions()); + Lex->sp_chistics.suid= SP_IS_NOT_SUID; + Lex->sphead->set_body_start(thd, YYLIP->get_cpp_ptr()); + Lex->sphead->m_sql_mode= thd->variables.sql_mode; + } + return 0; } /** @@ -375,36 +357,13 @@ int case_stmt_action_then(LEX *lex) /* BACKPATCH: Registering forward jump from - "case_stmt_action_then" to "case_stmt_action_end_case" + "case_stmt_action_then" to after END CASE (jump from instruction 4 to 12, 7 to 12 ... in the example) */ return sp->push_backpatch(i, ctx->last_label()); } -/** - Helper action for an end case. - This helper is used for both 'simple' and 'searched' cases. - @param lex the parser lex context - @param simple true for simple cases, false for searched cases -*/ - -void case_stmt_action_end_case(LEX *lex, bool simple) -{ - /* - BACKPATCH: Resolving forward jump from - "case_stmt_action_then" to "case_stmt_action_end_case" - (jump from instruction 4 to 12, 7 to 12 ... 
in the example) - */ - lex->sphead->backpatch(lex->spcont->pop_label()); - - if (simple) - lex->spcont->pop_case_expr_id(); - - lex->sphead->do_cont_backpatch(); -} - - static bool find_sys_var_null_base(THD *thd, struct sys_var_with_base *tmp) { @@ -904,6 +863,7 @@ static bool sp_create_assignment_instr(THD *thd, bool no_lookahead) Table_ident *table; char *simple_string; Item *item; + Item_param *item_param; Item_num *item_num; List<Item> *item_list; List<String> *string_list; @@ -931,8 +891,10 @@ static bool sp_create_assignment_instr(THD *thd, bool no_lookahead) chooser_compare_func_creator boolfunc2creator; class sp_condition_value *spcondvalue; struct { int vars, conds, hndlrs, curs; } spblock; - sp_name *spname; + class sp_name *spname; + class sp_label *splabel; LEX *lex; + class my_var *myvar; sp_head *sphead; struct p_elem_val *p_elem_value; enum index_hint_type index_hint; @@ -950,7 +912,6 @@ static bool sp_create_assignment_instr(THD *thd, bool no_lookahead) List<Condition_information_item> *cond_info_list; DYNCALL_CREATE_DEF *dyncol_def; List<DYNCALL_CREATE_DEF> *dyncol_def_list; - bool is_not_empty; } %{ @@ -1005,6 +966,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token ASCII_SYM /* MYSQL-FUNC */ %token ASENSITIVE_SYM /* FUTURE-USE */ %token AT_SYM /* SQL-2003-R */ +%token ATOMIC_SYM /* SQL-2003-R */ %token AUTHORS_SYM %token AUTOEXTEND_SIZE_SYM %token AUTO_INC @@ -1049,7 +1011,6 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token CIPHER_SYM %token CLASS_ORIGIN_SYM /* SQL-2003-N */ %token CLIENT_SYM -%token CLIENT_STATS_SYM %token CLOSE_SYM /* SQL-2003-R */ %token COALESCE /* SQL-2003-N */ %token CODE_SYM @@ -1215,13 +1176,12 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token IDENT %token IDENTIFIED_SYM %token IDENT_QUOTED -%token IF +%token IF_SYM %token IGNORE_SYM %token IGNORE_SERVER_IDS_SYM %token IMPORT %token INDEXES %token INDEX_SYM -%token INDEX_STATS_SYM %token INFILE 
%token INITIAL_SIZE_SYM %token INNER_SYM /* SQL-2003-R */ @@ -1305,6 +1265,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token MAX_SIZE_SYM %token MAX_SYM /* SQL-2003-N */ %token MAX_UPDATES_PER_HOUR +%token MAX_STATEMENT_TIME_SYM %token MAX_USER_CONNECTIONS_SYM %token MAX_VALUE_SYM /* SQL-2003-N */ %token MEDIUMBLOB @@ -1337,7 +1298,6 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token NATURAL /* SQL-2003-R */ %token NCHAR_STRING %token NCHAR_SYM /* SQL-2003-R */ -%token NDBCLUSTER_SYM %token NE /* OPERATOR */ %token NEG %token NEW_SYM /* SQL-2003-R */ @@ -1537,7 +1497,6 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token TABLES %token TABLESPACE %token TABLE_REF_PRIORITY -%token TABLE_STATS_SYM %token TABLE_SYM /* SQL-2003-R */ %token TABLE_CHECKSUM_SYM %token TABLE_NAME_SYM /* SQL-2003-N */ @@ -1586,7 +1545,6 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token UPGRADE_SYM %token USAGE /* SQL-2003-N */ %token USER /* SQL-2003-R */ -%token USER_STATS_SYM %token USE_FRM %token USE_SYM %token USING /* SQL-2003-R */ @@ -1664,7 +1622,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); table_ident_opt_wild create_like %type <simple_string> - remember_name remember_end opt_db text_or_password + remember_name remember_end opt_db text_or_password remember_tok_start + wild_and_where %type <string> text_string opt_gconcat_separator @@ -1675,7 +1634,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); table_option opt_if_not_exists create_or_replace opt_no_write_to_binlog opt_temporary all_or_any opt_distinct opt_ignore_leaves fulltext_options spatial_type union_option - field_def + field_def opt_not opt_union_order_or_limit union_opt select_derived_init transaction_access_mode_types opt_natural_language_mode opt_query_expansion opt_ev_status opt_ev_on_completion ev_on_completion opt_ev_comment @@ -1683,6 +1642,7 @@ bool my_yyoverflow(short **a, YYSTYPE 
**b, ulong *yystacksize); optional_flush_tables_arguments opt_dyncol_type dyncol_type opt_time_precision kill_type kill_option int_num opt_default_time_precision + case_stmt_body /* Bit field of MYSQL_START_TRANS_OPT_* flags. @@ -1719,7 +1679,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); predicate bit_expr table_wild simple_expr udf_expr expr_or_default set_expr_or_default - param_marker geometry_function + geometry_function signed_literal now_or_signed_literal opt_escape sp_opt_default simple_ident_nospvar simple_ident_q @@ -1733,6 +1693,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); simple_target_specification condition_number +%type <item_param> param_marker + %type <item_num> NUM_literal @@ -1804,10 +1766,12 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %type <dyncol_def_list> dyncall_create_list +%type <myvar> select_outvar + %type <NONE> analyze_stmt_command query verb_clause create change select do drop insert replace insert2 - insert_values update delete truncate rename + insert_values update delete truncate rename compound_statement show describe load alter optimize keycache preload flush reset purge begin commit rollback savepoint release slave master_def master_defs master_file_def slave_until_opts @@ -1873,19 +1837,21 @@ END_OF_INPUT %type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt %type <NONE> sp_proc_stmt_statement sp_proc_stmt_return +%type <NONE> sp_proc_stmt_compound_ok %type <NONE> sp_proc_stmt_if -%type <NONE> sp_labeled_control sp_proc_stmt_unlabeled -%type <NONE> sp_labeled_block sp_unlabeled_block +%type <NONE> sp_labeled_control sp_unlabeled_control +%type <NONE> sp_labeled_block sp_unlabeled_block sp_unlabeled_block_not_atomic %type <NONE> sp_proc_stmt_leave %type <NONE> sp_proc_stmt_iterate %type <NONE> sp_proc_stmt_open sp_proc_stmt_fetch sp_proc_stmt_close -%type <NONE> case_stmt_specification simple_case_stmt searched_case_stmt +%type <NONE> 
case_stmt_specification %type <num> sp_decl_idents sp_opt_inout sp_handler_type sp_hcond_list %type <spcondvalue> sp_cond sp_hcond sqlstate signal_value opt_signal_value %type <spblock> sp_decls sp_decl %type <lex> sp_cursor_stmt %type <spname> sp_name +%type <splabel> sp_block_content %type <index_hint> index_hint_type %type <num> index_hint_clause normal_join inner_join %type <filetype> data_or_xml @@ -1906,10 +1872,7 @@ END_OF_INPUT '-' '+' '*' '/' '%' '(' ')' ',' '!' '{' '}' '&' '|' AND_SYM OR_SYM OR_OR_SYM BETWEEN_SYM CASE_SYM THEN_SYM WHEN_SYM DIV_SYM MOD_SYM OR2_SYM AND_AND_SYM DELETE_SYM - -%type <is_not_empty> opt_union_order_or_limit - -%type <NONE> ROLE_SYM + ROLE_SYM %% @@ -1987,9 +1950,10 @@ opt_end_of_input: verb_clause: statement | begin + | compound_statement ; -/* Verb clauses, except begin */ +/* Verb clauses, except begin and compound_statement */ statement: alter | analyze @@ -2112,7 +2076,7 @@ execute_var_ident: '@' ident_or_text { LEX *lex=Lex; - LEX_STRING *lexstr= (LEX_STRING*)sql_memdup(&$2, sizeof(LEX_STRING)); + LEX_STRING *lexstr= (LEX_STRING*)thd->memdup(&$2, sizeof(LEX_STRING)); if (!lexstr || lex->prepared_stmt_params.push_back(lexstr)) MYSQL_YYABORT; } @@ -2375,8 +2339,7 @@ create: lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB; lex->create_info.options= ($1 | $2 | $4); lex->create_info.default_table_charset= NULL; - lex->name.str= 0; - lex->name.length= 0; + lex->name= null_lex_str; lex->create_last_non_select_table= lex->last_table(); } create_body @@ -2623,16 +2586,11 @@ opt_ev_on_completion: ; ev_on_completion: - ON COMPLETION_SYM PRESERVE_SYM + ON COMPLETION_SYM opt_not PRESERVE_SYM { - Lex->event_parse_data->on_completion= - Event_parse_data::ON_COMPLETION_PRESERVE; - $$= 1; - } - | ON COMPLETION_SYM NOT_SYM PRESERVE_SYM - { - Lex->event_parse_data->on_completion= - Event_parse_data::ON_COMPLETION_DROP; + Lex->event_parse_data->on_completion= $3 + ? 
Event_parse_data::ON_COMPLETION_DROP + : Event_parse_data::ON_COMPLETION_PRESERVE; $$= 1; } ; @@ -2674,21 +2632,13 @@ ev_sql_stmt: MYSQL_YYABORT; } - if (!(lex->sphead= new sp_head())) + if (!make_sp_head(thd, lex->event_parse_data->identifier, TYPE_ENUM_PROCEDURE)) MYSQL_YYABORT; - lex->sphead->reset_thd_mem_root(thd); - lex->sphead->init(lex); - lex->sphead->init_sp_name(thd, lex->event_parse_data->identifier); - - lex->sphead->m_type= TYPE_ENUM_PROCEDURE; - - bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics)); - lex->sphead->m_chistics= &lex->sp_chistics; - + lex->sp_chistics.suid= SP_IS_SUID; //always the definer! lex->sphead->set_body_start(thd, lip->get_cpp_ptr()); } - ev_sql_stmt_inner + sp_proc_stmt { LEX *lex= thd->lex; @@ -2696,28 +2646,10 @@ ev_sql_stmt: lex->sphead->set_stmt_end(thd); lex->sphead->restore_thd_mem_root(thd); - lex->sp_chistics.suid= SP_IS_SUID; //always the definer! - lex->event_parse_data->body_changed= TRUE; } ; -ev_sql_stmt_inner: - sp_proc_stmt_statement - | sp_proc_stmt_return - | sp_proc_stmt_if - | case_stmt_specification - | sp_labeled_block - | sp_unlabeled_block - | sp_labeled_control - | sp_proc_stmt_unlabeled - | sp_proc_stmt_leave - | sp_proc_stmt_iterate - | sp_proc_stmt_open - | sp_proc_stmt_fetch - | sp_proc_stmt_close - ; - clear_privileges: /* Nothing */ { @@ -2798,8 +2730,7 @@ sp_chistic: /* Create characteristics */ sp_c_chistic: sp_chistic { } - | DETERMINISTIC_SYM { Lex->sp_chistics.detistic= TRUE; } - | not DETERMINISTIC_SYM { Lex->sp_chistics.detistic= FALSE; } + | opt_not DETERMINISTIC_SYM { Lex->sp_chistics.detistic= ! 
$1; } ; sp_suid: @@ -3634,23 +3565,32 @@ sp_opt_default: sp_proc_stmt: sp_proc_stmt_statement | sp_proc_stmt_return - | sp_proc_stmt_if - | case_stmt_specification | sp_labeled_block | sp_unlabeled_block | sp_labeled_control - | sp_proc_stmt_unlabeled | sp_proc_stmt_leave | sp_proc_stmt_iterate | sp_proc_stmt_open | sp_proc_stmt_fetch | sp_proc_stmt_close + | sp_proc_stmt_compound_ok + ; + +sp_proc_stmt_compound_ok: + sp_proc_stmt_if + | case_stmt_specification + | sp_unlabeled_block_not_atomic + | sp_unlabeled_control ; sp_proc_stmt_if: - IF - { Lex->sphead->new_cont_backpatch(NULL); } - sp_if END IF + IF_SYM + { + if (maybe_start_compound_statement(thd)) + MYSQL_YYABORT; + Lex->sphead->new_cont_backpatch(NULL); + } + sp_if END IF_SYM { Lex->sphead->do_cont_backpatch(); } ; @@ -3737,18 +3677,17 @@ sp_proc_stmt_return: } ; -sp_proc_stmt_unlabeled: - { /* Unlabeled controls get a secret label. */ - LEX *lex= Lex; - - lex->spcont->push_label(thd, empty_lex_str, - lex->sphead->instructions()); +sp_unlabeled_control: + { + if (maybe_start_compound_statement(thd)) + MYSQL_YYABORT; + /* Unlabeled controls get an empty label. 
*/ + Lex->spcont->push_label(thd, empty_lex_str, + Lex->sphead->instructions()); } - sp_unlabeled_control + sp_control_content { - LEX *lex= Lex; - - lex->sphead->backpatch(lex->spcont->pop_label()); + Lex->sphead->backpatch(Lex->spcont->pop_label()); } ; @@ -4004,51 +3943,89 @@ sp_elseifs: ; case_stmt_specification: - simple_case_stmt - | searched_case_stmt - ; - -simple_case_stmt: CASE_SYM { - LEX *lex= Lex; - case_stmt_action_case(lex); - lex->sphead->reset_lex(thd); /* For expr $3 */ - } - expr - { - LEX *lex= Lex; - if (case_stmt_action_expr(lex, $3)) - MYSQL_YYABORT; + if (maybe_start_compound_statement(thd)) + MYSQL_YYABORT; + + /** + An example of the CASE statement in use is + <pre> + CREATE PROCEDURE proc_19194_simple(i int) + BEGIN + DECLARE str CHAR(10); + + CASE i + WHEN 1 THEN SET str="1"; + WHEN 2 THEN SET str="2"; + WHEN 3 THEN SET str="3"; + ELSE SET str="unknown"; + END CASE; + + SELECT str; + END + </pre> + The actions are used to generate the following code: + <pre> + SHOW PROCEDURE CODE proc_19194_simple; + Pos Instruction + 0 set str@1 NULL + 1 set_case_expr (12) 0 i@0 + 2 jump_if_not 5(12) (case_expr@0 = 1) + 3 set str@1 _latin1'1' + 4 jump 12 + 5 jump_if_not 8(12) (case_expr@0 = 2) + 6 set str@1 _latin1'2' + 7 jump 12 + 8 jump_if_not 11(12) (case_expr@0 = 3) + 9 set str@1 _latin1'3' + 10 jump 12 + 11 set str@1 _latin1'unknown' + 12 stmt 0 "SELECT str" + </pre> + */ - /* For expr $3 */ - if (lex->sphead->restore_lex(thd)) - MYSQL_YYABORT; + Lex->sphead->new_cont_backpatch(NULL); + + /* + BACKPATCH: Creating target label for the jump to after END CASE + (instruction 12 in the example) + */ + Lex->spcont->push_label(current_thd, empty_lex_str, Lex->sphead->instructions()); } - simple_when_clause_list + case_stmt_body else_clause_opt END CASE_SYM { - LEX *lex= Lex; - case_stmt_action_end_case(lex, true); + /* + BACKPATCH: Resolving forward jump from + "case_stmt_action_then" to after END CASE + (jump from instruction 4 to 12, 7 to 12 ... 
in the example) + */ + Lex->sphead->backpatch(Lex->spcont->pop_label()); + + if ($3) + Lex->spcont->pop_case_expr_id(); + + Lex->sphead->do_cont_backpatch(); } ; -searched_case_stmt: - CASE_SYM - { - LEX *lex= Lex; - case_stmt_action_case(lex); - } - searched_when_clause_list - else_clause_opt - END - CASE_SYM +case_stmt_body: + { Lex->sphead->reset_lex(thd); /* For expr $2 */ } + expr { - LEX *lex= Lex; - case_stmt_action_end_case(lex, false); + if (case_stmt_action_expr(Lex, $2)) + MYSQL_YYABORT; + + if (Lex->sphead->restore_lex(thd)) + MYSQL_YYABORT; } + simple_when_clause_list + { $$= 1; } + | searched_when_clause_list + { $$= 0; } ; simple_when_clause_list: @@ -4142,7 +4119,7 @@ sp_labeled_control: lab->type= sp_label::ITERATION; } } - sp_unlabeled_control sp_opt_label + sp_control_content sp_opt_label { LEX *lex= Lex; sp_label *lab= lex->spcont->pop_label(); @@ -4165,7 +4142,7 @@ sp_opt_label: ; sp_labeled_block: - label_ident ':' + label_ident ':' BEGIN_SYM { LEX *lex= Lex; sp_pcontext *ctx= lex->spcont; @@ -4176,20 +4153,15 @@ sp_labeled_block: my_error(ER_SP_LABEL_REDEFINE, MYF(0), $1.str); MYSQL_YYABORT; } - - lab= lex->spcont->push_label(thd, $1, lex->sphead->instructions()); - lab->type= sp_label::BEGIN; + lex->name= $1; } sp_block_content sp_opt_label { - LEX *lex= Lex; - sp_label *lab= lex->spcont->pop_label(); - - if ($5.str) + if ($6.str) { - if (my_strcasecmp(system_charset_info, $5.str, lab->name.str) != 0) + if (my_strcasecmp(system_charset_info, $6.str, $5->name.str) != 0) { - my_error(ER_SP_LABEL_MISMATCH, MYF(0), $5.str); + my_error(ER_SP_LABEL_MISMATCH, MYF(0), $6.str); MYSQL_YYABORT; } } @@ -4197,25 +4169,31 @@ sp_labeled_block: ; sp_unlabeled_block: - { /* Unlabeled blocks get a secret label. 
*/ - LEX *lex= Lex; - uint ip= lex->sphead->instructions(); - sp_label *lab= lex->spcont->push_label(thd, empty_lex_str, ip); - lab->type= sp_label::BEGIN; + BEGIN_SYM + { + Lex->name= empty_lex_str; // Unlabeled blocks get an empty label } sp_block_content + { } + ; + +sp_unlabeled_block_not_atomic: + BEGIN_SYM not ATOMIC_SYM /* TODO: BEGIN ATOMIC (not -> opt_not) */ { - LEX *lex= Lex; - lex->spcont->pop_label(); + if (maybe_start_compound_statement(thd)) + MYSQL_YYABORT; + Lex->name= empty_lex_str; // Unlabeled blocks get an empty label } + sp_block_content + { } ; sp_block_content: - BEGIN_SYM - { /* QQ This is just a dummy for grouping declarations and statements - together. No [[NOT] ATOMIC] yet, and we need to figure out how - make it coexist with the existing BEGIN COMMIT/ROLLBACK. */ + { LEX *lex= Lex; + sp_label *lab= lex->spcont->push_label(thd, lex->name, + lex->sphead->instructions()); + lab->type= sp_label::BEGIN; lex->spcont= lex->spcont->push_context(thd, sp_pcontext::REGULAR_SCOPE); } @@ -4229,25 +4207,26 @@ sp_block_content: sp_instr *i; sp->backpatch(ctx->last_label()); /* We always have a label */ - if ($3.hndlrs) + if ($2.hndlrs) { - i= new sp_instr_hpop(sp->instructions(), ctx, $3.hndlrs); + i= new sp_instr_hpop(sp->instructions(), ctx, $2.hndlrs); if (i == NULL || sp->add_instr(i)) MYSQL_YYABORT; } - if ($3.curs) + if ($2.curs) { - i= new sp_instr_cpop(sp->instructions(), ctx, $3.curs); + i= new sp_instr_cpop(sp->instructions(), ctx, $2.curs); if (i == NULL || sp->add_instr(i)) MYSQL_YYABORT; } lex->spcont= ctx->pop_context(); + $$ = lex->spcont->pop_label(); } ; -sp_unlabeled_control: +sp_control_content: LOOP_SYM sp_proc_stmts1 END LOOP_SYM { @@ -5530,7 +5509,7 @@ opt_if_not_exists: Lex->check_exists= FALSE; $$= 0; } - | IF not EXISTS + | IF_SYM not EXISTS { Lex->check_exists= TRUE; $$=HA_LEX_CREATE_IF_NOT_EXISTS; @@ -6114,7 +6093,7 @@ virtual_column_func: MYSQL_YYABORT; } uint expr_len= (uint)($3 - $1) - 1; - Lex->vcol_info->expr_str.str= 
(char* ) sql_memdup($1 + 1, expr_len); + Lex->vcol_info->expr_str.str= (char* ) thd->memdup($1 + 1, expr_len); Lex->vcol_info->expr_str.length= expr_len; Lex->vcol_info->expr_item= $2; } @@ -7030,8 +7009,7 @@ string_list: alter: ALTER { - Lex->name.str= 0; - Lex->name.length= 0; + Lex->name= null_lex_str; Lex->sql_command= SQLCOM_ALTER_TABLE; Lex->duplicates= DUP_ERROR; Lex->col_list.empty(); @@ -7158,7 +7136,7 @@ alter: } view_tail {} - | ALTER definer_opt EVENT_SYM sp_name + | ALTER definer_opt remember_name EVENT_SYM sp_name { /* It is safe to use Lex->spname because @@ -7170,9 +7148,10 @@ alter: if (!(Lex->event_parse_data= Event_parse_data::new_instance(thd))) MYSQL_YYABORT; - Lex->event_parse_data->identifier= $4; + Lex->event_parse_data->identifier= $5; Lex->sql_command= SQLCOM_ALTER_EVENT; + Lex->stmt_definition_begin= $3; } ev_alter_on_schedule_completion opt_ev_rename_to @@ -7180,7 +7159,7 @@ alter: opt_ev_comment opt_ev_sql_stmt { - if (!($6 || $7 || $8 || $9 || $10)) + if (!($7 || $8 || $9 || $10 || $11)) { my_parse_error(ER(ER_SYNTAX_ERROR)); MYSQL_YYABORT; @@ -7190,6 +7169,7 @@ alter: can overwrite it */ Lex->sql_command= SQLCOM_ALTER_EVENT; + Lex->stmt_definition_end= (char*)YYLIP->get_cpp_ptr(); } | ALTER TABLESPACE alter_tablespace_info { @@ -7246,7 +7226,7 @@ opt_ev_sql_stmt: ; ident_or_empty: - /* empty */ { $$.str= 0; $$.length= 0; } + /* empty */ { $$= null_lex_str; } | ident { $$= $1; } ; @@ -8039,12 +8019,12 @@ table_column_list: | ident { Lex->column_list->push_back((LEX_STRING*) - sql_memdup(&$1, sizeof(LEX_STRING))); + thd->memdup(&$1, sizeof(LEX_STRING))); } | table_column_list ',' ident { Lex->column_list->push_back((LEX_STRING*) - sql_memdup(&$3, sizeof(LEX_STRING))); + thd->memdup(&$3, sizeof(LEX_STRING))); } ; @@ -8059,14 +8039,14 @@ table_index_name: ident { Lex->index_list->push_back( - (LEX_STRING*) sql_memdup(&$1, sizeof(LEX_STRING))); + (LEX_STRING*) thd->memdup(&$1, sizeof(LEX_STRING))); } | PRIMARY_SYM { LEX_STRING str= 
{(char*) "PRIMARY", 7}; Lex->index_list->push_back( - (LEX_STRING*) sql_memdup(&str, sizeof(LEX_STRING))); + (LEX_STRING*) thd->memdup(&str, sizeof(LEX_STRING))); } ; @@ -8547,6 +8527,12 @@ select_item: } ; +remember_tok_start: + { + $$= (char*) YYLIP->get_tok_start(); + } + ; + remember_name: { $$= (char*) YYLIP->get_cpp_tok_start(); @@ -9138,7 +9124,7 @@ simple_expr: MYSQL_YYABORT; } | literal - | param_marker + | param_marker { $$= $1; } | variable | sum_expr | simple_expr OR_OR_SYM simple_expr @@ -9285,9 +9271,9 @@ simple_expr: } | DEFAULT '(' simple_ident ')' { - if ($3->is_splocal()) + Item_splocal *il= $3->get_item_splocal(); + if (il) { - Item_splocal *il= static_cast<Item_splocal *>($3); my_error(ER_WRONG_COLUMN_NAME, MYF(0), il->my_name()->str); MYSQL_YYABORT; @@ -9753,7 +9739,7 @@ function_call_conflict: MYSQL_YYABORT; Lex->safe_to_cache_query=0; } - | IF '(' expr ',' expr ',' expr ')' + | IF_SYM '(' expr ',' expr ',' expr ')' { $$= new (thd->mem_root) Item_func_if($3,$5,$7); if ($$ == NULL) @@ -11111,7 +11097,7 @@ opt_table_alias: /* empty */ { $$=0; } | table_alias ident { - $$= (LEX_STRING*) sql_memdup(&$2,sizeof(LEX_STRING)); + $$= (LEX_STRING*) thd->memdup(&$2,sizeof(LEX_STRING)); if ($$ == NULL) MYSQL_YYABORT; } @@ -11570,16 +11556,13 @@ select_var_list: | select_var_ident {} ; -select_var_ident: - '@' ident_or_text +select_var_ident: select_outvar { - LEX *lex=Lex; - if (lex->result) + if (Lex->result) { - my_var *var= new my_var($2,0,0,(enum_field_types)0); - if (var == NULL) + if ($1 == NULL) MYSQL_YYABORT; - ((select_dumpvar *)lex->result)->var_list.push_back(var); + ((select_dumpvar *)Lex->result)->var_list.push_back($1); } else { @@ -11587,37 +11570,31 @@ select_var_ident: The parser won't create select_result instance only if it's an EXPLAIN. */ - DBUG_ASSERT(lex->describe); + DBUG_ASSERT(Lex->describe); } } + ; + +select_outvar: + '@' ident_or_text + { + $$ = Lex->result ? 
new my_var_user($2) : NULL; + } + | param_marker + { + $$ = Lex->result ? new my_var_param($1) : NULL; + } | ident_or_text { - LEX *lex=Lex; sp_variable *t; - if (!lex->spcont || !(t=lex->spcont->find_variable($1, false))) + if (!Lex->spcont || !(t= Lex->spcont->find_variable($1, false))) { my_error(ER_SP_UNDECLARED_VAR, MYF(0), $1.str); MYSQL_YYABORT; } - if (lex->result) - { - my_var *var= new my_var($1,1,t->offset,t->type); - if (var == NULL) - MYSQL_YYABORT; - ((select_dumpvar *)lex->result)->var_list.push_back(var); -#ifndef DBUG_OFF - var->sp= lex->sphead; -#endif - } - else - { - /* - The parser won't create select_result instance only - if it's an EXPLAIN. - */ - DBUG_ASSERT(lex->describe); - } + $$ = Lex->result ? new my_var_sp($1, t->offset, t->type, Lex->sphead) + : NULL; } ; @@ -11874,7 +11851,7 @@ opt_if_exists: Lex->check_exists= FALSE; $$= 0; } - | IF EXISTS + | IF_SYM EXISTS { Lex->check_exists= TRUE; $$= 1; @@ -12505,14 +12482,14 @@ show_param: LEX *lex= Lex; lex->sql_command= SQLCOM_SHOW_PROFILE; if (prepare_schema_table(thd, lex, NULL, SCH_PROFILES) != 0) - YYABORT; + MYSQL_YYABORT; } | opt_var_type STATUS_SYM wild_and_where { LEX *lex= Lex; lex->sql_command= SQLCOM_SHOW_STATUS; lex->option_type= $1; - if (prepare_schema_table(thd, lex, 0, SCH_STATUS)) + if (prepare_schema_table(thd, lex, 0, SCH_SESSION_STATUS)) MYSQL_YYABORT; } | opt_full PROCESSLIST_SYM @@ -12522,7 +12499,7 @@ show_param: LEX *lex= Lex; lex->sql_command= SQLCOM_SHOW_VARIABLES; lex->option_type= $1; - if (prepare_schema_table(thd, lex, 0, SCH_VARIABLES)) + if (prepare_schema_table(thd, lex, 0, SCH_SESSION_VARIABLES)) MYSQL_YYABORT; } | charset wild_and_where @@ -12596,34 +12573,6 @@ show_param: Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT; Lex->verbose= 0; } - | CLIENT_STATS_SYM - { - LEX *lex= Lex; - lex->sql_command= SQLCOM_SHOW_CLIENT_STATS; - if (prepare_schema_table(thd, lex, 0, SCH_CLIENT_STATS)) - MYSQL_YYABORT; - } - | USER_STATS_SYM - { - LEX *lex= Lex; - lex->sql_command= 
SQLCOM_SHOW_USER_STATS; - if (prepare_schema_table(thd, lex, 0, SCH_USER_STATS)) - MYSQL_YYABORT; - } - | TABLE_STATS_SYM - { - LEX *lex= Lex; - lex->sql_command= SQLCOM_SHOW_TABLE_STATS; - if (prepare_schema_table(thd, lex, 0, SCH_TABLE_STATS)) - MYSQL_YYABORT; - } - | INDEX_STATS_SYM - { - LEX *lex= Lex; - lex->sql_command= SQLCOM_SHOW_INDEX_STATS; - if (prepare_schema_table(thd, lex, 0, SCH_INDEX_STATS)) - MYSQL_YYABORT; - } | CREATE PROCEDURE_SYM sp_name { LEX *lex= Lex; @@ -12680,6 +12629,24 @@ show_param: MYSQL_YYABORT; add_value_to_list(thd, $3); } + | IDENT_sys remember_tok_start wild_and_where + { + LEX *lex= Lex; + lex->sql_command= SQLCOM_SHOW_GENERIC; + ST_SCHEMA_TABLE *table= find_schema_table(thd, $1.str); + if (!table || !table->old_format) + { + my_parse_error(ER(ER_SYNTAX_ERROR), $2); + MYSQL_YYABORT; + } + if (lex->wild && table->idx_field1 < 0) + { + my_parse_error(ER(ER_SYNTAX_ERROR), $3); + MYSQL_YYABORT; + } + if (make_schema_select(thd, Lex->current_select, table)) + MYSQL_YYABORT; + } ; show_engine_param: @@ -12727,19 +12694,21 @@ binlog_from: ; wild_and_where: - /* empty */ - | LIKE TEXT_STRING_sys + /* empty */ { $$= 0; } + | LIKE remember_tok_start TEXT_STRING_sys { - Lex->wild= new (thd->mem_root) String($2.str, $2.length, + Lex->wild= new (thd->mem_root) String($3.str, $3.length, system_charset_info); if (Lex->wild == NULL) MYSQL_YYABORT; + $$= $2; } - | WHERE expr + | WHERE remember_tok_start expr { - Select->where= normalize_cond($2); - if ($2) - $2->top_level_item(); + Select->where= normalize_cond($3); + if ($3) + $3->top_level_item(); + $$= $2; } ; @@ -12921,8 +12890,7 @@ flush_option: | LOGS_SYM { Lex->type|= REFRESH_LOG; - Lex->relay_log_connection_name.str= (char*) ""; - Lex->relay_log_connection_name.length= 0; + Lex->relay_log_connection_name= empty_lex_str; } | STATUS_SYM { Lex->type|= REFRESH_STATUS; } @@ -12937,20 +12905,24 @@ flush_option: lex->type|= REFRESH_SLAVE; lex->reset_slave_info.all= false; } - | CLIENT_STATS_SYM 
- { Lex->type|= REFRESH_CLIENT_STATS; } - | USER_STATS_SYM - { Lex->type|= REFRESH_USER_STATS; } - | TABLE_STATS_SYM - { Lex->type|= REFRESH_TABLE_STATS; } - | INDEX_STATS_SYM - { Lex->type|= REFRESH_INDEX_STATS; } | MASTER_SYM { Lex->type|= REFRESH_MASTER; } | DES_KEY_FILE { Lex->type|= REFRESH_DES_KEY_FILE; } | RESOURCES { Lex->type|= REFRESH_USER_RESOURCES; } + | IDENT_sys remember_tok_start + { + Lex->type|= REFRESH_GENERIC; + ST_SCHEMA_TABLE *table= find_schema_table(thd, $1.str); + if (!table || !table->reset_table) + { + my_parse_error(ER(ER_SYNTAX_ERROR), $2); + MYSQL_YYABORT; + } + Lex->view_list.push_back( + (LEX_STRING*)thd->memdup(&$1, sizeof(LEX_STRING))); + } ; opt_table_list: @@ -13394,7 +13366,9 @@ param_marker: my_error(ER_VIEW_SELECT_VARIABLE, MYF(0)); MYSQL_YYABORT; } - item= new (thd->mem_root) Item_param((uint) (lip->get_tok_start() - thd->query())); + const char *query_start= lex->sphead ? lex->sphead->m_tmp_query + : thd->query(); + item= new (thd->mem_root) Item_param(lip->get_tok_start() - query_start); if (!($$= item) || lex->param_list.push_back(item)) { my_message(ER_OUT_OF_RESOURCES, ER(ER_OUT_OF_RESOURCES), MYF(0)); @@ -14152,6 +14126,7 @@ keyword_sp: | ALWAYS_SYM {} | ANY_SYM {} | AT_SYM {} + | ATOMIC_SYM {} | AUTHORS_SYM {} | AUTO_INC {} | AUTOEXTEND_SIZE_SYM {} @@ -14169,7 +14144,6 @@ keyword_sp: | CHAIN_SYM {} | CHANGED {} | CIPHER_SYM {} - | CLIENT_STATS_SYM {} | CLIENT_SYM {} | CLASS_ORIGIN_SYM {} | COALESCE {} @@ -14251,7 +14225,6 @@ keyword_sp: | ID_SYM {} | IDENTIFIED_SYM {} | IGNORE_SERVER_IDS_SYM {} - | INDEX_STATS_SYM {} | INVOKER_SYM {} | IMPORT {} | INDEXES {} @@ -14297,6 +14270,7 @@ keyword_sp: | MAX_CONNECTIONS_PER_HOUR {} | MAX_QUERIES_PER_HOUR {} | MAX_SIZE_SYM {} + | MAX_STATEMENT_TIME_SYM {} | MAX_UPDATES_PER_HOUR {} | MAX_USER_CONNECTIONS_SYM {} | MEDIUM_SYM {} @@ -14319,7 +14293,6 @@ keyword_sp: | NAMES_SYM {} | NATIONAL_SYM {} | NCHAR_SYM {} - | NDBCLUSTER_SYM {} | NEXT_SYM {} | NEW_SYM {} | NO_WAIT_SYM {} @@ 
-14415,7 +14388,6 @@ keyword_sp: | SWAPS_SYM {} | SWITCHES_SYM {} | TABLE_NAME_SYM {} - | TABLE_STATS_SYM {} | TABLES {} | TABLE_CHECKSUM_SYM {} | TABLESPACE {} @@ -14441,7 +14413,6 @@ keyword_sp: | UNKNOWN_SYM {} | UNTIL_SYM {} | USER {} - | USER_STATS_SYM {} | USE_FRM {} | VARIABLES {} | VIEW_SYM {} @@ -15651,6 +15622,12 @@ grant_option: lex->mqh.user_conn= $2; lex->mqh.specified_limits|= USER_RESOURCES::USER_CONNECTIONS; } + | MAX_STATEMENT_TIME_SYM NUM_literal + { + LEX *lex=Lex; + lex->mqh.max_statement_time= $2->val_real(); + lex->mqh.specified_limits|= USER_RESOURCES::MAX_STATEMENT_TIME; + } ; begin: @@ -15661,6 +15638,20 @@ begin: lex->start_transaction_opt= 0; } opt_work {} + ; + +compound_statement: + sp_proc_stmt_compound_ok + { + Lex->sql_command= SQLCOM_COMPOUND; + Lex->sphead->set_stmt_end(thd); + Lex->sphead->restore_thd_mem_root(thd); + } + ; + +opt_not: + /* nothing */ { $$= 0; } + | not { $$= 1; } ; opt_work: @@ -15763,14 +15754,13 @@ union_list: ; union_opt: - /* Empty */ { $$= 0; } + opt_union_order_or_limit | union_list { $$= 1; } - | union_order_or_limit { $$= 1; } ; opt_union_order_or_limit: - /* Empty */{ $$= false; } - | union_order_or_limit { $$= true; } + /* Empty */ { $$= 0; } + | union_order_or_limit { $$= 1; } ; union_order_or_limit: @@ -16032,12 +16022,12 @@ view_list: ident { Lex->view_list.push_back((LEX_STRING*) - sql_memdup(&$1, sizeof(LEX_STRING))); + thd->memdup(&$1, sizeof(LEX_STRING))); } | view_list ',' ident { Lex->view_list.push_back((LEX_STRING*) - sql_memdup(&$3, sizeof(LEX_STRING))); + thd->memdup(&$3, sizeof(LEX_STRING))); } ; @@ -16109,7 +16099,6 @@ trigger_tail: { /* $15 */ LEX *lex= thd->lex; Lex_input_stream *lip= YYLIP; - sp_head *sp; if (lex->sphead) { @@ -16117,21 +16106,14 @@ trigger_tail: MYSQL_YYABORT; } - if (!(sp= new sp_head())) - MYSQL_YYABORT; - sp->reset_thd_mem_root(thd); - sp->init(lex); - sp->m_type= TYPE_ENUM_TRIGGER; - sp->init_sp_name(thd, $3); lex->stmt_definition_begin= $2; lex->ident.str= $7; 
lex->ident.length= $11 - $7; - - lex->sphead= sp; lex->spname= $3; - bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics)); - lex->sphead->m_chistics= &lex->sp_chistics; + if (!make_sp_head(thd, $3, TYPE_ENUM_TRIGGER)) + MYSQL_YYABORT; + lex->sphead->set_body_start(thd, lip->get_cpp_ptr()); } sp_proc_stmt /* $16 */ @@ -16209,9 +16191,8 @@ sf_tail: sp_name /* $3 */ '(' /* $4 */ { /* $5 */ - LEX *lex= thd->lex; + LEX *lex= Lex; Lex_input_stream *lip= YYLIP; - sp_head *sp; const char* tmp_param_begin; lex->stmt_definition_begin= $1; @@ -16222,16 +16203,9 @@ sf_tail: my_error(ER_SP_NO_RECURSIVE_CREATE, MYF(0), "FUNCTION"); MYSQL_YYABORT; } - /* Order is important here: new - reset - init */ - sp= new sp_head(); - if (sp == NULL) - MYSQL_YYABORT; - sp->reset_thd_mem_root(thd); - sp->init(lex); - sp->init_sp_name(thd, lex->spname); - sp->m_type= TYPE_ENUM_FUNCTION; - lex->sphead= sp; + if (!make_sp_head(thd, $3, TYPE_ENUM_FUNCTION)) + MYSQL_YYABORT; tmp_param_begin= lip->get_cpp_tok_start(); tmp_param_begin++; @@ -16271,15 +16245,12 @@ sf_tail: (enum enum_field_types) $11, &sp->m_return_field_def)) MYSQL_YYABORT; - - bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics)); } sp_c_chistics /* $13 */ { /* $14 */ LEX *lex= thd->lex; Lex_input_stream *lip= YYLIP; - lex->sphead->m_chistics= &lex->sp_chistics; lex->sphead->set_body_start(thd, lip->get_cpp_tok_start()); } sp_proc_stmt /* $15 */ @@ -16339,27 +16310,16 @@ sf_tail: sp_tail: PROCEDURE_SYM remember_name sp_name { - LEX *lex= Lex; - sp_head *sp; - - if (lex->sphead) + if (Lex->sphead) { my_error(ER_SP_NO_RECURSIVE_CREATE, MYF(0), "PROCEDURE"); MYSQL_YYABORT; } - lex->stmt_definition_begin= $2; + Lex->stmt_definition_begin= $2; - /* Order is important here: new - reset - init */ - sp= new sp_head(); - if (sp == NULL) + if (!make_sp_head(thd, $3, TYPE_ENUM_PROCEDURE)) MYSQL_YYABORT; - sp->reset_thd_mem_root(thd); - sp->init(lex); - sp->m_type= TYPE_ENUM_PROCEDURE; - sp->init_sp_name(thd, $3); - - lex->sphead= sp; } 
'(' { @@ -16372,17 +16332,11 @@ sp_tail: sp_pdparam_list ')' { - LEX *lex= thd->lex; - - lex->sphead->m_param_end= YYLIP->get_cpp_tok_start(); - bzero((char *)&lex->sp_chistics, sizeof(st_sp_chistics)); + Lex->sphead->m_param_end= YYLIP->get_cpp_tok_start(); } sp_c_chistics { - LEX *lex= thd->lex; - - lex->sphead->m_chistics= &lex->sp_chistics; - lex->sphead->set_body_start(thd, YYLIP->get_cpp_tok_start()); + Lex->sphead->set_body_start(thd, YYLIP->get_cpp_tok_start()); } sp_proc_stmt { diff --git a/sql/structs.h b/sql/structs.h index 2de7abb666d..99561c5c730 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -93,7 +93,19 @@ typedef struct st_key { uint usable_key_parts; /* Should normally be = user_defined_key_parts */ uint ext_key_parts; /* Number of key parts in extended key */ ulong ext_key_flags; /* Flags for extended key */ - key_part_map ext_key_part_map; /* Bitmap of pk key parts in extension */ + /* + Parts of primary key that are in the extension of this index. + + Example: if this structure describes idx1, which is defined as + INDEX idx1 (pk2, col2) + and pk is defined as: + PRIMARY KEY (pk1, pk2) + then + pk1 is in the extension idx1, ext_key_part_map.is_set(0) == true + pk2 is explicitly present in idx1, it is not in the extension, so + ext_key_part_map.is_set(1) == false + */ + key_part_map ext_key_part_map; uint block_size; uint name_length; enum ha_key_alg algorithm; @@ -220,12 +232,15 @@ typedef struct user_resources { connections allowed */ int user_conn; + /* Max query timeout */ + double max_statement_time; + /* Values of this enum and specified_limits member are used by the parser to store which user limits were specified in GRANT statement. 
*/ enum {QUERIES_PER_HOUR= 1, UPDATES_PER_HOUR= 2, CONNECTIONS_PER_HOUR= 4, - USER_CONNECTIONS= 8}; + USER_CONNECTIONS= 8, MAX_STATEMENT_TIME= 16}; uint specified_limits; } USER_RESOURCES; @@ -270,84 +285,23 @@ typedef struct st_user_stats char priv_user[MY_MAX(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1]; uint user_name_length; uint total_connections; + uint total_ssl_connections; uint concurrent_connections; time_t connected_time; // in seconds - double busy_time; // in seconds - double cpu_time; // in seconds + ha_rows rows_read, rows_sent; + ha_rows rows_updated, rows_deleted, rows_inserted; ulonglong bytes_received; ulonglong bytes_sent; ulonglong binlog_bytes_written; - ha_rows rows_read, rows_sent; - ha_rows rows_updated, rows_deleted, rows_inserted; ulonglong select_commands, update_commands, other_commands; ulonglong commit_trans, rollback_trans; - ulonglong denied_connections, lost_connections; + ulonglong denied_connections, lost_connections, max_statement_time_exceeded; ulonglong access_denied_errors; ulonglong empty_queries; + double busy_time; // in seconds + double cpu_time; // in seconds } USER_STATS; -/* Lookup function for hash tables with USER_STATS entries */ -extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length, - my_bool not_used __attribute__((unused))); - -/* Free all memory for a hash table with USER_STATS entries */ -extern void free_user_stats(USER_STATS* user_stats); - -/* Intialize an instance of USER_STATS */ -extern void -init_user_stats(USER_STATS *user_stats, - const char *user, - size_t user_length, - const char *priv_user, - uint total_connections, - uint concurrent_connections, - time_t connected_time, - double busy_time, - double cpu_time, - ulonglong bytes_received, - ulonglong bytes_sent, - ulonglong binlog_bytes_written, - ha_rows rows_sent, - ha_rows rows_read, - ha_rows rows_inserted, - ha_rows rows_deleted, - ha_rows rows_updated, - ulonglong select_commands, - ulonglong update_commands, - ulonglong 
other_commands, - ulonglong commit_trans, - ulonglong rollback_trans, - ulonglong denied_connections, - ulonglong lost_connections, - ulonglong access_denied_errors, - ulonglong empty_queries); - -/* Increment values of an instance of USER_STATS */ -extern void -add_user_stats(USER_STATS *user_stats, - uint total_connections, - uint concurrent_connections, - time_t connected_time, - double busy_time, - double cpu_time, - ulonglong bytes_received, - ulonglong bytes_sent, - ulonglong binlog_bytes_written, - ha_rows rows_sent, - ha_rows rows_read, - ha_rows rows_inserted, - ha_rows rows_deleted, - ha_rows rows_updated, - ulonglong select_commands, - ulonglong update_commands, - ulonglong other_commands, - ulonglong commit_trans, - ulonglong rollback_trans, - ulonglong denied_connections, - ulonglong lost_connections, - ulonglong access_denied_errors, - ulonglong empty_queries); - typedef struct st_table_stats { char table[NAME_LEN * 2 + 2]; // [db] + '\0' + [table] + '\0' diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 9495d16247d..c8b589e0fd6 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -465,6 +465,26 @@ static bool binlog_format_check(sys_var *self, THD *thd, set_var *var) ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_BINLOG_FORMAT)) return true; +#ifdef WITH_WSREP + /* MariaDB Galera does not support STATEMENT or MIXED binlog + format currently */ + if (WSREP(thd) && + (var->save_result.ulonglong_value == BINLOG_FORMAT_STMT || + var->save_result.ulonglong_value == BINLOG_FORMAT_MIXED)) + { + WSREP_DEBUG("MariaDB Galera does not support binlog format : %s", + var->save_result.ulonglong_value == BINLOG_FORMAT_STMT ? + "STATEMENT" : "MIXED"); + /* Push also warning, because error message is general */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, + "MariaDB Galera does not support binlog format: %s", + var->save_result.ulonglong_value == BINLOG_FORMAT_STMT ? 
+ "STATEMENT" : "MIXED"); + return true; + } +#endif + return false; } @@ -602,8 +622,7 @@ static bool check_cs_client(sys_var *self, THD *thd, set_var *var) if (check_charset_not_null(self, thd, var)) return true; - // Currently, UCS-2 cannot be used as a client character set - if (((CHARSET_INFO *)(var->save_result.ptr))->mbminlen > 1) + if (!is_supported_parser_charset((CHARSET_INFO *)(var->save_result.ptr))) return true; return false; @@ -980,8 +999,7 @@ static Sys_var_session_lexstring Sys_default_master_connection( "Master connection to use for all slave variables and slave commands", SESSION_ONLY(default_master_connection), NO_CMD_LINE, IN_SYSTEM_CHARSET, - DEFAULT(""), MAX_CONNECTION_NAME, ON_CHECK(check_master_connection), - ON_UPDATE(0)); + DEFAULT(""), MAX_CONNECTION_NAME, ON_CHECK(check_master_connection)); #endif static Sys_var_charptr Sys_init_file( @@ -1059,8 +1077,8 @@ static Sys_var_keycache Sys_key_cache_age_threshold( static Sys_var_mybool Sys_large_files_support( "large_files_support", "Whether mysqld was compiled with options for large file support", - READ_ONLY SHOW_VALUE_IN_HELP GLOBAL_VAR(opt_large_files), - NO_CMD_LINE, DEFAULT(sizeof(my_off_t) > 4)); + READ_ONLY GLOBAL_VAR(opt_large_files), + CMD_LINE_HELP_ONLY, DEFAULT(sizeof(my_off_t) > 4)); static Sys_var_uint Sys_large_page_size( "large_page_size", @@ -1158,6 +1176,29 @@ static Sys_var_double Sys_long_query_time( NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(update_cached_long_query_time)); + +static bool update_cached_max_statement_time(sys_var *self, THD *thd, + enum_var_type type) +{ + if (type == OPT_SESSION) + thd->variables.max_statement_time= + double2ulonglong(thd->variables.max_statement_time_double * 1e6); + else + global_system_variables.max_statement_time= + double2ulonglong(global_system_variables.max_statement_time_double * 1e6); + return false; +} + +static Sys_var_double Sys_max_statement_time( + "max_statement_time", + "A SELECT query that have taken more than 
max_statement_time seconds " + "will be aborted. The argument will be treated as a decimal value " + "with microsecond precision. A value of 0 (default) means no timeout", + SESSION_VAR(max_statement_time_double), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, LONG_TIMEOUT), DEFAULT(0), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(update_cached_max_statement_time)); + static bool fix_low_prio_updates(sys_var *self, THD *thd, enum_var_type type) { if (type == OPT_SESSION) @@ -1181,8 +1222,8 @@ static Sys_var_mybool Sys_lower_case_file_system( "lower_case_file_system", "Case sensitivity of file names on the file system where the " "data directory is located", - READ_ONLY SHOW_VALUE_IN_HELP GLOBAL_VAR(lower_case_file_system), - NO_CMD_LINE, + READ_ONLY GLOBAL_VAR(lower_case_file_system), + CMD_LINE_HELP_ONLY, DEFAULT(FALSE)); static Sys_var_uint Sys_lower_case_table_names( @@ -1436,7 +1477,7 @@ static Sys_var_gtid_binlog_pos Sys_gtid_binlog_pos( uchar * -Sys_var_gtid_binlog_pos::global_value_ptr(THD *thd, LEX_STRING *base) +Sys_var_gtid_binlog_pos::global_value_ptr(THD *thd, const LEX_STRING *base) { char buf[128]; String str(buf, sizeof(buf), system_charset_info); @@ -1464,7 +1505,7 @@ static Sys_var_gtid_current_pos Sys_gtid_current_pos( uchar * -Sys_var_gtid_current_pos::global_value_ptr(THD *thd, LEX_STRING *base) +Sys_var_gtid_current_pos::global_value_ptr(THD *thd, const LEX_STRING *base) { String str; char *p; @@ -1549,7 +1590,7 @@ Sys_var_gtid_slave_pos::global_update(THD *thd, set_var *var) uchar * -Sys_var_gtid_slave_pos::global_value_ptr(THD *thd, LEX_STRING *base) +Sys_var_gtid_slave_pos::global_value_ptr(THD *thd, const LEX_STRING *base) { String str; char *p; @@ -1667,7 +1708,7 @@ Sys_var_gtid_binlog_state::global_update(THD *thd, set_var *var) uchar * -Sys_var_gtid_binlog_state::global_value_ptr(THD *thd, LEX_STRING *base) +Sys_var_gtid_binlog_state::global_value_ptr(THD *thd, const LEX_STRING *base) { char buf[512]; String str(buf, sizeof(buf), 
system_charset_info); @@ -1700,7 +1741,7 @@ static Sys_var_last_gtid Sys_last_gtid( uchar * -Sys_var_last_gtid::session_value_ptr(THD *thd, LEX_STRING *base) +Sys_var_last_gtid::session_value_ptr(THD *thd, const LEX_STRING *base) { char buf[10+1+10+1+20+1]; String str(buf, sizeof(buf), system_charset_info); @@ -2080,6 +2121,7 @@ static Sys_var_mybool Sys_old_passwords( "Use old password encryption method (needed for 4.0 and older clients)", SESSION_VAR(old_passwords), CMD_LINE(OPT_ARG), DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_old_passwords)); +export sys_var *Sys_old_passwords_ptr= &Sys_old_passwords; // for sql_acl.cc static Sys_var_ulong Sys_open_files_limit( "open_files_limit", @@ -2181,13 +2223,15 @@ export const char *optimizer_switch_names[]= "exists_to_in", "default", NullS }; -/** propagates changes to @@engine_condition_pushdown */ static bool fix_optimizer_switch(sys_var *self, THD *thd, enum_var_type type) { SV *sv= (type == OPT_GLOBAL) ? &global_system_variables : &thd->variables; - sv->engine_condition_pushdown= - MY_TEST(sv->optimizer_switch & OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN); + if (sv->optimizer_switch & deprecated_ENGINE_CONDITION_PUSHDOWN) + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT, + ER(ER_WARN_DEPRECATED_SYNTAX_NO_REPLACEMENT), + "engine_condition_pushdown=on"); return false; } static Sys_var_flagset Sys_optimizer_switch( @@ -2228,7 +2272,7 @@ static Sys_var_ulong Sys_preload_buff_size( static Sys_var_uint Sys_protocol_version( "protocol_version", "The version of the client/server protocol used by the MySQL server", - READ_ONLY SHOW_VALUE_IN_HELP GLOBAL_VAR(protocol_version), NO_CMD_LINE, + READ_ONLY GLOBAL_VAR(protocol_version), CMD_LINE_HELP_ONLY, VALID_RANGE(0, ~0), DEFAULT(PROTOCOL_VERSION), BLOCK_SIZE(1)); static Sys_var_proxy_user Sys_proxy_user( @@ -2770,8 +2814,7 @@ static Sys_var_replicate_events_marked_for_skip 
Replicate_events_marked_for_skip "@@skip_replication=1 will be filtered on the master and never be sent to " "the slave).", GLOBAL_VAR(opt_replicate_events_marked_for_skip), CMD_LINE(REQUIRED_ARG), - replicate_events_marked_for_skip_names, DEFAULT(RPL_SKIP_REPLICATE), - NO_MUTEX_GUARD, NOT_IN_BINLOG); + replicate_events_marked_for_skip_names, DEFAULT(RPL_SKIP_REPLICATE)); #endif @@ -2916,7 +2959,7 @@ static Sys_var_set Sys_old_behavior( "old_mode", "Used to emulate old behavior from earlier MariaDB or MySQL versions", SESSION_VAR(old_behavior), CMD_LINE(REQUIRED_ARG), - old_mode_names, DEFAULT(0), NO_MUTEX_GUARD, NOT_IN_BINLOG); + old_mode_names, DEFAULT(0)); #if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY) #define SSL_OPT(X) CMD_LINE(REQUIRED_ARG,X) @@ -2984,8 +3027,8 @@ static Sys_var_mybool Sys_sync_frm( static char *system_time_zone_ptr; static Sys_var_charptr Sys_system_time_zone( "system_time_zone", "The server system time zone", - READ_ONLY SHOW_VALUE_IN_HELP GLOBAL_VAR(system_time_zone_ptr), - NO_CMD_LINE, + READ_ONLY GLOBAL_VAR(system_time_zone_ptr), + CMD_LINE_HELP_ONLY, IN_SYSTEM_CHARSET, DEFAULT(system_time_zone)); static Sys_var_ulong Sys_table_def_size( @@ -3093,7 +3136,7 @@ static Sys_var_uint Sys_threadpool_size( "This parameter is roughly equivalent to maximum number of concurrently " "executing threads (threads in a waiting state do not count as executing).", GLOBAL_VAR(threadpool_size), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(1, MAX_THREAD_GROUPS), DEFAULT(my_getncpus()), BLOCK_SIZE(1), + VALID_RANGE(1, MAX_THREAD_GROUPS), DEFAULT(8), BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(check_threadpool_size), ON_UPDATE(fix_threadpool_size) ); @@ -3195,35 +3238,34 @@ static Sys_var_mybool Sys_timed_mutexes( static char *server_version_ptr; static Sys_var_charptr Sys_version( "version", "Server version", - READ_ONLY SHOW_VALUE_IN_HELP GLOBAL_VAR(server_version_ptr), - NO_CMD_LINE, + READ_ONLY GLOBAL_VAR(server_version_ptr), + 
CMD_LINE_HELP_ONLY, IN_SYSTEM_CHARSET, DEFAULT(server_version)); static char *server_version_comment_ptr; static Sys_var_charptr Sys_version_comment( "version_comment", "version_comment", - READ_ONLY SHOW_VALUE_IN_HELP GLOBAL_VAR(server_version_comment_ptr), - NO_CMD_LINE, + READ_ONLY GLOBAL_VAR(server_version_comment_ptr), + CMD_LINE_HELP_ONLY, IN_SYSTEM_CHARSET, DEFAULT(MYSQL_COMPILATION_COMMENT)); static char *server_version_compile_machine_ptr; static Sys_var_charptr Sys_version_compile_machine( "version_compile_machine", "version_compile_machine", - READ_ONLY SHOW_VALUE_IN_HELP - GLOBAL_VAR(server_version_compile_machine_ptr), NO_CMD_LINE, - IN_SYSTEM_CHARSET, DEFAULT(MACHINE_TYPE)); + READ_ONLY GLOBAL_VAR(server_version_compile_machine_ptr), + CMD_LINE_HELP_ONLY, IN_SYSTEM_CHARSET, DEFAULT(MACHINE_TYPE)); static char *server_version_compile_os_ptr; static Sys_var_charptr Sys_version_compile_os( "version_compile_os", "version_compile_os", - READ_ONLY SHOW_VALUE_IN_HELP GLOBAL_VAR(server_version_compile_os_ptr), - NO_CMD_LINE, + READ_ONLY GLOBAL_VAR(server_version_compile_os_ptr), + CMD_LINE_HELP_ONLY, IN_SYSTEM_CHARSET, DEFAULT(SYSTEM_TYPE)); static char *malloc_library; static Sys_var_charptr Sys_malloc_library( "version_malloc_library", "Version of the used malloc library", - READ_ONLY SHOW_VALUE_IN_HELP GLOBAL_VAR(malloc_library), NO_CMD_LINE, + READ_ONLY GLOBAL_VAR(malloc_library), CMD_LINE_HELP_ONLY, IN_SYSTEM_CHARSET, DEFAULT(MALLOC_LIBRARY)); static Sys_var_ulong Sys_net_wait_timeout( @@ -3250,8 +3292,7 @@ static Sys_var_plugin Sys_storage_engine( static Sys_var_plugin Sys_default_tmp_storage_engine( "default_tmp_storage_engine", "The default storage engine for user-created temporary tables", SESSION_VAR(tmp_table_plugin), NO_CMD_LINE, - MYSQL_STORAGE_ENGINE_PLUGIN, DEFAULT(&default_tmp_storage_engine), - NO_MUTEX_GUARD, NOT_IN_BINLOG); + MYSQL_STORAGE_ENGINE_PLUGIN, DEFAULT(&default_tmp_storage_engine)); #if defined(ENABLED_DEBUG_SYNC) /* @@ -3317,6 
+3358,8 @@ static bool fix_autocommit(sys_var *self, THD *thd, enum_var_type type) if (trans_commit_stmt(thd) || trans_commit(thd)) { thd->variables.option_bits&= ~OPTION_AUTOCOMMIT; + thd->mdl_context.release_transactional_locks(); + WSREP_DEBUG("autocommit, MDL TRX lock released: %lu", thd->thread_id); return true; } /* @@ -3696,7 +3739,7 @@ static char *glob_hostname_ptr; static Sys_var_charptr Sys_hostname( "hostname", "Server host name", READ_ONLY GLOBAL_VAR(glob_hostname_ptr), NO_CMD_LINE, - IN_FS_CHARSET, DEFAULT(glob_hostname)); + IN_SYSTEM_CHARSET, DEFAULT(glob_hostname)); #ifndef EMBEDDED_LIBRARY static Sys_var_charptr Sys_repl_report_host( @@ -3709,21 +3752,21 @@ static Sys_var_charptr Sys_repl_report_host( "NAT and other routing issues, that IP may not be valid for connecting " "to the slave from the master or other hosts", READ_ONLY GLOBAL_VAR(report_host), CMD_LINE(REQUIRED_ARG), - IN_FS_CHARSET, DEFAULT(0)); + IN_SYSTEM_CHARSET, DEFAULT(0)); static Sys_var_charptr Sys_repl_report_user( "report_user", "The account user name of the slave to be reported to the master " "during slave registration", READ_ONLY GLOBAL_VAR(report_user), CMD_LINE(REQUIRED_ARG), - IN_FS_CHARSET, DEFAULT(0)); + IN_SYSTEM_CHARSET, DEFAULT(0)); static Sys_var_charptr Sys_repl_report_password( "report_password", "The account password of the slave to be reported to the master " "during slave registration", READ_ONLY GLOBAL_VAR(report_password), CMD_LINE(REQUIRED_ARG), - IN_FS_CHARSET, DEFAULT(0)); + IN_SYSTEM_CHARSET, DEFAULT(0)); static Sys_var_uint Sys_repl_report_port( "report_port", @@ -3841,7 +3884,7 @@ static void reopen_slow_log(char* name) static bool fix_slow_log_file(sys_var *self, THD *thd, enum_var_type type) { return fix_log(&opt_slow_logname, opt_log_basename, "-slow.log", - opt_slow_log, reopen_slow_log); + global_system_variables.sql_log_slow, reopen_slow_log); } static Sys_var_charptr Sys_slow_log_path( "slow_query_log_file", "Log slow queries to given log file. 
" @@ -3892,6 +3935,7 @@ static Sys_var_have Sys_have_symlink( READ_ONLY GLOBAL_VAR(have_symlink), NO_CMD_LINE); static bool fix_log_state(sys_var *self, THD *thd, enum_var_type type); + static Sys_var_mybool Sys_general_log( "general_log", "Log connections and queries to a table or log file. " "Defaults logging to a file 'hostname'.log or a table mysql.general_log" @@ -3905,9 +3949,9 @@ static Sys_var_mybool Sys_slow_query_log( "Log slow queries to a table or log file. Defaults logging to a file " "'hostname'-slow.log or a table mysql.slow_log if --log-output=TABLE is " "used. Must be enabled to activate other slow log options", - GLOBAL_VAR(opt_slow_log), CMD_LINE(OPT_ARG), - DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), - ON_UPDATE(fix_log_state)); + SESSION_VAR(sql_log_slow), CMD_LINE(OPT_ARG), + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(0), ON_UPDATE(fix_log_state)); static bool fix_log_state(sys_var *self, THD *thd, enum_var_type type) { @@ -3915,6 +3959,9 @@ static bool fix_log_state(sys_var *self, THD *thd, enum_var_type type) my_bool *UNINIT_VAR(newvalptr), newval, UNINIT_VAR(oldval); uint UNINIT_VAR(log_type); + if (type != OPT_GLOBAL) + return 0; + if (self == &Sys_general_log) { newvalptr= &opt_log; @@ -3923,7 +3970,7 @@ static bool fix_log_state(sys_var *self, THD *thd, enum_var_type type) } else if (self == &Sys_slow_query_log) { - newvalptr= &opt_slow_log; + newvalptr= &global_system_variables.sql_log_slow; oldval= logger.get_slow_log_file_handler()->is_open(); log_type= QUERY_LOG_SLOW; } @@ -4077,7 +4124,7 @@ bool Sys_var_rpl_filter::set_filter_value(const char *value, Master_info *mi) return status; } -uchar *Sys_var_rpl_filter::global_value_ptr(THD *thd, LEX_STRING *base) +uchar *Sys_var_rpl_filter::global_value_ptr(THD *thd, const LEX_STRING *base) { char buf[256]; String tmp(buf, sizeof(buf), &my_charset_bin); @@ -4186,9 +4233,7 @@ static Sys_var_uint Sys_slave_net_timeout( "slave_net_timeout", "Number of seconds to 
wait for more data " "from any master/slave connection before aborting the read", GLOBAL_VAR(slave_net_timeout), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(SLAVE_NET_TIMEOUT), BLOCK_SIZE(1), - NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), - ON_UPDATE(0)); + VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(SLAVE_NET_TIMEOUT), BLOCK_SIZE(1)); /* @@ -4284,7 +4329,7 @@ static bool update_max_relay_log_size(sys_var *self, THD *thd, Master_info *mi) static Sys_var_multi_source_ulong Sys_max_relay_log_size( "max_relay_log_size", "relay log will be rotated automatically when the " - "size exceeds this value. If 0 are startup, it's " + "size exceeds this value. If 0 at startup, it's " "set to max_binlog_size", SESSION_VAR(max_relay_log_size), CMD_LINE(REQUIRED_ARG), @@ -4412,6 +4457,272 @@ static Sys_var_tz Sys_time_zone( SESSION_VAR(time_zone), NO_CMD_LINE, DEFAULT(&default_tz), NO_MUTEX_GUARD, IN_BINLOG); +#ifdef WITH_WSREP +#include "wsrep_var.h" +#include "wsrep_sst.h" +#include "wsrep_binlog.h" + +static Sys_var_charptr Sys_wsrep_provider( + "wsrep_provider", "Path to replication provider library", + PREALLOCATED GLOBAL_VAR(wsrep_provider), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT(WSREP_NONE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_check), ON_UPDATE(wsrep_provider_update)); + +static Sys_var_charptr Sys_wsrep_provider_options( + "wsrep_provider_options", "provider specific options", + PREALLOCATED GLOBAL_VAR(wsrep_provider_options), + CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_provider_options_check), + ON_UPDATE(wsrep_provider_options_update)); + +static Sys_var_charptr Sys_wsrep_data_home_dir( + "wsrep_data_home_dir", "home directory for wsrep provider", + READ_ONLY GLOBAL_VAR(wsrep_data_home_dir), CMD_LINE(REQUIRED_ARG), + IN_FS_CHARSET, DEFAULT("")); + +static Sys_var_charptr Sys_wsrep_cluster_name( + "wsrep_cluster_name", "Name for the cluster", + PREALLOCATED 
GLOBAL_VAR(wsrep_cluster_name), CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(WSREP_CLUSTER_NAME), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_name_check), + ON_UPDATE(wsrep_cluster_name_update)); + +static PolyLock_mutex PLock_wsrep_slave_threads(&LOCK_wsrep_slave_threads); +static Sys_var_charptr Sys_wsrep_cluster_address ( + "wsrep_cluster_address", "Address to initially connect to cluster", + PREALLOCATED GLOBAL_VAR(wsrep_cluster_address), + CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(""), + &PLock_wsrep_slave_threads, NOT_IN_BINLOG, + ON_CHECK(wsrep_cluster_address_check), + ON_UPDATE(wsrep_cluster_address_update)); + +static Sys_var_charptr Sys_wsrep_node_name ( + "wsrep_node_name", "Node name", + PREALLOCATED GLOBAL_VAR(wsrep_node_name), CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + wsrep_node_name_check, wsrep_node_name_update); + +static Sys_var_charptr Sys_wsrep_node_address ( + "wsrep_node_address", "Node address", + PREALLOCATED GLOBAL_VAR(wsrep_node_address), CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(""), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_node_address_check), + ON_UPDATE(wsrep_node_address_update)); + +static Sys_var_charptr Sys_wsrep_node_incoming_address( + "wsrep_node_incoming_address", "Client connection address", + PREALLOCATED GLOBAL_VAR(wsrep_node_incoming_address),CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(WSREP_NODE_INCOMING_AUTO)); + +static Sys_var_ulong Sys_wsrep_slave_threads( + "wsrep_slave_threads", "Number of slave appliers to launch", + GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1), + &PLock_wsrep_slave_threads, NOT_IN_BINLOG, + ON_CHECK(wsrep_slave_threads_check), + ON_UPDATE(wsrep_slave_threads_update)); + +static Sys_var_charptr Sys_wsrep_dbug_option( + "wsrep_dbug_option", "DBUG options to provider library", + GLOBAL_VAR(wsrep_dbug_option),CMD_LINE(REQUIRED_ARG), 
+ IN_SYSTEM_CHARSET, DEFAULT("")); + +static Sys_var_mybool Sys_wsrep_debug( + "wsrep_debug", "To enable debug level logging", + GLOBAL_VAR(wsrep_debug), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_convert_LOCK_to_trx( + "wsrep_convert_LOCK_to_trx", "To convert locking sessions " + "into transactions", + GLOBAL_VAR(wsrep_convert_LOCK_to_trx), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_wsrep_retry_autocommit( + "wsrep_retry_autocommit", "Max number of times to retry " + "a failed autocommit statement", + SESSION_VAR(wsrep_retry_autocommit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 10000), DEFAULT(1), BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_wsrep_auto_increment_control( + "wsrep_auto_increment_control", "To automatically control the " + "assignment of autoincrement variables", + GLOBAL_VAR(wsrep_auto_increment_control), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_drupal_282555_workaround( + "wsrep_drupal_282555_workaround", "To use a workaround for" + "bad autoincrement value", + GLOBAL_VAR(wsrep_drupal_282555_workaround), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_charptr sys_wsrep_sst_method( + "wsrep_sst_method", "State snapshot transfer method", + GLOBAL_VAR(wsrep_sst_method),CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(WSREP_SST_DEFAULT), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_method_check), + ON_UPDATE(wsrep_sst_method_update)); + +static Sys_var_charptr Sys_wsrep_sst_receive_address( + "wsrep_sst_receive_address", "Address where node is waiting for " + "SST contact", + GLOBAL_VAR(wsrep_sst_receive_address),CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(WSREP_SST_ADDRESS_AUTO), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_receive_address_check), + ON_UPDATE(wsrep_sst_receive_address_update)); + +static Sys_var_charptr Sys_wsrep_sst_auth( + "wsrep_sst_auth", "Authentication for SST connection", + PREALLOCATED 
GLOBAL_VAR(wsrep_sst_auth), CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(NULL), NO_MUTEX_GUARD, + NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_auth_check), + ON_UPDATE(wsrep_sst_auth_update)); + +static Sys_var_charptr Sys_wsrep_sst_donor( + "wsrep_sst_donor", "preferred donor node for the SST", + GLOBAL_VAR(wsrep_sst_donor),CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(""), NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_sst_donor_check), + ON_UPDATE(wsrep_sst_donor_update)); + +static Sys_var_mybool Sys_wsrep_sst_donor_rejects_queries( + "wsrep_sst_donor_rejects_queries", "Reject client queries " + "when donating state snapshot transfer", + GLOBAL_VAR(wsrep_sst_donor_rejects_queries), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_on ( + "wsrep_on", "To enable wsrep replication ", + SESSION_VAR(wsrep_on), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_on_update)); + +static Sys_var_charptr Sys_wsrep_start_position ( + "wsrep_start_position", "global transaction position to start from ", + PREALLOCATED GLOBAL_VAR(wsrep_start_position), + CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT(WSREP_START_POSITION_ZERO), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_start_position_check), + ON_UPDATE(wsrep_start_position_update)); + +static Sys_var_ulong Sys_wsrep_max_ws_size ( + "wsrep_max_ws_size", "Max write set size (bytes)", + GLOBAL_VAR(wsrep_max_ws_size), CMD_LINE(REQUIRED_ARG), + /* Upper limit is 65K short of 4G to avoid overlows on 32-bit systems */ + VALID_RANGE(1024, WSREP_MAX_WS_SIZE), DEFAULT(1073741824UL), BLOCK_SIZE(1)); + +static Sys_var_ulong Sys_wsrep_max_ws_rows ( + "wsrep_max_ws_rows", "Max number of rows in write set", + GLOBAL_VAR(wsrep_max_ws_rows), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 1048576), DEFAULT(131072), BLOCK_SIZE(1)); + +static Sys_var_charptr Sys_wsrep_notify_cmd( + "wsrep_notify_cmd", "", + 
GLOBAL_VAR(wsrep_notify_cmd),CMD_LINE(REQUIRED_ARG), + IN_SYSTEM_CHARSET, DEFAULT("")); + +static Sys_var_mybool Sys_wsrep_certify_nonPK( + "wsrep_certify_nonPK", "Certify tables with no primary key", + GLOBAL_VAR(wsrep_certify_nonPK), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static bool fix_wsrep_causal_reads(sys_var *self, THD* thd, enum_var_type var_type) +{ + if (var_type == OPT_GLOBAL) + wsrep_causal_reads_update(&global_system_variables); + else + wsrep_causal_reads_update(&thd->variables); + return false; +} +static Sys_var_mybool Sys_wsrep_causal_reads( + "wsrep_causal_reads", "(DEPRECATED) Setting this variable is equivalent " + "to setting wsrep_sync_wait READ flag", + SESSION_VAR(wsrep_causal_reads), CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(fix_wsrep_causal_reads)); + +static Sys_var_uint Sys_wsrep_sync_wait( + "wsrep_sync_wait", "Ensure \"synchronous\" read view before executing " + "an operation of the type specified by bitmask: 1 - READ(includes " + "SELECT, SHOW and BEGIN/START TRANSACTION); 2 - UPDATE and DELETE; 4 - " + "INSERT and REPLACE", + SESSION_VAR(wsrep_sync_wait), CMD_LINE(OPT_ARG), + VALID_RANGE(WSREP_SYNC_WAIT_NONE, WSREP_SYNC_WAIT_MAX), + DEFAULT(WSREP_SYNC_WAIT_NONE), BLOCK_SIZE(1), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), + ON_UPDATE(wsrep_sync_wait_update)); + +static const char *wsrep_OSU_method_names[]= { "TOI", "RSU", NullS }; +static Sys_var_enum Sys_wsrep_OSU_method( + "wsrep_OSU_method", "Method for Online Schema Upgrade", + GLOBAL_VAR(wsrep_OSU_method_options), CMD_LINE(OPT_ARG), + wsrep_OSU_method_names, DEFAULT(WSREP_OSU_TOI)); + +static PolyLock_mutex PLock_wsrep_desync(&LOCK_wsrep_desync); +static Sys_var_mybool Sys_wsrep_desync ( + "wsrep_desync", "To desynchronize the node from the cluster", + GLOBAL_VAR(wsrep_desync), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + &PLock_wsrep_desync, NOT_IN_BINLOG, + ON_CHECK(wsrep_desync_check), + ON_UPDATE(wsrep_desync_update)); + 
+static const char *wsrep_binlog_format_names[]= + {"MIXED", "STATEMENT", "ROW", "NONE", NullS}; +static Sys_var_enum Sys_wsrep_forced_binlog_format( + "wsrep_forced_binlog_format", "binlog format to take effect over user's choice", + GLOBAL_VAR(wsrep_forced_binlog_format), CMD_LINE(REQUIRED_ARG), + wsrep_binlog_format_names, DEFAULT(BINLOG_FORMAT_UNSPEC)); + +static Sys_var_mybool Sys_wsrep_recover_datadir( + "wsrep_recover", "Recover database state after crash and exit", + READ_ONLY GLOBAL_VAR(wsrep_recovery), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_replicate_myisam( + "wsrep_replicate_myisam", "To enable myisam replication", + GLOBAL_VAR(wsrep_replicate_myisam), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_log_conflicts( + "wsrep_log_conflicts", "To log multi-master conflicts", + GLOBAL_VAR(wsrep_log_conflicts), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_ulong Sys_wsrep_mysql_replication_bundle( + "wsrep_mysql_replication_bundle", "mysql replication group commit ", + GLOBAL_VAR(wsrep_mysql_replication_bundle), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 1000), DEFAULT(0), BLOCK_SIZE(1)); + +static Sys_var_mybool Sys_wsrep_load_data_splitting( + "wsrep_load_data_splitting", "To commit LOAD DATA " + "transaction after every 10K rows inserted", + GLOBAL_VAR(wsrep_load_data_splitting), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_slave_FK_checks( + "wsrep_slave_FK_checks", "Should slave thread do " + "foreign key constraint checks", + GLOBAL_VAR(wsrep_slave_FK_checks), + CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_slave_UK_checks( + "wsrep_slave_UK_checks", "Should slave thread do " + "secondary index uniqueness checks", + GLOBAL_VAR(wsrep_slave_UK_checks), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static Sys_var_mybool Sys_wsrep_restart_slave( + "wsrep_restart_slave", "Should MySQL slave be restarted automatically, when node joins back to cluster", 
+ GLOBAL_VAR(wsrep_restart_slave), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); +#endif /* WITH_WSREP */ + static bool fix_host_cache_size(sys_var *, THD *, enum_var_type) { hostname_cache_resize((uint) host_cache_size); @@ -4500,7 +4811,7 @@ static Sys_var_ulong Sys_extra_max_connections( #ifdef SAFE_MUTEX static Sys_var_mybool Sys_mutex_deadlock_detector( - "mutex_deadlock_detector", "Enable checking of wrong mutex usage", + "debug_mutex_deadlock_detector", "Enable checking of wrong mutex usage", READ_ONLY GLOBAL_VAR(safe_mutex_deadlock_detector), CMD_LINE(OPT_ARG), DEFAULT(TRUE)); #endif diff --git a/sql/sys_vars.h b/sql/sys_vars.h index cbf924ee036..fa997416cbd 100644 --- a/sql/sys_vars.h +++ b/sql/sys_vars.h @@ -41,7 +41,8 @@ #define GLOBAL_VAR(X) sys_var::GLOBAL, (((char*)&(X))-(char*)&global_system_variables), sizeof(X) #define SESSION_VAR(X) sys_var::SESSION, offsetof(SV, X), sizeof(((SV *)0)->X) #define SESSION_ONLY(X) sys_var::ONLY_SESSION, offsetof(SV, X), sizeof(((SV *)0)->X) -#define NO_CMD_LINE CMD_LINE(NO_ARG, -1) +#define NO_CMD_LINE CMD_LINE(NO_ARG, sys_var::NO_GETOPT) +#define CMD_LINE_HELP_ONLY CMD_LINE(NO_ARG, sys_var::GETOPT_ONLY_HELP) /* the define below means that there's no *second* mutex guard, LOCK_global_system_variables always guards all system variables @@ -56,7 +57,6 @@ // this means that Sys_var_charptr initial value was malloc()ed #define PREALLOCATED sys_var::ALLOCATED+ #define PARSED_EARLY sys_var::PARSE_EARLY+ -#define SHOW_VALUE_IN_HELP sys_var::SHOW_VALUE_IN_HELP+ /* Sys_var_bit meaning is reversed, like in @@ -84,8 +84,8 @@ #define SYSVAR_ASSERT(X) \ while(!(X)) \ { \ - fprintf(stderr, "Sysvar '%s' failed '%s'\n", name_arg, #X); \ - DBUG_ABORT(); \ + fprintf(stderr, "Sysvar '%s' failed '%s'\n", name_arg, #X); \ + DBUG_ABORT(); \ exit(255); \ } @@ -208,8 +208,6 @@ public: global_var(T)= static_cast<T>(var->save_result.ulonglong_value); return false; } - bool check_update_type(Item_result type) - { return type != INT_RESULT; } void 
session_save_default(THD *thd, set_var *var) { var->save_result.ulonglong_value= (ulonglong)*(T*)global_value_ptr(thd, 0); } void global_save_default(THD *thd, set_var *var) @@ -284,8 +282,6 @@ public: return false; } - bool check_update_type(Item_result type) - { return type != INT_RESULT && type != STRING_RESULT; } }; /** @@ -337,10 +333,14 @@ public: { var->save_result.ulonglong_value= global_var(ulong); } void global_save_default(THD *thd, set_var *var) { var->save_result.ulonglong_value= option.def_value; } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) - { return (uchar*)typelib.type_names[session_var(thd, ulong)]; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) - { return (uchar*)typelib.type_names[global_var(ulong)]; } + uchar *valptr(THD *thd, ulong val) + { return (uchar*)typelib.type_names[val]; } + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, session_var(thd, ulong)); } + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, global_var(ulong)); } + uchar *default_value_ptr(THD *thd) + { return valptr(thd, option.def_value); } }; /** @@ -368,7 +368,7 @@ public: option.var_type= GET_BOOL; global_var(my_bool)= def_val; SYSVAR_ASSERT(def_val < 2); - SYSVAR_ASSERT(getopt.arg_type == OPT_ARG || getopt.id == -1); + SYSVAR_ASSERT(getopt.arg_type == OPT_ARG || getopt.id < 0); SYSVAR_ASSERT(size == sizeof(my_bool)); } bool session_update(THD *thd, set_var *var) @@ -499,8 +499,6 @@ public: var->save_result.string_value.str= ptr; var->save_result.string_value.length= ptr ? 
strlen(ptr) : 0; } - bool check_update_type(Item_result type) - { return type != STRING_RESULT; } }; @@ -510,7 +508,7 @@ public: Sys_var_proxy_user(const char *name_arg, const char *comment, enum charset_enum is_os_charset_arg) : sys_var(&all_sys_vars, name_arg, comment, - sys_var::READONLY+sys_var::ONLY_SESSION, 0, -1, + sys_var::READONLY+sys_var::ONLY_SESSION, 0, NO_GETOPT, NO_ARG, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, NULL) { @@ -536,10 +534,8 @@ public: { DBUG_ASSERT(FALSE); } void global_save_default(THD *thd, set_var *var) { DBUG_ASSERT(FALSE); } - bool check_update_type(Item_result type) - { return true; } protected: - virtual uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { return thd->security_ctx->proxy_user[0] ? (uchar *) &(thd->security_ctx->proxy_user[0]) : NULL; @@ -555,7 +551,7 @@ public: {} protected: - virtual uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { return (uchar*)thd->security_ctx->external_user; } @@ -569,7 +565,7 @@ private: public: Sys_var_rpl_filter(const char *name, int getopt_id, const char *comment) - : sys_var(&all_sys_vars, name, comment, sys_var::GLOBAL, 0, -1, + : sys_var(&all_sys_vars, name, comment, sys_var::GLOBAL, 0, NO_GETOPT, NO_ARG, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, NULL), opt_id(getopt_id) { @@ -580,9 +576,6 @@ public: { return Sys_var_charptr::do_string_check(thd, var, charset(thd)); } - bool check_update_type(Item_result type) - { return type != STRING_RESULT; } - void session_save_default(THD *thd, set_var *var) { DBUG_ASSERT(FALSE); } @@ -598,7 +591,7 @@ public: bool global_update(THD *thd, set_var *var); protected: - uchar *global_value_ptr(THD *thd, LEX_STRING *base); + uchar *global_value_ptr(THD *thd, const LEX_STRING *base); bool set_filter_value(const char *value, Master_info *mi); }; @@ -665,7 +658,7 @@ public: 0, VARIABLE_NOT_IN_BINLOG, 
on_check_func, on_update_func, 0),max_length(max_length_arg) { - option.var_type= GET_NO_ARG; + option.var_type= GET_STR; SYSVAR_ASSERT(scope() == ONLY_SESSION) *const_cast<SHOW_TYPE*>(&show_val_type)= SHOW_LEX_STRING; } @@ -716,23 +709,17 @@ public: { DBUG_ASSERT(FALSE); } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) - { - return (uchar*) &session_var(thd, LEX_STRING); - } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(FALSE); return NULL; } - bool check_update_type(Item_result type) - { return type != STRING_RESULT; } }; #ifndef DBUG_OFF /** - @@session.dbug and @@global.dbug variables. + @@session.debug_dbug and @@global.debug_dbug variables. @@dbug variable differs from other variables in one aspect: if its value is not assigned in the session, it "points" to the global @@ -757,7 +744,7 @@ public: getopt.arg_type, SHOW_CHAR, (intptr)def_val, lock, binlog_status_arg, on_check_func, on_update_func, substitute) - { option.var_type= GET_NO_ARG; } + { option.var_type= GET_STR; } bool do_check(THD *thd, set_var *var) { char buff[STRING_BUFFER_USUAL_SIZE]; @@ -791,20 +778,20 @@ public: char *ptr= (char*)(intptr)option.def_value; var->save_result.string_value.str= ptr; } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { char buf[256]; DBUG_EXPLAIN(buf, sizeof(buf)); return (uchar*) thd->strdup(buf); } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) { char buf[256]; DBUG_EXPLAIN_INITIAL(buf, sizeof(buf)); return (uchar*) thd->strdup(buf); } - bool check_update_type(Item_result type) - { return type != STRING_RESULT; } + uchar *default_value_ptr(THD *thd) + { return (uchar*)""; } }; #endif @@ -879,7 +866,7 @@ public: return keycache_update(thd, key_cache, offset, new_value); } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar 
*global_value_ptr(THD *thd, const LEX_STRING *base) { KEY_CACHE *key_cache= get_key_cache(base); if (!key_cache) @@ -1031,10 +1018,6 @@ public: global_var(double)= var->save_result.double_value; return false; } - bool check_update_type(Item_result type) - { - return type != INT_RESULT && type != REAL_RESULT && type != DECIMAL_RESULT; - } void session_save_default(THD *thd, set_var *var) { var->save_result.double_value= global_var(double); } void global_save_default(THD *thd, set_var *var) @@ -1068,7 +1051,7 @@ public: lock, binlog_status_arg, on_check_func, on_update_func, substitute) { } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { if (thd->user_connect && thd->user_connect->user_resources.user_conn) return (uchar*) &(thd->user_connect->user_resources.user_conn); @@ -1183,16 +1166,14 @@ public: { var->save_result.ulonglong_value= global_var(ulonglong); } void global_save_default(THD *thd, set_var *var) { var->save_result.ulonglong_value= option.def_value; } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) - { - return (uchar*)flagset_to_string(thd, 0, session_var(thd, ulonglong), - typelib.type_names); - } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) - { - return (uchar*)flagset_to_string(thd, 0, global_var(ulonglong), - typelib.type_names); - } + uchar *valptr(THD *thd, ulonglong val) + { return (uchar*)flagset_to_string(thd, 0, val, typelib.type_names); } + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, session_var(thd, ulonglong)); } + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, global_var(ulonglong)); } + uchar *default_value_ptr(THD *thd) + { return valptr(thd, option.def_value); } }; /** @@ -1284,16 +1265,14 @@ public: { var->save_result.ulonglong_value= global_var(ulonglong); } void global_save_default(THD *thd, set_var *var) { var->save_result.ulonglong_value= option.def_value; } - uchar 
*session_value_ptr(THD *thd, LEX_STRING *base) - { - return (uchar*)set_to_string(thd, 0, session_var(thd, ulonglong), - typelib.type_names); - } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) - { - return (uchar*)set_to_string(thd, 0, global_var(ulonglong), - typelib.type_names); - } + uchar *valptr(THD *thd, ulonglong val) + { return (uchar*)set_to_string(thd, 0, val, typelib.type_names); } + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, session_var(thd, ulonglong)); } + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, global_var(ulonglong)); } + uchar *default_value_ptr(THD *thd) + { return valptr(thd, option.def_value); } }; /** @@ -1329,7 +1308,7 @@ public: { option.var_type= GET_STR; SYSVAR_ASSERT(size == sizeof(plugin_ref)); - SYSVAR_ASSERT(getopt.id == -1); // force NO_CMD_LINE + SYSVAR_ASSERT(getopt.id < 0); // force NO_CMD_LINE } bool do_check(THD *thd, set_var *var) { @@ -1387,41 +1366,39 @@ public: plugin_ref plugin= global_var(plugin_ref); var->save_result.plugin= plugin ? 
my_plugin_lock(thd, plugin) : 0; } - void global_save_default(THD *thd, set_var *var) + plugin_ref get_default(THD *thd) { - LEX_STRING pname; char *default_value= *reinterpret_cast<char**>(option.def_value); if (!default_value) - var->save_result.plugin= 0; - else - { - pname.str= default_value; - pname.length= strlen(pname.str); + return 0; - plugin_ref plugin; - if (plugin_type == MYSQL_STORAGE_ENGINE_PLUGIN) - plugin= ha_resolve_by_name(thd, &pname, false); - else - plugin= my_plugin_lock_by_name(thd, &pname, plugin_type); - DBUG_ASSERT(plugin); + LEX_STRING pname= { default_value, strlen(pname.str) }; + plugin_ref plugin; - var->save_result.plugin= my_plugin_lock(thd, plugin); - } + if (plugin_type == MYSQL_STORAGE_ENGINE_PLUGIN) + plugin= ha_resolve_by_name(thd, &pname, false); + else + plugin= my_plugin_lock_by_name(thd, &pname, plugin_type); + DBUG_ASSERT(plugin); + return my_plugin_lock(thd, plugin); } - bool check_update_type(Item_result type) - { return type != STRING_RESULT; } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + + void global_save_default(THD *thd, set_var *var) { - plugin_ref plugin= session_var(thd, plugin_ref); - return (uchar*)(plugin ? thd->strmake(plugin_name(plugin)->str, - plugin_name(plugin)->length) : 0); + var->save_result.plugin= get_default(thd); } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + + uchar *valptr(THD *thd, plugin_ref plugin) { - plugin_ref plugin= global_var(plugin_ref); return (uchar*)(plugin ? 
thd->strmake(plugin_name(plugin)->str, plugin_name(plugin)->length) : 0); } + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, session_var(thd, plugin_ref)); } + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, global_var(plugin_ref)); } + uchar *default_value_ptr(THD *thd) + { return valptr(thd, get_default(thd)); } }; #if defined(ENABLED_DEBUG_SYNC) @@ -1445,7 +1422,7 @@ public: substitute) { SYSVAR_ASSERT(scope() == ONLY_SESSION); - option.var_type= GET_NO_ARG; + option.var_type= GET_STR; } bool do_check(THD *thd, set_var *var) { @@ -1477,18 +1454,18 @@ public: { DBUG_ASSERT(FALSE); } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { extern uchar *debug_sync_value_ptr(THD *thd); return debug_sync_value_ptr(thd); } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(FALSE); return 0; } - bool check_update_type(Item_result type) - { return type != STRING_RESULT; } + uchar *default_value_ptr(THD *thd) + { return (uchar*)""; } }; #endif /* defined(ENABLED_DEBUG_SYNC) */ @@ -1542,7 +1519,7 @@ public: bitmask= reverse_semantics ? 
~bitmask_arg : bitmask_arg; set(global_var_ptr(), def_val); SYSVAR_ASSERT(def_val < 2); - SYSVAR_ASSERT(getopt.id == -1); // force NO_CMD_LINE + SYSVAR_ASSERT(getopt.id < 0); // force NO_CMD_LINE SYSVAR_ASSERT(size == sizeof(ulonglong)); } bool session_update(THD *thd, set_var *var) @@ -1559,16 +1536,19 @@ public: { var->save_result.ulonglong_value= global_var(ulonglong) & bitmask; } void global_save_default(THD *thd, set_var *var) { var->save_result.ulonglong_value= option.def_value; } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + + uchar *valptr(THD *thd, ulonglong val) { - thd->sys_var_tmp.my_bool_value= reverse_semantics ^ - ((session_var(thd, ulonglong) & bitmask) != 0); + thd->sys_var_tmp.my_bool_value= reverse_semantics ^ ((val & bitmask) != 0); return (uchar*) &thd->sys_var_tmp.my_bool_value; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, session_var(thd, ulonglong)); } + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, global_var(ulonglong)); } + uchar *default_value_ptr(THD *thd) { - thd->sys_var_tmp.my_bool_value= reverse_semantics ^ - ((global_var(ulonglong) & bitmask) != 0); + thd->sys_var_tmp.my_bool_value= option.def_value != 0; return (uchar*) &thd->sys_var_tmp.my_bool_value; } }; @@ -1612,7 +1592,7 @@ public: read_func(read_func_arg), update_func(update_func_arg) { SYSVAR_ASSERT(scope() == ONLY_SESSION); - SYSVAR_ASSERT(getopt.id == -1); // NO_CMD_LINE, because the offset is fake + SYSVAR_ASSERT(getopt.id < 0); // NO_CMD_LINE, because the offset is fake } bool session_update(THD *thd, set_var *var) { return update_func(thd, var); } @@ -1625,16 +1605,21 @@ public: { var->value= 0; } void global_save_default(THD *thd, set_var *var) { DBUG_ASSERT(FALSE); } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { thd->sys_var_tmp.ulonglong_value= 
read_func(thd); return (uchar*) &thd->sys_var_tmp.ulonglong_value; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(FALSE); return 0; } + uchar *default_value_ptr(THD *thd) + { + thd->sys_var_tmp.ulonglong_value= 0; + return (uchar*) &thd->sys_var_tmp.ulonglong_value; + } }; @@ -1662,7 +1647,7 @@ public: read_func(read_func_arg), update_func(update_func_arg) { SYSVAR_ASSERT(scope() == ONLY_SESSION); - SYSVAR_ASSERT(getopt.id == -1); // NO_CMD_LINE, because the offset is fake + SYSVAR_ASSERT(getopt.id < 0); // NO_CMD_LINE, because the offset is fake } bool session_update(THD *thd, set_var *var) { return update_func(thd, var); } @@ -1675,16 +1660,21 @@ public: { var->value= 0; } void global_save_default(THD *thd, set_var *var) { DBUG_ASSERT(FALSE); } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { thd->sys_var_tmp.double_value= read_func(thd); return (uchar*) &thd->sys_var_tmp.double_value; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(FALSE); return 0; } + uchar *default_value_ptr(THD *thd) + { + thd->sys_var_tmp.double_value= 0; + return (uchar*) &thd->sys_var_tmp.double_value; + } }; @@ -1715,12 +1705,13 @@ public: substitute) { SYSVAR_ASSERT(scope() == GLOBAL); - SYSVAR_ASSERT(getopt.id == -1); + SYSVAR_ASSERT(getopt.id < 0); SYSVAR_ASSERT(lock == 0); SYSVAR_ASSERT(binlog_status_arg == VARIABLE_NOT_IN_BINLOG); SYSVAR_ASSERT(is_readonly()); SYSVAR_ASSERT(on_update == 0); SYSVAR_ASSERT(size == sizeof(enum SHOW_COMP_OPTION)); + option.var_type= GET_STR; } bool do_check(THD *thd, set_var *var) { DBUG_ASSERT(FALSE); @@ -1738,16 +1729,15 @@ public: } void session_save_default(THD *thd, set_var *var) { } void global_save_default(THD *thd, set_var *var) { } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar 
*session_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(FALSE); return 0; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) { return (uchar*)show_comp_option_name[global_var(enum SHOW_COMP_OPTION)]; } - bool check_update_type(Item_result type) { return false; } }; /** @@ -1783,7 +1773,7 @@ public: substitute), name_offset(name_off) { - option.var_type= GET_STR; + option.var_type= GET_ENUM; // because we accept INT and STRING here /* struct variables are special on the command line - often (e.g. for charsets) the name cannot be immediately resolved, but only after all @@ -1792,7 +1782,7 @@ public: thus all struct command-line options should be added manually to my_long_options in mysqld.cc */ - SYSVAR_ASSERT(getopt.id == -1); + SYSVAR_ASSERT(getopt.id < 0); SYSVAR_ASSERT(size == sizeof(void *)); } bool do_check(THD *thd, set_var *var) @@ -1814,18 +1804,14 @@ public: void **default_value= reinterpret_cast<void**>(option.def_value); var->save_result.ptr= *default_value; } - bool check_update_type(Item_result type) - { return type != INT_RESULT && type != STRING_RESULT; } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) - { - uchar *ptr= session_var(thd, uchar*); - return ptr ? *(uchar**)(ptr+name_offset) : 0; - } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) - { - uchar *ptr= global_var(uchar*); - return ptr ? *(uchar**)(ptr+name_offset) : 0; - } + uchar *valptr(THD *thd, uchar *val) + { return val ? 
*(uchar**)(val+name_offset) : 0; } + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, session_var(thd, uchar*)); } + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, global_var(uchar*)); } + uchar *default_value_ptr(THD *thd) + { return valptr(thd, *(uchar**)option.def_value); } }; /** @@ -1854,8 +1840,9 @@ public: lock, binlog_status_arg, on_check_func, on_update_func, substitute) { - SYSVAR_ASSERT(getopt.id == -1); + SYSVAR_ASSERT(getopt.id < 0); SYSVAR_ASSERT(size == sizeof(Time_zone *)); + option.var_type= GET_STR; } bool do_check(THD *thd, set_var *var) { @@ -1893,7 +1880,9 @@ public: var->save_result.time_zone= *(Time_zone**)(intptr)option.def_value; } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *valptr(THD *thd, Time_zone *val) + { return (uchar *)(val->get_name()->ptr()); } + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { /* This is an ugly fix for replication: we don't replicate properly queries @@ -1904,14 +1893,12 @@ public: (binlog code stores session value only). 
*/ thd->time_zone_used= 1; - return (uchar *)(session_var(thd, Time_zone*)->get_name()->ptr()); + return valptr(thd, session_var(thd, Time_zone *)); } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) - { - return (uchar *)(global_var(Time_zone*)->get_name()->ptr()); - } - bool check_update_type(Item_result type) - { return type != STRING_RESULT; } + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) + { return valptr(thd, global_var(Time_zone*)); } + uchar *default_value_ptr(THD *thd) + { return valptr(thd, *(Time_zone**)option.def_value); } }; /** @@ -1981,8 +1968,8 @@ public: Sys_var_replicate_events_marked_for_skip(const char *name_arg, const char *comment, int flag_args, ptrdiff_t off, size_t size, CMD_LINE getopt, - const char *values[], uint def_val, PolyLock *lock, - enum binlog_status_enum binlog_status_arg) + const char *values[], uint def_val, PolyLock *lock= 0, + enum binlog_status_enum binlog_status_arg= VARIABLE_NOT_IN_BINLOG) :Sys_var_enum(name_arg, comment, flag_args, off, size, getopt, values, def_val, lock, binlog_status_arg) {} @@ -2042,7 +2029,7 @@ public: /* Use value given in variable declaration */ global_save_default(thd, var); } - uchar *session_value_ptr(THD *thd,LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { uint *tmp, res; tmp= (uint*) (((uchar*)&(thd->variables)) + offset); @@ -2050,7 +2037,7 @@ public: *tmp= res; return (uchar*) tmp; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) { return session_value_ptr(thd, base); } @@ -2075,6 +2062,8 @@ public: getopt.arg_type, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, NULL) { + SYSVAR_ASSERT(getopt.id < 0); + SYSVAR_ASSERT(is_readonly()); option.var_type= GET_STR; } bool do_check(THD *thd, set_var *var) @@ -2092,10 +2081,6 @@ public: DBUG_ASSERT(false); return true; } - bool check_update_type(Item_result type) { - DBUG_ASSERT(false); - return false; - } void 
session_save_default(THD *thd, set_var *var) { DBUG_ASSERT(false); @@ -2104,12 +2089,12 @@ public: { DBUG_ASSERT(false); } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(false); return NULL; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base); + uchar *global_value_ptr(THD *thd, const LEX_STRING *base); }; @@ -2126,6 +2111,8 @@ public: getopt.arg_type, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, NULL) { + SYSVAR_ASSERT(getopt.id < 0); + SYSVAR_ASSERT(is_readonly()); option.var_type= GET_STR; } bool do_check(THD *thd, set_var *var) @@ -2143,10 +2130,6 @@ public: DBUG_ASSERT(false); return true; } - bool check_update_type(Item_result type) { - DBUG_ASSERT(false); - return false; - } void session_save_default(THD *thd, set_var *var) { DBUG_ASSERT(false); @@ -2155,12 +2138,12 @@ public: { DBUG_ASSERT(false); } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(false); return NULL; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base); + uchar *global_value_ptr(THD *thd, const LEX_STRING *base); }; @@ -2186,7 +2169,6 @@ public: return true; } bool global_update(THD *thd, set_var *var); - bool check_update_type(Item_result type) { return type != STRING_RESULT; } void session_save_default(THD *thd, set_var *var) { DBUG_ASSERT(false); @@ -2196,12 +2178,14 @@ public: /* Record the attempt to use default so we can error. 
*/ var->value= 0; } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(false); return NULL; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base); + uchar *global_value_ptr(THD *thd, const LEX_STRING *base); + uchar *default_value_ptr(THD *thd) + { return 0; } }; @@ -2227,7 +2211,6 @@ public: return true; } bool global_update(THD *thd, set_var *var); - bool check_update_type(Item_result type) { return type != STRING_RESULT; } void session_save_default(THD *thd, set_var *var) { DBUG_ASSERT(false); @@ -2237,12 +2220,14 @@ public: /* Record the attempt to use default so we can error. */ var->value= 0; } - uchar *session_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(false); return NULL; } - uchar *global_value_ptr(THD *thd, LEX_STRING *base); + uchar *global_value_ptr(THD *thd, const LEX_STRING *base); + uchar *default_value_ptr(THD *thd) + { return 0; } }; @@ -2258,6 +2243,8 @@ public: getopt.arg_type, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, NULL) { + SYSVAR_ASSERT(getopt.id < 0); + SYSVAR_ASSERT(is_readonly()); option.var_type= GET_STR; } bool do_check(THD *thd, set_var *var) @@ -2275,10 +2262,6 @@ public: DBUG_ASSERT(false); return true; } - bool check_update_type(Item_result type) { - DBUG_ASSERT(false); - return false; - } void session_save_default(THD *thd, set_var *var) { DBUG_ASSERT(false); @@ -2287,8 +2270,8 @@ public: { DBUG_ASSERT(false); } - uchar *session_value_ptr(THD *thd, LEX_STRING *base); - uchar *global_value_ptr(THD *thd, LEX_STRING *base) + uchar *session_value_ptr(THD *thd, const LEX_STRING *base); + uchar *global_value_ptr(THD *thd, const LEX_STRING *base) { DBUG_ASSERT(false); return NULL; diff --git a/sql/table.cc b/sql/table.cc index d720a16f6e5..4f642cadaa2 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -4125,7 +4125,7 @@ void TABLE::reset_item_list(List<Item> *item_list) const 
void TABLE_LIST::calc_md5(char *buffer) { uchar digest[16]; - compute_md5_hash((char*) digest, select_stmt.str, + compute_md5_hash(digest, select_stmt.str, select_stmt.length); sprintf((char *) buffer, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", diff --git a/sql/table.h b/sql/table.h index b57e9c7227d..eca35d6c52c 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1113,6 +1113,7 @@ public: and max #key parts that range access would use. */ ha_rows quick_rows[MAX_KEY]; + double quick_costs[MAX_KEY]; /* Bitmaps of key parts that =const for the duration of join execution. If @@ -1486,8 +1487,8 @@ typedef struct st_schema_table { const char* table_name; ST_FIELD_INFO *fields_info; - /* Create information_schema table */ - TABLE *(*create_table) (THD *thd, TABLE_LIST *table_list); + /* for FLUSH table_name */ + int (*reset_table) (); /* Fill table with data */ int (*fill_table) (THD *thd, TABLE_LIST *tables, COND *cond); /* Handle fileds for old SHOW */ diff --git a/sql/transaction.cc b/sql/transaction.cc index 933e39ae357..223d507a799 100644 --- a/sql/transaction.cc +++ b/sql/transaction.cc @@ -97,6 +97,8 @@ static bool xa_trans_force_rollback(THD *thd) by ha_rollback()/THD::transaction::cleanup(). 
*/ thd->transaction.xid_state.rm_error= 0; + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); if (ha_rollback_trans(thd, true)) { my_error(ER_XAER_RMERR, MYF(0)); @@ -135,10 +137,14 @@ bool trans_begin(THD *thd, uint flags) (thd->variables.option_bits & OPTION_TABLE_LOCK)) { thd->variables.option_bits&= ~OPTION_TABLE_LOCK; + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= MY_TEST(ha_commit_trans(thd, TRUE)); + if (WSREP_ON) + wsrep_post_commit(thd, TRUE); } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); @@ -181,6 +187,12 @@ bool trans_begin(THD *thd, uint flags) thd->tx_read_only= false; } +#ifdef WITH_WSREP + thd->wsrep_PA_safe= true; + if (WSREP_CLIENT(thd) && wsrep_sync_wait(thd)) + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ + thd->variables.option_bits|= OPTION_BEGIN; thd->server_status|= SERVER_STATUS_IN_TRANS; if (thd->tx_read_only) @@ -212,10 +224,14 @@ bool trans_commit(THD *thd) if (trans_check(thd)) DBUG_RETURN(TRUE); + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= ha_commit_trans(thd, TRUE); + if (WSREP_ON) + wsrep_post_commit(thd, TRUE); /* if res is non-zero, then ha_commit_trans has rolled back the transaction, so the hooks for rollback will be called. 
@@ -261,10 +277,14 @@ bool trans_commit_implicit(THD *thd) /* Safety if one did "drop table" on locked tables */ if (!thd->locked_tables_mode) thd->variables.option_bits&= ~OPTION_TABLE_LOCK; + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); res= MY_TEST(ha_commit_trans(thd, TRUE)); + if (WSREP_ON) + wsrep_post_commit(thd, TRUE); } thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG); @@ -297,9 +317,14 @@ bool trans_rollback(THD *thd) int res; DBUG_ENTER("trans_rollback"); +#ifdef WITH_WSREP + thd->wsrep_PA_safe= true; +#endif /* WITH_WSREP */ if (trans_check(thd)) DBUG_RETURN(TRUE); + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); @@ -390,11 +415,15 @@ bool trans_commit_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { + if (WSREP_ON) + wsrep_register_hton(thd, FALSE); res= ha_commit_trans(thd, FALSE); if (! thd->in_active_multi_stmt_transaction()) { thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; thd->tx_read_only= thd->variables.tx_read_only; + if (WSREP_ON) + wsrep_post_commit(thd, FALSE); } } @@ -435,6 +464,8 @@ bool trans_rollback_stmt(THD *thd) if (thd->transaction.stmt.ha_list) { + if (WSREP_ON) + wsrep_register_hton(thd, FALSE); ha_rollback_trans(thd, FALSE); if (! thd->in_active_multi_stmt_transaction()) { @@ -812,9 +843,13 @@ bool trans_xa_commit(THD *thd) } else if (xa_state == XA_IDLE && thd->lex->xa_opt == XA_ONE_PHASE) { + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); int r= ha_commit_trans(thd, TRUE); if ((res= MY_TEST(r))) my_error(r == 1 ? 
ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); + if (WSREP_ON) + wsrep_post_commit(thd, TRUE); } else if (xa_state == XA_PREPARED && thd->lex->xa_opt == XA_NONE) { @@ -833,6 +868,8 @@ bool trans_xa_commit(THD *thd) if (thd->mdl_context.acquire_lock(&mdl_request, thd->variables.lock_wait_timeout)) { + if (WSREP_ON) + wsrep_register_hton(thd, TRUE); ha_rollback_trans(thd, TRUE); my_error(ER_XAER_RMERR, MYF(0)); } diff --git a/sql/tztime.cc b/sql/tztime.cc index d3b4fec6335..24e61588a06 100644 --- a/sql/tztime.cc +++ b/sql/tztime.cc @@ -2708,6 +2708,13 @@ main(int argc, char **argv) free_defaults(default_argv); return 1; } + + // Replicate MyISAM DDL for this session, cf. lp:1161432 + // timezone info unfixable in XtraDB Cluster + printf("set @prep=if((select count(*) from information_schema.global_variables where variable_name='wsrep_on'), 'SET GLOBAL wsrep_replicate_myisam=?', 'do ?');\n" + "prepare set_wsrep_myisam from @prep;\n" + "set @toggle=1; execute set_wsrep_myisam using @toggle;\n"); + if (argc == 1 && !opt_leap) { /* Argument is timezonedir */ @@ -2755,6 +2762,9 @@ main(int argc, char **argv) free_root(&tz_storage, MYF(0)); } + // Reset wsrep_replicate_myisam. lp:1161432 + printf("set @toggle=0; execute set_wsrep_myisam using @toggle;\n"); + free_defaults(default_argv); my_end(0); return 0; diff --git a/sql/wsrep_applier.cc b/sql/wsrep_applier.cc new file mode 100644 index 00000000000..23687e98c32 --- /dev/null +++ b/sql/wsrep_applier.cc @@ -0,0 +1,387 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "wsrep_priv.h" +#include "wsrep_binlog.h" // wsrep_dump_rbr_buf() + +#include "log_event.h" // EVENT_LEN_OFFSET, etc. +#include "wsrep_applier.h" + +/* + read the first event from (*buf). The size of the (*buf) is (*buf_len). + At the end (*buf) is shitfed to point to the following event or NULL and + (*buf_len) will be changed to account just being read bytes of the 1st event. +*/ + +static Log_event* wsrep_read_log_event( + char **arg_buf, size_t *arg_buf_len, + const Format_description_log_event *description_event) +{ + DBUG_ENTER("wsrep_read_log_event"); + char *head= (*arg_buf); + + uint data_len = uint4korr(head + EVENT_LEN_OFFSET); + char *buf= (*arg_buf); + const char *error= 0; + Log_event *res= 0; + + if (data_len > wsrep_max_ws_size) + { + error = "Event too big"; + goto err; + } + + res= Log_event::read_log_event(buf, data_len, &error, description_event, true); + +err: + if (!res) + { + DBUG_ASSERT(error != 0); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %d, event_type: %d", + error,data_len,head[EVENT_TYPE_OFFSET]); + } + (*arg_buf)+= data_len; + (*arg_buf_len)-= data_len; + DBUG_RETURN(res); +} + +#include "transaction.h" // trans_commit(), trans_rollback() +#include "rpl_rli.h" // class Relay_log_info; +#include "sql_base.h" // close_temporary_table() + +static inline void +wsrep_set_apply_format(THD* thd, Format_description_log_event* ev) +{ + if (thd->wsrep_apply_format) + { + delete (Format_description_log_event*)thd->wsrep_apply_format; + } + thd->wsrep_apply_format= ev; +} + +static inline Format_description_log_event* +wsrep_get_apply_format(THD* thd) +{ + if (thd->wsrep_apply_format) + return (Format_description_log_event*) thd->wsrep_apply_format; + /* TODO: mariadb does not support 
rli->get_rli_description_event() + * => look for alternative way to remember last FDE in replication + */ + //return thd->wsrep_rli->get_rli_description_event(); + thd->wsrep_apply_format = new Format_description_log_event(4); + return (Format_description_log_event*) thd->wsrep_apply_format; +} + +static wsrep_cb_status_t wsrep_apply_events(THD* thd, + const void* events_buf, + size_t buf_len) +{ + char *buf= (char *)events_buf; + int rcode= 0; + int event= 1; + + DBUG_ENTER("wsrep_apply_events"); + + if (thd->killed == KILL_CONNECTION) + { + WSREP_INFO("applier has been aborted, skipping apply_rbr: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + DBUG_RETURN(WSREP_CB_FAILURE); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_EXEC; + if (thd->wsrep_conflict_state!= REPLAYING) + thd->wsrep_conflict_state= NO_CONFLICT; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + if (!buf_len) WSREP_DEBUG("empty rbr buffer to apply: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + + while(buf_len) + { + int exec_res; + Log_event* ev= wsrep_read_log_event(&buf, &buf_len, + wsrep_get_apply_format(thd)); + + if (!ev) + { + WSREP_ERROR("applier could not read binlog event, seqno: %lld, len: %zu", + (long long)wsrep_thd_trx_seqno(thd), buf_len); + rcode= 1; + goto error; + } + + switch (ev->get_type_code()) { + case FORMAT_DESCRIPTION_EVENT: + wsrep_set_apply_format(thd, (Format_description_log_event*)ev); + continue; +#ifdef GTID_SUPPORT + case GTID_LOG_EVENT: + { + Gtid_log_event* gev= (Gtid_log_event*)ev; + if (gev->get_gno() == 0) + { + /* Skip GTID log event to make binlog to generate LTID on commit */ + delete ev; + continue; + } + } +#endif /* GTID_SUPPORT */ + default: + break; + } + + thd->set_server_id(ev->server_id); // use the original server id for logging + thd->set_time(); // time the query + wsrep_xid_init(&thd->transaction.xid_state.xid, + &thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + thd->lex->current_select= 0; 
+ if (!ev->when) + { + my_hrtime_t hrtime= my_hrtime(); + ev->when= hrtime_to_my_time(hrtime); + ev->when_sec_part= hrtime_sec_part(hrtime); + } + + ev->thd = thd; + //exec_res = ev->apply_event(thd->wsrep_rli); + exec_res = ev->apply_event(thd->wsrep_rgi); + DBUG_PRINT("info", ("exec_event result: %d", exec_res)); + + if (exec_res) + { + WSREP_WARN("RBR event %d %s apply warning: %d, %lld", + event, ev->get_type_str(), exec_res, + (long long) wsrep_thd_trx_seqno(thd)); + rcode= exec_res; + /* stop processing for the first error */ + delete ev; + goto error; + } + event++; + + if (thd->wsrep_conflict_state!= NO_CONFLICT && + thd->wsrep_conflict_state!= REPLAYING) + WSREP_WARN("conflict state after RBR event applying: %d, %lld", + thd->wsrep_query_state, (long long)wsrep_thd_trx_seqno(thd)); + + if (thd->wsrep_conflict_state == MUST_ABORT) { + WSREP_WARN("RBR event apply failed, rolling back: %lld", + (long long) wsrep_thd_trx_seqno(thd)); + trans_rollback(thd); + thd->locked_tables_list.unlock_locked_tables(thd); + /* Release transactional metadata locks. 
*/ + thd->mdl_context.release_transactional_locks(); + thd->wsrep_conflict_state= NO_CONFLICT; + DBUG_RETURN(WSREP_CB_FAILURE); + } + + delete ev; + } + + error: + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_query_state= QUERY_IDLE; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + assert(thd->wsrep_exec_mode== REPL_RECV); + + if (thd->killed == KILL_CONNECTION) + WSREP_INFO("applier aborted: %lld", (long long)wsrep_thd_trx_seqno(thd)); + + if (rcode) DBUG_RETURN(WSREP_CB_FAILURE); + DBUG_RETURN(WSREP_CB_SUCCESS); +} + +wsrep_cb_status_t wsrep_apply_cb(void* const ctx, + const void* const buf, + size_t const buf_len, + uint32_t const flags, + const wsrep_trx_meta_t* meta) +{ + THD* const thd((THD*)ctx); + + thd->wsrep_trx_meta = *meta; + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "applying write set %lld: %p, %zu", + (long long)wsrep_thd_trx_seqno(thd), buf, buf_len); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "applying write set"); +#endif /* WSREP_PROC_INFO */ + + /* tune FK and UK checking policy */ + if (wsrep_slave_UK_checks == FALSE) + thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS; + else + thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS; + + if (wsrep_slave_FK_checks == FALSE) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + else + thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS; + + if (flags & WSREP_FLAG_ISOLATION) + { + thd->wsrep_apply_toi= true; + /* + Don't run in transaction mode with TOI actions. 
+ */ + thd->variables.option_bits&= ~OPTION_BEGIN; + thd->server_status&= ~SERVER_STATUS_IN_TRANS; + } + wsrep_cb_status_t rcode(wsrep_apply_events(thd, buf, buf_len)); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "applied write set %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "applied write set"); +#endif /* WSREP_PROC_INFO */ + + if (WSREP_CB_SUCCESS != rcode) + { + wsrep_dump_rbr_buf(thd, buf, buf_len); + } + + TABLE *tmp; + while ((tmp = thd->temporary_tables)) + { + WSREP_DEBUG("Applier %lu, has temporary tables: %s.%s", + thd->thread_id, + (tmp->s) ? tmp->s->db.str : "void", + (tmp->s) ? tmp->s->table_name.str : "void"); + close_temporary_table(thd, tmp, 1, 1); + } + + return rcode; +} + +static wsrep_cb_status_t wsrep_commit(THD* const thd, + wsrep_seqno_t const global_seqno) +{ +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "committing %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "committing"); +#endif /* WSREP_PROC_INFO */ + + wsrep_cb_status_t const rcode(trans_commit(thd) ? + WSREP_CB_FAILURE : WSREP_CB_SUCCESS); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "committed %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "committed"); +#endif /* WSREP_PROC_INFO */ + + if (WSREP_CB_SUCCESS == rcode) + { + thd->wsrep_rgi->cleanup_context(thd, false); +#ifdef GTID_SUPPORT + thd->variables.gtid_next.set_automatic(); +#endif /* GTID_SUPPORT */ + // TODO: mark snapshot with global_seqno. 
+ } + + return rcode; +} + +static wsrep_cb_status_t wsrep_rollback(THD* const thd, + wsrep_seqno_t const global_seqno) +{ +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "rolling back %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "rolling back"); +#endif /* WSREP_PROC_INFO */ + + wsrep_cb_status_t const rcode(trans_rollback(thd) ? + WSREP_CB_FAILURE : WSREP_CB_SUCCESS); + +#ifdef WSREP_PROC_INFO + snprintf(thd->wsrep_info, sizeof(thd->wsrep_info) - 1, + "rolled back %lld", (long long)wsrep_thd_trx_seqno(thd)); + thd_proc_info(thd, thd->wsrep_info); +#else + thd_proc_info(thd, "rolled back"); +#endif /* WSREP_PROC_INFO */ + + return rcode; +} + +wsrep_cb_status_t wsrep_commit_cb(void* const ctx, + uint32_t const flags, + const wsrep_trx_meta_t* meta, + wsrep_bool_t* const exit, + bool const commit) +{ + THD* const thd((THD*)ctx); + + assert(meta->gtid.seqno == wsrep_thd_trx_seqno(thd)); + + wsrep_cb_status_t rcode; + + if (commit) + rcode = wsrep_commit(thd, meta->gtid.seqno); + else + rcode = wsrep_rollback(thd, meta->gtid.seqno); + + wsrep_set_apply_format(thd, NULL); + thd->mdl_context.release_transactional_locks(); + free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); + thd->tx_isolation= (enum_tx_isolation) thd->variables.tx_isolation; + + if (wsrep_slave_count_change < 0 && commit && WSREP_CB_SUCCESS == rcode) + { + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + if (wsrep_slave_count_change < 0) + { + wsrep_slave_count_change++; + *exit = true; + } + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + } + + if (*exit == false && thd->wsrep_applier) + { + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + thd->wsrep_apply_toi= false; + } + + return rcode; +} + + +wsrep_cb_status_t wsrep_unordered_cb(void* const ctx, + const void* const data, + size_t const size) +{ + return WSREP_CB_SUCCESS; +} diff 
--git a/sql/wsrep_applier.h b/sql/wsrep_applier.h new file mode 100644 index 00000000000..816970db67c --- /dev/null +++ b/sql/wsrep_applier.h @@ -0,0 +1,38 @@ +/* Copyright 2013 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef WSREP_APPLIER_H +#define WSREP_APPLIER_H + +#include <sys/types.h> + +/* wsrep callback prototypes */ + +wsrep_cb_status_t wsrep_apply_cb(void *ctx, + const void* buf, size_t buf_len, + uint32_t flags, + const wsrep_trx_meta_t* meta); + +wsrep_cb_status_t wsrep_commit_cb(void *ctx, + uint32_t flags, + const wsrep_trx_meta_t* meta, + wsrep_bool_t* exit, + bool commit); + +wsrep_cb_status_t wsrep_unordered_cb(void* ctx, + const void* data, + size_t size); + +#endif /* WSREP_APPLIER_H */ diff --git a/sql/wsrep_binlog.cc b/sql/wsrep_binlog.cc new file mode 100644 index 00000000000..ee8a9cb130b --- /dev/null +++ b/sql/wsrep_binlog.cc @@ -0,0 +1,414 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "wsrep_binlog.h" +#include "wsrep_priv.h" +#include "log.h" + +extern handlerton *binlog_hton; +/* + Write the contents of a cache to a memory buffer. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len) +{ + *buf= NULL; + *buf_len= 0; + + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ER_ERROR_ON_WRITE; + } + + uint length = my_b_bytes_in_cache(cache); + if (unlikely(0 == length)) length = my_b_fill(cache); + + size_t total_length = 0; + + if (likely(length > 0)) do + { + total_length += length; + /* + Bail out if buffer grows too large. + A temporary fix to avoid allocating indefinitely large buffer, + not a real limit on a writeset size which includes other things + like header and keys. 
+ */ + if (total_length > wsrep_max_ws_size) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + goto error; + } + uchar* tmp = (uchar *)my_realloc(*buf, total_length, + MYF(MY_ALLOW_ZERO_PTR)); + if (!tmp) + { + WSREP_ERROR("could not (re)allocate buffer: %zu + %u", + *buf_len, length); + goto error; + } + *buf = tmp; + + memcpy(*buf + *buf_len, cache->read_pos, length); + *buf_len = total_length; + cache->read_pos = cache->read_end; + } while ((cache->file >= 0) && (length = my_b_fill(cache))); + + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("failed to initialize io-cache"); + goto cleanup; + } + + return 0; + +error: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_WARN("failed to initialize io-cache"); + } +cleanup: + my_free(*buf); + *buf= NULL; + *buf_len= 0; + return ER_ERROR_ON_WRITE; +} + +#define STACK_SIZE 4096 /* 4K - for buffer preallocated on the stack: + * many transactions would fit in there + * so there is no need to reach for the heap */ + +/* Returns minimum multiple of HEAP_PAGE_SIZE that is >= length */ +static inline size_t +heap_size(size_t length) +{ + return (length + HEAP_PAGE_SIZE - 1)/HEAP_PAGE_SIZE*HEAP_PAGE_SIZE; +} + +/* append data to writeset */ +static inline wsrep_status_t +wsrep_append_data(wsrep_t* const wsrep, + wsrep_ws_handle_t* const ws, + const void* const data, + size_t const len) +{ + struct wsrep_buf const buff = { data, len }; + wsrep_status_t const rc(wsrep->append_data(wsrep, ws, &buff, 1, + WSREP_DATA_ORDERED, true)); + if (rc != WSREP_OK) + { + WSREP_WARN("append_data() returned %d", rc); + } + + return rc; +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + This version reads all of cache into single buffer and then appends to a + writeset at once. 
+ */ +static int wsrep_write_cache_once(wsrep_t* const wsrep, + THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ER_ERROR_ON_WRITE; + } + + int err(WSREP_OK); + + size_t total_length(0); + uchar stack_buf[STACK_SIZE]; /* to avoid dynamic allocations for few data*/ + uchar* heap_buf(NULL); + uchar* buf(stack_buf); + size_t allocated(sizeof(stack_buf)); + size_t used(0); + + uint length(my_b_bytes_in_cache(cache)); + if (unlikely(0 == length)) length = my_b_fill(cache); + + if (likely(length > 0)) do + { + total_length += length; + /* + Bail out if buffer grows too large. + A temporary fix to avoid allocating indefinitely large buffer, + not a real limit on a writeset size which includes other things + like header and keys. + */ + if (unlikely(total_length > wsrep_max_ws_size)) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + err = WSREP_TRX_SIZE_EXCEEDED; + goto cleanup; + } + + if (total_length > allocated) + { + size_t const new_size(heap_size(total_length)); + uchar* tmp = (uchar *)my_realloc(heap_buf, new_size, + MYF(MY_ALLOW_ZERO_PTR)); + if (!tmp) + { + WSREP_ERROR("could not (re)allocate buffer: %zu + %u", + allocated, length); + err = WSREP_TRX_SIZE_EXCEEDED; + goto cleanup; + } + + heap_buf = tmp; + buf = heap_buf; + allocated = new_size; + + if (used <= STACK_SIZE && used > 0) // there's data in stack_buf + { + DBUG_ASSERT(buf == stack_buf); + memcpy(heap_buf, stack_buf, used); + } + } + + memcpy(buf + used, cache->read_pos, length); + used = total_length; + cache->read_pos = cache->read_end; + } while ((cache->file >= 0) && (length = my_b_fill(cache))); + + if (used > 0) + err = wsrep_append_data(wsrep, &thd->wsrep_ws_handle, buf, used); + + if (WSREP_OK == err) *len = total_length; + +cleanup: + if (reinit_io_cache(cache, 
WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + + if (unlikely(WSREP_OK != err)) wsrep_dump_rbr_buf(thd, buf, used); + + my_free(heap_buf); + return err; +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + + This version uses incremental data appending as it reads it from cache. + */ +static int wsrep_write_cache_inc(wsrep_t* const wsrep, + THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + my_off_t const saved_pos(my_b_tell(cache)); + + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return WSREP_TRX_ERROR; + } + + int err(WSREP_OK); + + size_t total_length(0); + + uint length(my_b_bytes_in_cache(cache)); + if (unlikely(0 == length)) length = my_b_fill(cache); + + if (likely(length > 0)) do + { + total_length += length; + /* bail out if buffer grows too large + not a real limit on a writeset size which includes other things + like header and keys. + */ + if (unlikely(total_length > wsrep_max_ws_size)) + { + WSREP_WARN("transaction size limit (%lu) exceeded: %zu", + wsrep_max_ws_size, total_length); + err = WSREP_TRX_SIZE_EXCEEDED; + goto cleanup; + } + + if(WSREP_OK != (err=wsrep_append_data(wsrep, &thd->wsrep_ws_handle, + cache->read_pos, length))) + goto cleanup; + + cache->read_pos = cache->read_end; + } while ((cache->file >= 0) && (length = my_b_fill(cache))); + + if (WSREP_OK == err) *len = total_length; + +cleanup: + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + + return err; +} + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. 
+ */ +int wsrep_write_cache(wsrep_t* const wsrep, + THD* const thd, + IO_CACHE* const cache, + size_t* const len) +{ + if (wsrep_incremental_data_collection) { + return wsrep_write_cache_inc(wsrep, thd, cache, len); + } + else { + return wsrep_write_cache_once(wsrep, thd, cache, len); + } +} + +void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len) +{ + char filename[PATH_MAX]= {0}; + int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log", + wsrep_data_home_dir, thd->thread_id, + (long long)wsrep_thd_trx_seqno(thd)); + if (len >= PATH_MAX) + { + WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len); + return; + } + + FILE *of= fopen(filename, "wb"); + if (of) + { + fwrite (rbr_buf, buf_len, 1, of); + fclose(of); + } + else + { + WSREP_ERROR("Failed to open file '%s': %d (%s)", + filename, errno, strerror(errno)); + } +} + +/* + wsrep exploits binlog's caches even if binlogging itself is not + activated. In such case connection close needs calling + actual binlog's method. + Todo: split binlog hton from its caches to use ones by wsrep + without referring to binlog's stuff. 
+*/ +int wsrep_binlog_close_connection(THD* thd) +{ + DBUG_ENTER("wsrep_binlog_close_connection"); + if (thd_get_ha_data(thd, binlog_hton) != NULL) + binlog_hton->close_connection (binlog_hton, thd); + DBUG_RETURN(0); +} + +int wsrep_binlog_savepoint_set(THD *thd, void *sv) +{ + if (!wsrep_emulate_bin_log) return 0; + int rcode = binlog_hton->savepoint_set(binlog_hton, thd, sv); + return rcode; +} + +int wsrep_binlog_savepoint_rollback(THD *thd, void *sv) +{ + if (!wsrep_emulate_bin_log) return 0; + int rcode = binlog_hton->savepoint_rollback(binlog_hton, thd, sv); + return rcode; +} + +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache) +{ + char filename[PATH_MAX]= {0}; + int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log", + wsrep_data_home_dir, thd->thread_id, + (long long)wsrep_thd_trx_seqno(thd)); + size_t bytes_in_cache = 0; + // check path + if (len >= PATH_MAX) + { + WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len); + return ; + } + // init cache + my_off_t const saved_pos(my_b_tell(cache)); + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ; + } + // open file + FILE* of = fopen(filename, "wb"); + if (!of) + { + WSREP_ERROR("Failed to open file '%s': %d (%s)", + filename, errno, strerror(errno)); + goto cleanup; + } + // ready to write + bytes_in_cache= my_b_bytes_in_cache(cache); + if (unlikely(bytes_in_cache == 0)) bytes_in_cache = my_b_fill(cache); + if (likely(bytes_in_cache > 0)) do + { + if (my_fwrite(of, cache->read_pos, bytes_in_cache, + MYF(MY_WME | MY_NABP)) == (size_t) -1) + { + WSREP_ERROR("Failed to write file '%s'", filename); + goto cleanup; + } + cache->read_pos= cache->read_end; + } while ((cache->file >= 0) && (bytes_in_cache= my_b_fill(cache))); + if(cache->error == -1) + { + WSREP_ERROR("RBR inconsistent"); + goto cleanup; + } +cleanup: + // init back + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize 
io-cache"); + } + // close file + if (of) fclose(of); +} + +void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end) +{ + thd->binlog_flush_pending_rows_event(stmt_end); +} diff --git a/sql/wsrep_binlog.h b/sql/wsrep_binlog.h new file mode 100644 index 00000000000..a3d8ec6ec2c --- /dev/null +++ b/sql/wsrep_binlog.h @@ -0,0 +1,59 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_BINLOG_H +#define WSREP_BINLOG_H + +#include "sql_class.h" // THD, IO_CACHE + +#define HEAP_PAGE_SIZE 65536 /* 64K */ +#define WSREP_MAX_WS_SIZE (0xFFFFFFFFUL - HEAP_PAGE_SIZE) + +/* + Write the contents of a cache to a memory buffer. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. + */ +int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len); + +/* + Write the contents of a cache to wsrep provider. + + This function quite the same as MYSQL_BIN_LOG::write_cache(), + with the exception that here we write in buffer instead of log file. 
+ + @param len total amount of data written + @return wsrep error status + */ +int wsrep_write_cache (wsrep_t* wsrep, + THD* thd, + IO_CACHE* cache, + size_t* len); + +/* Dump replication buffer to disk */ +void wsrep_dump_rbr_buf(THD *thd, const void* rbr_buf, size_t buf_len); + +/* Dump replication buffer to disk without intermediate buffer */ +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache); + +int wsrep_binlog_close_connection(THD* thd); +int wsrep_binlog_savepoint_set(THD *thd, void *sv); +int wsrep_binlog_savepoint_rollback(THD *thd, void *sv); + +/* Dump replication buffer to disk without intermediate buffer */ +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache); + +#endif /* WSREP_BINLOG_H */ diff --git a/sql/wsrep_check_opts.cc b/sql/wsrep_check_opts.cc new file mode 100644 index 00000000000..119813bfa04 --- /dev/null +++ b/sql/wsrep_check_opts.cc @@ -0,0 +1,102 @@ +/* Copyright 2011 Codership Oy <http://www.codership.com> + Copyright 2014 SkySQL Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "mysqld.h" +#include "sys_vars_shared.h" +#include "wsrep.h" +#include "wsrep_sst.h" +//#include <sql_class.h> +//#include "wsrep_mysqld.h" + +extern char *my_bind_addr_str; + +int wsrep_check_opts() +{ + if (wsrep_slave_threads > 1) + { + sys_var *autoinc_lock_mode= + intern_find_sys_var(STRING_WITH_LEN("innodb_autoinc_lock_mode")); + bool is_null; + if (autoinc_lock_mode && + autoinc_lock_mode->val_int(&is_null, 0, OPT_GLOBAL, 0) != 2) + { + WSREP_ERROR("Parallel applying (wsrep_slave_threads > 1) requires" + " innodb_autoinc_lock_mode = 2."); + return 1; + } + } + + if (locked_in_memory) + { + WSREP_ERROR("Memory locking is not supported (locked_in_memory=ON)"); + return 1; + } + + if (!strcasecmp(wsrep_sst_method, "mysqldump")) + { + if (!strcasecmp(my_bind_addr_str, "127.0.0.1") || + !strcasecmp(my_bind_addr_str, "localhost")) + { + WSREP_ERROR("wsrep_sst_method is set to 'mysqldump' yet " + "mysqld bind_address is set to '%s', which makes it " + "impossible to receive state transfer from another " + "node, since mysqld won't accept such connections. " + "If you wish to use mysqldump state transfer method, " + "set bind_address to allow mysql client connections " + "from other cluster members (e.g. 
0.0.0.0).", + my_bind_addr_str); + return 1; + } + } + else + { + // non-mysqldump SST requires wsrep_cluster_address on startup + if (!wsrep_cluster_address || !wsrep_cluster_address[0]) + { + WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be " + "configured on startup.", wsrep_sst_method); + return 1; + } + } + + if (strcasecmp(wsrep_sst_receive_address, "AUTO")) + { + if (!strncasecmp(wsrep_sst_receive_address, STRING_WITH_LEN("127.0.0.1")) || + !strncasecmp(wsrep_sst_receive_address, STRING_WITH_LEN("localhost"))) + { + WSREP_WARN("wsrep_sst_receive_address is set to '%s' which " + "makes it impossible for another host to reach this " + "one. Please set it to the address which this node " + "can be connected at by other cluster members.", + wsrep_sst_receive_address); + } + } + + if (strcasecmp(wsrep_provider, "NONE")) + { + if (global_system_variables.binlog_format != BINLOG_FORMAT_ROW) + { + WSREP_ERROR("Only binlog_format = 'ROW' is currently supported. " + "Configured value: '%s'. Please adjust your " + "configuration.", + binlog_format_names[global_system_variables.binlog_format]); + + return 1; + } + } + return 0; +} + diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc new file mode 100644 index 00000000000..e937ef9be5c --- /dev/null +++ b/sql/wsrep_dummy.cc @@ -0,0 +1,126 @@ +/* Copyright (C) 2014 SkySQL Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include <my_global.h> +#include <sql_class.h> +#include <mysql/service_wsrep.h> + +my_bool wsrep_thd_is_BF(THD *, my_bool) +{ return 0; } + +int wsrep_trx_order_before(THD *, THD *) +{ return 0; } + +enum wsrep_conflict_state wsrep_thd_conflict_state(THD *, my_bool) +{ return NO_CONFLICT; } + +int wsrep_is_wsrep_xid(const XID*) +{ return 0; } + +bool wsrep_prepare_key(const uchar*, size_t, const uchar*, size_t, struct wsrep_buf*, size_t*) +{ return 0; } + +struct wsrep *get_wsrep() +{ return 0; } + +my_bool get_wsrep_certify_nonPK() +{ return 0; } + +my_bool get_wsrep_debug() +{ return 0; } + +my_bool get_wsrep_drupal_282555_workaround() +{ return 0; } + +my_bool get_wsrep_load_data_splitting() +{ return 0; } + +my_bool get_wsrep_log_conflicts() +{ return 0; } + +long get_wsrep_protocol_version() +{ return 0; } + +my_bool wsrep_aborting_thd_contains(THD *) +{ return 0; } + +void wsrep_aborting_thd_enqueue(THD *) +{ } + +bool wsrep_consistency_check(THD *) +{ return 0; } + +void wsrep_lock_rollback() +{ } + +int wsrep_on(THD *thd) +{ return 0; } + +void wsrep_post_commit(THD*, bool) +{ } + +enum wsrep_trx_status wsrep_run_wsrep_commit(THD *, handlerton *, bool) +{ return WSREP_TRX_ERROR; } + +void wsrep_thd_LOCK(THD *) +{ } + +void wsrep_thd_UNLOCK(THD *) +{ } + +void wsrep_thd_awake(THD *, my_bool) +{ } + +const char *wsrep_thd_conflict_state_str(THD *) +{ return 0; } + +enum wsrep_exec_mode wsrep_thd_exec_mode(THD *) +{ return LOCAL_STATE; } + +const char *wsrep_thd_exec_mode_str(THD *) +{ return NULL; } + +enum wsrep_conflict_state wsrep_thd_get_conflict_state(THD *) +{ return NO_CONFLICT; } + +my_bool wsrep_thd_is_wsrep(THD *) +{ return 0; } + +char *wsrep_thd_query(THD *) +{ return 0; } + +enum wsrep_query_state wsrep_thd_query_state(THD *) +{ 
return QUERY_IDLE; } + +const char *wsrep_thd_query_state_str(THD *) +{ return 0; } + +int wsrep_thd_retry_counter(THD *) +{ return 0; } + +void wsrep_thd_set_conflict_state(THD *, enum wsrep_conflict_state) +{ } + +longlong wsrep_thd_trx_seqno(THD *) +{ return -1; } + +struct wsrep_ws_handle* wsrep_thd_ws_handle(THD *) +{ return 0; } + +int wsrep_trx_is_aborting(THD *) +{ return 0; } + +void wsrep_unlock_rollback() +{ } diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc new file mode 100644 index 00000000000..b77cc54ee15 --- /dev/null +++ b/sql/wsrep_hton.cc @@ -0,0 +1,583 @@ +/* Copyright 2008 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <mysqld.h> +#include "sql_base.h" +#include "rpl_filter.h" +#include <sql_class.h> +#include "wsrep_mysqld.h" +#include "wsrep_binlog.h" +#include <cstdio> +#include <cstdlib> + +extern ulonglong thd_to_trx_id(THD *thd); + +extern "C" int thd_binlog_format(const MYSQL_THD thd); +// todo: share interface with ha_innodb.c + +enum wsrep_trx_status wsrep_run_wsrep_commit(THD *thd, handlerton *hton, + bool all); + +/* + Cleanup after local transaction commit/rollback, replay or TOI. 
+*/ +void wsrep_cleanup_transaction(THD *thd) +{ + if (wsrep_emulate_bin_log) thd_binlog_trx_reset(thd); + thd->wsrep_ws_handle.trx_id= WSREP_UNDEFINED_TRX_ID; + thd->wsrep_trx_meta.gtid= WSREP_GTID_UNDEFINED; + thd->wsrep_trx_meta.depends_on= WSREP_SEQNO_UNDEFINED; + thd->wsrep_exec_mode= LOCAL_STATE; + return; +} + +/* + wsrep hton +*/ +handlerton *wsrep_hton; + + +/* + Registers wsrep hton at commit time if transaction has registered htons + for supported engine types. + + Hton should not be registered for TOTAL_ORDER operations. + + Registration is needed for both LOCAL_MODE and REPL_RECV transactions to run + commit in 2pc so that wsrep position gets properly recorded in storage + engines. + + Note that all hton calls should immediately return for threads that are + in REPL_RECV mode as their states are controlled by wsrep appliers or + replaying code. Only threads in LOCAL_MODE should run wsrep callbacks + from hton methods. +*/ +void wsrep_register_hton(THD* thd, bool all) +{ + if (WSREP(thd) && thd->wsrep_exec_mode != TOTAL_ORDER && + !thd->wsrep_apply_toi) + { + THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; + for (Ha_trx_info *i= trans->ha_list; i; i = i->next()) + { + if ((i->ht()->db_type == DB_TYPE_INNODB) || + (i->ht()->db_type == DB_TYPE_TOKUDB)) + { + trans_register_ha(thd, all, wsrep_hton); + + /* follow innodb read/write settting + * but, as an exception: CTAS with empty result set will not be + * replicated unless we declare wsrep hton as read/write here + */ + if (i->is_trx_read_write() || + (thd->lex->sql_command == SQLCOM_CREATE_TABLE && + thd->wsrep_exec_mode == LOCAL_STATE)) + { + thd->ha_data[wsrep_hton->slot].ha_info[all].set_trx_read_write(); + } + break; + } + } + } +} + +/* + Calls wsrep->post_commit() for locally executed transactions that have + got seqno from provider (must commit) and don't require replaying. 
+ */ +void wsrep_post_commit(THD* thd, bool all) +{ + if (thd->wsrep_exec_mode == LOCAL_COMMIT) + { + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); + if (wsrep->post_commit(wsrep, &thd->wsrep_ws_handle)) + { + DBUG_PRINT("wsrep", ("set committed fail")); + WSREP_WARN("set committed fail: %llu %d", + (long long)thd->real_id, thd->get_stmt_da()->status()); + } + wsrep_cleanup_transaction(thd); + } +} + +/* + wsrep exploits binlog's caches even if binlogging itself is not + activated. In such case connection close needs calling + actual binlog's method. + Todo: split binlog hton from its caches to use ones by wsrep + without referring to binlog's stuff. +*/ +static int +wsrep_close_connection(handlerton* hton, THD* thd) +{ + DBUG_ENTER("wsrep_close_connection"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + DBUG_RETURN(wsrep_binlog_close_connection (thd)); +} + +/* + prepare/wsrep_run_wsrep_commit can fail in two ways + - certification test or an equivalent. 
As a result, + the current transaction just rolls back + Error codes: + WSREP_TRX_CERT_FAIL, WSREP_TRX_SIZE_EXCEEDED, WSREP_TRX_ERROR + - a post-certification failure makes this server unable to + commit its own WS and therefore the server must abort +*/ +static int wsrep_prepare(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("wsrep_prepare"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write()); + DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE); + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED); + + if ((all || + !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + (thd->variables.wsrep_on && !wsrep_trans_cache_is_empty(thd))) + { + int res= wsrep_run_wsrep_commit(thd, hton, all); + if (res == WSREP_TRX_SIZE_EXCEEDED) + res= EMSGSIZE; + else + res= EDEADLK; // for a better error message + DBUG_RETURN (wsrep_run_wsrep_commit(thd, hton, all)); + } + DBUG_RETURN(0); +} + +static int wsrep_savepoint_set(handlerton *hton, THD *thd, void *sv) +{ + DBUG_ENTER("wsrep_savepoint_set"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + if (!wsrep_emulate_bin_log) DBUG_RETURN(0); + int rcode = wsrep_binlog_savepoint_set(thd, sv); + DBUG_RETURN(rcode); +} + +static int wsrep_savepoint_rollback(handlerton *hton, THD *thd, void *sv) +{ + DBUG_ENTER("wsrep_savepoint_rollback"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + if (!wsrep_emulate_bin_log) DBUG_RETURN(0); + int rcode = wsrep_binlog_savepoint_rollback(thd, sv); + DBUG_RETURN(rcode); +} + +static int wsrep_rollback(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("wsrep_rollback"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + switch (thd->wsrep_exec_mode) + { + case TOTAL_ORDER: + case REPL_RECV: + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + 
WSREP_DEBUG("Avoiding wsrep rollback for failed DDL: %s", thd->query()); + DBUG_RETURN(0); + default: break; + } + + if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + (thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY)) + { + if (wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle)) + { + DBUG_PRINT("wsrep", ("setting rollback fail")); + WSREP_ERROR("settting rollback fail: thd: %llu SQL: %s", + (long long)thd->real_id, thd->query()); + } + wsrep_cleanup_transaction(thd); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(0); +} + +int wsrep_commit(handlerton *hton, THD *thd, bool all) +{ + DBUG_ENTER("wsrep_commit"); + + if (thd->wsrep_exec_mode == REPL_RECV) + { + DBUG_RETURN(0); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if ((all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && + (thd->variables.wsrep_on && thd->wsrep_conflict_state != MUST_REPLAY)) + { + if (thd->wsrep_exec_mode == LOCAL_COMMIT) + { + DBUG_ASSERT(thd->ha_data[wsrep_hton->slot].ha_info[all].is_trx_read_write()); + /* + Call to wsrep->post_commit() (moved to wsrep_post_commit()) must + be done only after commit has done for all involved htons. + */ + DBUG_PRINT("wsrep", ("commit")); + } + else + { + /* + Transaction didn't go through wsrep->pre_commit() so just roll back + possible changes to clean state. 
+ */ + if (WSREP_PROVIDER_EXISTS) { + if (wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle)) + { + DBUG_PRINT("wsrep", ("setting rollback fail")); + WSREP_ERROR("settting rollback fail: thd: %llu SQL: %s", + (long long)thd->real_id, thd->query()); + } + } + wsrep_cleanup_transaction(thd); + } + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(0); +} + + +extern Rpl_filter* binlog_filter; +extern my_bool opt_log_slave_updates; + +enum wsrep_trx_status +wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all) +{ + int rcode= -1; + size_t data_len= 0; + IO_CACHE *cache; + int replay_round= 0; + + if (thd->get_stmt_da()->is_error()) { + WSREP_ERROR("commit issue, error: %d %s", + thd->get_stmt_da()->sql_errno(), thd->get_stmt_da()->message()); + } + + DBUG_ENTER("wsrep_run_wsrep_commit"); + + if (thd->slave_thread && !opt_log_slave_updates) DBUG_RETURN(WSREP_TRX_OK); + + if (thd->wsrep_exec_mode == REPL_RECV) { + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + if (wsrep_debug) + WSREP_INFO("WSREP: must abort for BF"); + DBUG_PRINT("wsrep", ("BF apply commit fail")); + thd->wsrep_conflict_state = NO_CONFLICT; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + // + // TODO: test all calls of the rollback. 
+ // rollback must happen automagically innobase_rollback(hton, thd, 1); + // + DBUG_RETURN(WSREP_TRX_ERROR); + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + + if (thd->wsrep_exec_mode != LOCAL_STATE) DBUG_RETURN(WSREP_TRX_OK); + + if (thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING) { + WSREP_DEBUG("commit for consistency check: %s", thd->query()); + DBUG_RETURN(WSREP_TRX_OK); + } + + DBUG_PRINT("wsrep", ("replicating commit")); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + if (thd->wsrep_conflict_state == MUST_ABORT) { + DBUG_PRINT("wsrep", ("replicate commit fail")); + thd->wsrep_conflict_state = ABORTED; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + if (wsrep_debug) { + WSREP_INFO("innobase_commit, abort %s", + (thd->query()) ? thd->query() : "void"); + } + DBUG_RETURN(WSREP_TRX_CERT_FAIL); + } + + mysql_mutex_lock(&LOCK_wsrep_replaying); + + while (wsrep_replaying > 0 && + thd->wsrep_conflict_state == NO_CONFLICT && + thd->killed == NOT_KILLED && + !shutdown_in_progress) + { + + mysql_mutex_unlock(&LOCK_wsrep_replaying); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd_proc_info(thd, "wsrep waiting on replaying"); + thd->mysys_var->current_mutex= &LOCK_wsrep_replaying; + thd->mysys_var->current_cond= &COND_wsrep_replaying; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + mysql_mutex_lock(&LOCK_wsrep_replaying); + // Using timedwait is a hack to avoid deadlock in case if BF victim + // misses the signal. 
+ struct timespec wtime = {0, 1000000}; + mysql_cond_timedwait(&COND_wsrep_replaying, &LOCK_wsrep_replaying, + &wtime); + + if (replay_round++ % 100000 == 0) + WSREP_DEBUG("commit waiting for replaying: replayers %d, thd: (%lu) " + "conflict: %d (round: %d)", + wsrep_replaying, thd->thread_id, + thd->wsrep_conflict_state, replay_round); + + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd->mysys_var->current_mutex= 0; + thd->mysys_var->current_cond= 0; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_replaying); + } + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + if (thd->wsrep_conflict_state == MUST_ABORT) { + DBUG_PRINT("wsrep", ("replicate commit fail")); + thd->wsrep_conflict_state = ABORTED; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + WSREP_DEBUG("innobase_commit abort after replaying wait %s", + (thd->query()) ? thd->query() : "void"); + DBUG_RETURN(WSREP_TRX_CERT_FAIL); + } + + thd->wsrep_query_state = QUERY_COMMITTING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + cache = get_trans_log(thd); + rcode = 0; + if (cache) { + thd->binlog_flush_pending_rows_event(true); + rcode = wsrep_write_cache(wsrep, thd, cache, &data_len); + if (WSREP_OK != rcode) { + WSREP_ERROR("rbr write fail, data_len: %zu, %d", data_len, rcode); + DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); + } + } + + if (data_len == 0) + { + if (thd->get_stmt_da()->is_ok() && + thd->get_stmt_da()->affected_rows() > 0 && + !binlog_filter->is_on()) + { + WSREP_DEBUG("empty rbr buffer, query: %s, " + "affected rows: %llu, " + "changed tables: %d, " + "sql_log_bin: %d, " + "wsrep status (%d %d %d)", + thd->query(), thd->get_stmt_da()->affected_rows(), + stmt_has_updated_trans_table(thd), thd->variables.sql_log_bin, + thd->wsrep_exec_mode, thd->wsrep_query_state, + thd->wsrep_conflict_state); + } + else + { + WSREP_DEBUG("empty rbr buffer, query: %s", thd->query()); + } + 
thd->wsrep_query_state= QUERY_EXEC; + DBUG_RETURN(WSREP_TRX_OK); + } + + if (WSREP_UNDEFINED_TRX_ID == thd->wsrep_ws_handle.trx_id) + { + WSREP_WARN("SQL statement was ineffective, THD: %lu, buf: %zu\n" + "QUERY: %s\n" + " => Skipping replication", + thd->thread_id, data_len, thd->query()); + rcode = WSREP_TRX_FAIL; + } + else if (!rcode) + { + if (WSREP_OK == rcode) + rcode = wsrep->pre_commit(wsrep, + (wsrep_conn_id_t)thd->thread_id, + &thd->wsrep_ws_handle, + WSREP_FLAG_COMMIT | + ((thd->wsrep_PA_safe) ? + 0ULL : WSREP_FLAG_PA_UNSAFE), + &thd->wsrep_trx_meta); + + if (rcode == WSREP_TRX_MISSING) { + WSREP_WARN("Transaction missing in provider, thd: %ld, SQL: %s", + thd->thread_id, thd->query()); + rcode = WSREP_TRX_FAIL; + } else if (rcode == WSREP_BF_ABORT) { + WSREP_DEBUG("thd %lu seqno %lld BF aborted by provider, will replay", + thd->thread_id, (long long)thd->wsrep_trx_meta.gtid.seqno); + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state = MUST_REPLAY; + DBUG_ASSERT(wsrep_thd_trx_seqno(thd) > 0); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying++; + WSREP_DEBUG("replaying increased: %d, thd: %lu", + wsrep_replaying, thd->thread_id); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } + } else { + WSREP_ERROR("I/O error reading from thd's binlog iocache: " + "errno=%d, io cache code=%d", my_errno, cache->error); + DBUG_ASSERT(0); // failure like this can not normally happen + DBUG_RETURN(WSREP_TRX_ERROR); + } + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + switch(rcode) { + case 0: + /* + About MUST_ABORT: We assume that even if thd conflict state was set + to MUST_ABORT, underlying transaction was not rolled back or marked + as deadlock victim in QUERY_COMMITTING state. Conflict state is + set to NO_CONFLICT and commit proceeds as usual. 
+ */ + if (thd->wsrep_conflict_state == MUST_ABORT) + thd->wsrep_conflict_state= NO_CONFLICT; + + if (thd->wsrep_conflict_state != NO_CONFLICT) + { + WSREP_WARN("thd %lu seqno %lld: conflict state %d after post commit", + thd->thread_id, + (long long)thd->wsrep_trx_meta.gtid.seqno, + thd->wsrep_conflict_state); + } + thd->wsrep_exec_mode= LOCAL_COMMIT; + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); + /* Override XID iff it was generated by mysql */ + if (thd->transaction.xid_state.xid.get_my_xid()) + { + wsrep_xid_init(&thd->transaction.xid_state.xid, + &thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + } + DBUG_PRINT("wsrep", ("replicating commit success")); + break; + case WSREP_BF_ABORT: + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno != WSREP_SEQNO_UNDEFINED); + case WSREP_TRX_FAIL: + WSREP_DEBUG("commit failed for reason: %d", rcode); + DBUG_PRINT("wsrep", ("replicating commit fail")); + + thd->wsrep_query_state= QUERY_EXEC; + + if (thd->wsrep_conflict_state == MUST_ABORT) { + thd->wsrep_conflict_state= ABORTED; + } + else + { + WSREP_DEBUG("conflict state: %d", thd->wsrep_conflict_state); + if (thd->wsrep_conflict_state == NO_CONFLICT) + { + thd->wsrep_conflict_state = CERT_FAILURE; + WSREP_LOG_CONFLICT(NULL, thd, FALSE); + } + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + DBUG_RETURN(WSREP_TRX_CERT_FAIL); + + case WSREP_SIZE_EXCEEDED: + WSREP_ERROR("transaction size exceeded"); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); + case WSREP_CONN_FAIL: + WSREP_ERROR("connection failure"); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(WSREP_TRX_ERROR); + default: + WSREP_ERROR("unknown connection failure"); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + DBUG_RETURN(WSREP_TRX_ERROR); + } + + thd->wsrep_query_state= QUERY_EXEC; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + DBUG_RETURN(WSREP_TRX_OK); +} + + +static int wsrep_hton_init(void *p) +{ + wsrep_hton= (handlerton 
*)p; + //wsrep_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO; + wsrep_hton->state= SHOW_OPTION_YES; + wsrep_hton->db_type=(legacy_db_type)0; + wsrep_hton->savepoint_offset= sizeof(my_off_t); + wsrep_hton->close_connection= wsrep_close_connection; + wsrep_hton->savepoint_set= wsrep_savepoint_set; + wsrep_hton->savepoint_rollback= wsrep_savepoint_rollback; + wsrep_hton->commit= wsrep_commit; + wsrep_hton->rollback= wsrep_rollback; + wsrep_hton->prepare= wsrep_prepare; + wsrep_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN; // todo: fix flags + wsrep_hton->slot= 0; + return 0; +} + + +struct st_mysql_storage_engine wsrep_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + + +mysql_declare_plugin(wsrep) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &wsrep_storage_engine, + "wsrep", + "Codership Oy", + "A pseudo storage engine to represent transactions in multi-master " + "synchornous replication", + PLUGIN_LICENSE_GPL, + wsrep_hton_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100 /* 1.0 */, + NULL, /* status variables */ + NULL, /* system variables */ + NULL, /* config options */ + 0, /* flags */ +} +mysql_declare_plugin_end; diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc new file mode 100644 index 00000000000..64b7d6c8721 --- /dev/null +++ b/sql/wsrep_mysqld.cc @@ -0,0 +1,2532 @@ +/* Copyright 2008-2014 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <mysqld.h> +#include <sql_class.h> +#include <sql_parse.h> +#include "slave.h" +#include "rpl_mi.h" +#include "sql_repl.h" +#include "rpl_filter.h" +#include "sql_callback.h" +#include "sp_head.h" +#include "sql_show.h" +#include "sp.h" +#include "wsrep_priv.h" +#include "wsrep_thd.h" +#include "wsrep_sst.h" +#include "wsrep_utils.h" +#include "wsrep_var.h" +#include "wsrep_binlog.h" +#include "wsrep_applier.h" +#include <cstdio> +#include <cstdlib> +#include "log_event.h" +#include <slave.h> + +wsrep_t *wsrep = NULL; +/* + wsrep_emulate_bin_log is a flag to tell that binlog has not been configured. + wsrep needs to get binlog events from transaction cache even when binlog is + not enabled, wsrep_emulate_bin_log opens needed code paths to make this + possible +*/ +my_bool wsrep_emulate_bin_log = FALSE; // activating parts of binlog interface +#ifdef GTID_SUPPORT +/* Sidno in global_sid_map corresponding to group uuid */ +rpl_sidno wsrep_sidno= -1; +#endif /* GTID_SUPPORT */ +my_bool wsrep_preordered_opt= FALSE; + +/* + * Begin configuration options and their default values + */ + +extern my_bool plugins_are_initialized; +extern uint kill_cached_threads; +extern mysql_cond_t COND_thread_cache; + +const char* wsrep_data_home_dir = NULL; +const char* wsrep_dbug_option = ""; + +long wsrep_slave_threads = 1; // # of slave action appliers wanted +int wsrep_slave_count_change = 0; // # of appliers to stop or start +my_bool wsrep_debug = 0; // enable debug level logging +my_bool wsrep_convert_LOCK_to_trx = 1; // convert locking sessions to trx +ulong wsrep_retry_autocommit = 5; // retry aborted autocommit trx +my_bool wsrep_auto_increment_control = 1; // control auto increment variables +my_bool wsrep_drupal_282555_workaround = 1; // retry autoinc insert 
after dupkey +my_bool wsrep_incremental_data_collection = 0; // incremental data collection +ulong wsrep_max_ws_size = 1073741824UL;//max ws (RBR buffer) size +ulong wsrep_max_ws_rows = 65536; // max number of rows in ws +int wsrep_to_isolation = 0; // # of active TO isolation threads +my_bool wsrep_certify_nonPK = 1; // certify, even when no primary key +long wsrep_max_protocol_version = 3; // maximum protocol version to use +ulong wsrep_forced_binlog_format = BINLOG_FORMAT_UNSPEC; +my_bool wsrep_recovery = 0; // recovery +my_bool wsrep_replicate_myisam = 0; // enable myisam replication +my_bool wsrep_log_conflicts = 0; +ulong wsrep_mysql_replication_bundle = 0; +my_bool wsrep_desync = 0; // desynchronize the node from the + // cluster +my_bool wsrep_load_data_splitting = 1; // commit load data every 10K intervals +my_bool wsrep_restart_slave = 0; // should mysql slave thread be + // restarted, if node joins back +my_bool wsrep_restart_slave_activated = 0; // node has dropped, and slave + // restart will be needed +my_bool wsrep_slave_UK_checks = 0; // slave thread does UK checks +my_bool wsrep_slave_FK_checks = 0; // slave thread does FK checks +bool wsrep_new_cluster = false; // Bootstrap the cluster ? +/* + * End configuration options + */ + +/* + * Other wsrep global variables. 
+ */ + +mysql_mutex_t LOCK_wsrep_ready; +mysql_cond_t COND_wsrep_ready; +mysql_mutex_t LOCK_wsrep_sst; +mysql_cond_t COND_wsrep_sst; +mysql_mutex_t LOCK_wsrep_sst_init; +mysql_cond_t COND_wsrep_sst_init; +mysql_mutex_t LOCK_wsrep_rollback; +mysql_cond_t COND_wsrep_rollback; +wsrep_aborting_thd_t wsrep_aborting_thd= NULL; +mysql_mutex_t LOCK_wsrep_replaying; +mysql_cond_t COND_wsrep_replaying; +mysql_mutex_t LOCK_wsrep_slave_threads; +mysql_mutex_t LOCK_wsrep_desync; +int wsrep_replaying= 0; +ulong wsrep_running_threads = 0; // # of currently running wsrep threads +ulong my_bind_addr; + +#ifdef HAVE_PSI_INTERFACE +PSI_mutex_key key_LOCK_wsrep_rollback, key_LOCK_wsrep_thd, + key_LOCK_wsrep_replaying, key_LOCK_wsrep_ready, key_LOCK_wsrep_sst, + key_LOCK_wsrep_sst_thread, key_LOCK_wsrep_sst_init, + key_LOCK_wsrep_slave_threads, key_LOCK_wsrep_desync; + +PSI_cond_key key_COND_wsrep_rollback, key_COND_wsrep_thd, + key_COND_wsrep_replaying, key_COND_wsrep_ready, key_COND_wsrep_sst, + key_COND_wsrep_sst_init, key_COND_wsrep_sst_thread; + +static PSI_mutex_info wsrep_mutexes[]= +{ + { &key_LOCK_wsrep_ready, "LOCK_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_LOCK_wsrep_sst_init, "LOCK_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_sst, "LOCK_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_rollback, "LOCK_wsrep_rollback", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_thd, "THD::LOCK_wsrep_thd", 0}, + { &key_LOCK_wsrep_replaying, "LOCK_wsrep_replaying", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_slave_threads, "LOCK_wsrep_slave_threads", PSI_FLAG_GLOBAL}, + { &key_LOCK_wsrep_desync, "LOCK_wsrep_desync", PSI_FLAG_GLOBAL} +}; + +static PSI_cond_info wsrep_conds[]= +{ + { &key_COND_wsrep_ready, "COND_wsrep_ready", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst, "COND_wsrep_sst", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_sst_init, "COND_wsrep_sst_init", PSI_FLAG_GLOBAL}, + { 
&key_COND_wsrep_sst_thread, "wsrep_sst_thread", 0}, + { &key_COND_wsrep_rollback, "COND_wsrep_rollback", PSI_FLAG_GLOBAL}, + { &key_COND_wsrep_thd, "THD::COND_wsrep_thd", 0}, + { &key_COND_wsrep_replaying, "COND_wsrep_replaying", PSI_FLAG_GLOBAL} +}; +#else +#define mysql_mutex_register(X,Y,Z) +#define mysql_cond_register(X,Y,Z) +#endif + +my_bool wsrep_inited = 0; // initialized ? + +static const wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED; +static char cluster_uuid_str[40]= { 0, }; +static const char* cluster_status_str[WSREP_VIEW_MAX] = +{ + "Primary", + "non-Primary", + "Disconnected" +}; + +static char provider_name[256]= { 0, }; +static char provider_version[256]= { 0, }; +static char provider_vendor[256]= { 0, }; + +/* + * wsrep status variables + */ +my_bool wsrep_connected = FALSE; +my_bool wsrep_ready = FALSE; // node can accept queries +const char* wsrep_cluster_state_uuid = cluster_uuid_str; +long long wsrep_cluster_conf_id = WSREP_SEQNO_UNDEFINED; +const char* wsrep_cluster_status = cluster_status_str[WSREP_VIEW_DISCONNECTED]; +long wsrep_cluster_size = 0; +long wsrep_local_index = -1; +long long wsrep_local_bf_aborts = 0; +const char* wsrep_provider_name = provider_name; +const char* wsrep_provider_version = provider_version; +const char* wsrep_provider_vendor = provider_vendor; +/* End wsrep status variables */ + +wsrep_uuid_t local_uuid = WSREP_UUID_UNDEFINED; +wsrep_seqno_t local_seqno = WSREP_SEQNO_UNDEFINED; +wsp::node_status local_status; +long wsrep_protocol_version = 3; + +// Boolean denoting if server is in initial startup phase. This is needed +// to make sure that main thread waiting in wsrep_sst_wait() is signaled +// if there was no state gap on receiving first view event. 
+static my_bool wsrep_startup = TRUE; + + +static void wsrep_log_cb(wsrep_log_level_t level, const char *msg) { + switch (level) { + case WSREP_LOG_INFO: + sql_print_information("WSREP: %s", msg); + break; + case WSREP_LOG_WARN: + sql_print_warning("WSREP: %s", msg); + break; + case WSREP_LOG_ERROR: + case WSREP_LOG_FATAL: + sql_print_error("WSREP: %s", msg); + break; + case WSREP_LOG_DEBUG: + if (wsrep_debug) sql_print_information ("[Debug] WSREP: %s", msg); + default: + break; + } +} + +static void wsrep_log_states (wsrep_log_level_t const level, + const wsrep_uuid_t* const group_uuid, + wsrep_seqno_t const group_seqno, + const wsrep_uuid_t* const node_uuid, + wsrep_seqno_t const node_seqno) +{ + char uuid_str[37]; + char msg[256]; + + wsrep_uuid_print (group_uuid, uuid_str, sizeof(uuid_str)); + snprintf (msg, 255, "WSREP: Group state: %s:%lld", + uuid_str, (long long)group_seqno); + wsrep_log_cb (level, msg); + + wsrep_uuid_print (node_uuid, uuid_str, sizeof(uuid_str)); + snprintf (msg, 255, "WSREP: Local state: %s:%lld", + uuid_str, (long long)node_seqno); + wsrep_log_cb (level, msg); +} + +static my_bool set_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) +{ + XID* xid= reinterpret_cast<XID*>(arg); + handlerton* hton= plugin_data(plugin, handlerton *); + if (hton->set_checkpoint) + { + const wsrep_uuid_t* uuid(wsrep_xid_uuid(xid)); + char uuid_str[40] = {0, }; + wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + WSREP_DEBUG("Set WSREPXid for InnoDB: %s:%lld", + uuid_str, (long long)wsrep_xid_seqno(xid)); + hton->set_checkpoint(hton, xid); + } + return FALSE; +} + +void wsrep_set_SE_checkpoint(XID* xid) +{ + plugin_foreach(NULL, set_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, xid); +} + +static my_bool get_SE_checkpoint(THD* unused, plugin_ref plugin, void* arg) +{ + XID* xid= reinterpret_cast<XID*>(arg); + handlerton* hton= plugin_data(plugin, handlerton *); + if (hton->get_checkpoint) + { + hton->get_checkpoint(hton, xid); + const wsrep_uuid_t* 
uuid(wsrep_xid_uuid(xid)); + char uuid_str[40] = {0, }; + wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + WSREP_DEBUG("Read WSREPXid from InnoDB: %s:%lld", + uuid_str, (long long)wsrep_xid_seqno(xid)); + } + return FALSE; +} + +void wsrep_get_SE_checkpoint(XID* xid) +{ + plugin_foreach(NULL, get_SE_checkpoint, MYSQL_STORAGE_ENGINE_PLUGIN, xid); +} + +#ifdef GTID_SUPPORT +void wsrep_init_sidno(const wsrep_uuid_t& uuid) +{ + /* generate new Sid map entry from inverted uuid */ + rpl_sid sid; + wsrep_uuid_t ltid_uuid; + for (size_t i= 0; i < sizeof(ltid_uuid.data); ++i) + { + ltid_uuid.data[i] = ~local_uuid.data[i]; + } + sid.copy_from(ltid_uuid.data); + global_sid_lock->wrlock(); + wsrep_sidno= global_sid_map->add_sid(sid); + WSREP_INFO("inited wsrep sidno %d", wsrep_sidno); + global_sid_lock->unlock(); +} +#endif /* GTID_SUPPORT */ + +static wsrep_cb_status_t +wsrep_view_handler_cb (void* app_ctx, + void* recv_ctx, + const wsrep_view_info_t* view, + const char* state, + size_t state_len, + void** sst_req, + size_t* sst_req_len) +{ + *sst_req = NULL; + *sst_req_len = 0; + + wsrep_member_status_t new_status= local_status.get(); + + if (memcmp(&cluster_uuid, &view->state_id.uuid, sizeof(wsrep_uuid_t))) + { + memcpy((wsrep_uuid_t*)&cluster_uuid, &view->state_id.uuid, + sizeof(cluster_uuid)); + + wsrep_uuid_print (&cluster_uuid, cluster_uuid_str, + sizeof(cluster_uuid_str)); + } + + wsrep_cluster_conf_id= view->view; + wsrep_cluster_status= cluster_status_str[view->status]; + wsrep_cluster_size= view->memb_num; + wsrep_local_index= view->my_idx; + + WSREP_INFO("New cluster view: global state: %s:%lld, view# %lld: %s, " + "number of nodes: %ld, my index: %ld, protocol version %d", + wsrep_cluster_state_uuid, (long long)view->state_id.seqno, + (long long)wsrep_cluster_conf_id, wsrep_cluster_status, + wsrep_cluster_size, wsrep_local_index, view->proto_ver); + + /* Proceed further only if view is PRIMARY */ + if (WSREP_VIEW_PRIMARY != view->status) { + 
wsrep_ready_set(FALSE); + new_status= WSREP_MEMBER_UNDEFINED; + /* Always record local_uuid and local_seqno in non-prim since this + * may lead to re-initializing provider and start position is + * determined according to these variables */ + // WRONG! local_uuid should be the last primary configuration uuid we were + // a member of. local_seqno should be updated in commit calls. + // local_uuid= cluster_uuid; + // local_seqno= view->first - 1; + goto out; + } + + switch (view->proto_ver) + { + case 0: + case 1: + case 2: + case 3: + // version change + if (view->proto_ver != wsrep_protocol_version) + { + my_bool wsrep_ready_saved= wsrep_ready; + wsrep_ready_set(FALSE); + WSREP_INFO("closing client connections for " + "protocol change %ld -> %d", + wsrep_protocol_version, view->proto_ver); + wsrep_close_client_connections(TRUE); + wsrep_protocol_version= view->proto_ver; + wsrep_ready_set(wsrep_ready_saved); + } + break; + default: + WSREP_ERROR("Unsupported application protocol version: %d", + view->proto_ver); + unireg_abort(1); + } + + if (view->state_gap) + { + WSREP_WARN("Gap in state sequence. Need state transfer."); + + /* After that wsrep will call wsrep_sst_prepare. 
*/ + /* keep ready flag 0 until we receive the snapshot */ + wsrep_ready_set(FALSE); + + /* Close client connections to ensure that they don't interfere + * with SST */ + WSREP_DEBUG("[debug]: closing client connections for PRIM"); + wsrep_close_client_connections(TRUE); + + ssize_t const req_len= wsrep_sst_prepare (sst_req); + + if (req_len < 0) + { + WSREP_ERROR("SST preparation failed: %zd (%s)", -req_len, + strerror(-req_len)); + new_status= WSREP_MEMBER_UNDEFINED; + } + else + { + assert(sst_req != NULL); + *sst_req_len= req_len; + new_status= WSREP_MEMBER_JOINER; + } + } + else + { + /* + * NOTE: Initialize wsrep_group_uuid here only if it wasn't initialized + * before - OR - it was reinitilized on startup (lp:992840) + */ + if (wsrep_startup) + { + if (wsrep_before_SE()) + { + wsrep_SE_init_grab(); + // Signal mysqld init thread to continue + wsrep_sst_complete (&cluster_uuid, view->state_id.seqno, false); + // and wait for SE initialization + wsrep_SE_init_wait(); + } + else + { + local_uuid= cluster_uuid; + local_seqno= view->state_id.seqno; + } + /* Init storage engine XIDs from first view */ + XID xid; + wsrep_xid_init(&xid, &local_uuid, local_seqno); + wsrep_set_SE_checkpoint(&xid); + new_status= WSREP_MEMBER_JOINED; +#ifdef GTID_SUPPORT + wsrep_init_sidno(local_uuid); +#endif /* GTID_SUPPORT */ + } + + // just some sanity check + if (memcmp (&local_uuid, &cluster_uuid, sizeof (wsrep_uuid_t))) + { + WSREP_ERROR("Undetected state gap. 
Can't continue."); + wsrep_log_states(WSREP_LOG_FATAL, &cluster_uuid, view->state_id.seqno, + &local_uuid, -1); + unireg_abort(1); + } + } + + if (wsrep_auto_increment_control) + { + global_system_variables.auto_increment_offset= view->my_idx + 1; + global_system_variables.auto_increment_increment= view->memb_num; + } + + { /* capabilities may be updated on new configuration */ + uint64_t const caps(wsrep->capabilities (wsrep)); + + my_bool const idc((caps & WSREP_CAP_INCREMENTAL_WRITESET) != 0); + if (TRUE == wsrep_incremental_data_collection && FALSE == idc) + { + WSREP_WARN("Unsupported protocol downgrade: " + "incremental data collection disabled. Expect abort."); + } + wsrep_incremental_data_collection = idc; + } + +out: + if (view->status == WSREP_VIEW_PRIMARY) wsrep_startup= FALSE; + local_status.set(new_status, view); + + return WSREP_CB_SUCCESS; +} + +void wsrep_ready_set (my_bool x) +{ + WSREP_DEBUG("Setting wsrep_ready to %d", x); + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + if (wsrep_ready != x) + { + wsrep_ready= x; + mysql_cond_signal (&COND_wsrep_ready); + } + mysql_mutex_unlock (&LOCK_wsrep_ready); +} + +// Wait until wsrep has reached ready state +void wsrep_ready_wait () +{ + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + while (!wsrep_ready) + { + WSREP_INFO("Waiting to reach ready state"); + mysql_cond_wait (&COND_wsrep_ready, &LOCK_wsrep_ready); + } + WSREP_INFO("ready state reached"); + mysql_mutex_unlock (&LOCK_wsrep_ready); +} + +static void wsrep_synced_cb(void* app_ctx) +{ + WSREP_INFO("Synchronized with group, ready for connections"); + bool signal_main= false; + if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); + if (!wsrep_ready) + { + wsrep_ready= TRUE; + mysql_cond_signal (&COND_wsrep_ready); + signal_main= true; + + } + local_status.set(WSREP_MEMBER_SYNCED); + mysql_mutex_unlock (&LOCK_wsrep_ready); + + if (signal_main) + { + wsrep_SE_init_grab(); + // Signal mysqld init thread to continue + wsrep_sst_complete 
(&local_uuid, local_seqno, false); + // and wait for SE initialization + wsrep_SE_init_wait(); + } + if (wsrep_restart_slave_activated) + { + int rcode; + WSREP_INFO("MySQL slave restart"); + wsrep_restart_slave_activated= FALSE; + + mysql_mutex_lock(&LOCK_active_mi); + if ((rcode = start_slave_threads(1 /* need mutex */, + 0 /* no wait for start*/, + active_mi, + master_info_file, + relay_log_info_file, + SLAVE_SQL))) + { + WSREP_WARN("Failed to create slave threads: %d", rcode); + } + mysql_mutex_unlock(&LOCK_active_mi); + + } +} + +static void wsrep_init_position() +{ + /* read XIDs from storage engines */ + XID xid; + memset(&xid, 0, sizeof(xid)); + xid.formatID= -1; + wsrep_get_SE_checkpoint(&xid); + + if (xid.formatID == -1) + { + WSREP_INFO("Read nil XID from storage engines, skipping position init"); + return; + } + else if (!wsrep_is_wsrep_xid(&xid)) + { + WSREP_WARN("Read non-wsrep XID from storage engines, skipping position init"); + return; + } + + const wsrep_uuid_t* uuid= wsrep_xid_uuid(&xid); + const wsrep_seqno_t seqno= wsrep_xid_seqno(&xid); + + char uuid_str[40] = {0, }; + wsrep_uuid_print(uuid, uuid_str, sizeof(uuid_str)); + WSREP_INFO("Initial position: %s:%lld", uuid_str, (long long)seqno); + + + if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(local_uuid)) && + local_seqno == WSREP_SEQNO_UNDEFINED) + { + // Initial state + local_uuid= *uuid; + local_seqno= seqno; + } + else if (memcmp(&local_uuid, uuid, sizeof(local_uuid)) || + local_seqno != seqno) + { + WSREP_WARN("Initial position was provided by configuration or SST, " + "avoiding override"); + } +} + +extern char* my_bind_addr_str; + +int wsrep_init() +{ + int rcode= -1; + DBUG_ASSERT(wsrep_inited == 0); + + if (strcmp(wsrep_start_position, WSREP_START_POSITION_ZERO)) + wsrep_start_position_init(wsrep_start_position); + + wsrep_sst_auth_init(wsrep_sst_auth); + + wsrep_causal_reads_update(&global_system_variables); + + mysql_mutex_register("sql", wsrep_mutexes, 
array_elements(wsrep_mutexes)); + mysql_cond_register("sql", wsrep_conds, array_elements(wsrep_conds)); + + mysql_mutex_init(key_LOCK_wsrep_ready, &LOCK_wsrep_ready, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_ready, &COND_wsrep_ready, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst, &LOCK_wsrep_sst, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst, &COND_wsrep_sst, NULL); + mysql_mutex_init(key_LOCK_wsrep_sst_init, &LOCK_wsrep_sst_init, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_init, &COND_wsrep_sst_init, NULL); + mysql_mutex_init(key_LOCK_wsrep_rollback, &LOCK_wsrep_rollback, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_rollback, &COND_wsrep_rollback, NULL); + mysql_mutex_init(key_LOCK_wsrep_replaying, &LOCK_wsrep_replaying, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_replaying, &COND_wsrep_replaying, NULL); + mysql_mutex_init(key_LOCK_wsrep_slave_threads, &LOCK_wsrep_slave_threads, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_wsrep_desync, &LOCK_wsrep_desync, MY_MUTEX_INIT_FAST); + + wsrep_ready_set(FALSE); + assert(wsrep_provider); + + wsrep_init_position(); + + if ((rcode= wsrep_load(wsrep_provider, &wsrep, wsrep_log_cb)) != WSREP_OK) + { + if (strcasecmp(wsrep_provider, WSREP_NONE)) + { + WSREP_ERROR("wsrep_load(%s) failed: %s (%d). Reverting to no provider.", + wsrep_provider, strerror(rcode), rcode); + strcpy((char*)wsrep_provider, WSREP_NONE); // damn it's a dirty hack + (void) wsrep_init(); + return rcode; + } + else /* this is for recursive call above */ + { + WSREP_ERROR("Could not revert to no provider: %s (%d). Need to abort.", + strerror(rcode), rcode); + unireg_abort(1); + } + } + + if (!WSREP_PROVIDER_EXISTS) + { + // enable normal operation in case no provider is specified + wsrep_ready_set(TRUE); + wsrep_inited= 1; + global_system_variables.wsrep_on = 0; + wsrep_init_args args; + args.logger_cb = wsrep_log_cb; + args.options = (wsrep_provider_options) ? 
+ wsrep_provider_options : ""; + rcode = wsrep->init(wsrep, &args); + if (rcode) + { + DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode)); + WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode); + wsrep->free(wsrep); + free(wsrep); + wsrep = NULL; + } + return rcode; + } + else + { + global_system_variables.wsrep_on = 1; + strncpy(provider_name, + wsrep->provider_name, sizeof(provider_name) - 1); + strncpy(provider_version, + wsrep->provider_version, sizeof(provider_version) - 1); + strncpy(provider_vendor, + wsrep->provider_vendor, sizeof(provider_vendor) - 1); + } + + if (!wsrep_data_home_dir || strlen(wsrep_data_home_dir) == 0) + wsrep_data_home_dir = mysql_real_data_home; + + char node_addr[512]= { 0, }; + size_t const node_addr_max= sizeof(node_addr) - 1; + if (!wsrep_node_address || !strcmp(wsrep_node_address, "")) + { + size_t const ret= wsrep_guess_ip(node_addr, node_addr_max); + if (!(ret > 0 && ret < node_addr_max)) + { + WSREP_WARN("Failed to guess base node address. Set it explicitly via " + "wsrep_node_address."); + node_addr[0]= '\0'; + } + } + else + { + strncpy(node_addr, wsrep_node_address, node_addr_max); + } + + char inc_addr[512]= { 0, }; + size_t const inc_addr_max= sizeof (inc_addr); + if ((!wsrep_node_incoming_address || + !strcmp (wsrep_node_incoming_address, WSREP_NODE_INCOMING_AUTO))) + { + unsigned int my_bind_ip= INADDR_ANY; // default if not set + if (my_bind_addr_str && strlen(my_bind_addr_str)) + { + my_bind_ip= wsrep_check_ip(my_bind_addr_str); + } + + if (INADDR_ANY != my_bind_ip) + { + if (INADDR_NONE != my_bind_ip && INADDR_LOOPBACK != my_bind_ip) + { + snprintf(inc_addr, inc_addr_max, "%s:%u", + my_bind_addr_str, (int)mysqld_port); + } // else leave inc_addr an empty string - mysqld is not listening for + // client connections on network interfaces. 
+ } + else // mysqld binds to 0.0.0.0, take IP from wsrep_node_address if possible + { + size_t const node_addr_len= strlen(node_addr); + if (node_addr_len > 0) + { + const char* const colon= strrchr(node_addr, ':'); + if (strchr(node_addr, ':') == colon) // 1 or 0 ':' + { + size_t const ip_len= colon ? colon - node_addr : node_addr_len; + if (ip_len + 7 /* :55555\0 */ < inc_addr_max) + { + memcpy (inc_addr, node_addr, ip_len); + snprintf(inc_addr + ip_len, inc_addr_max - ip_len, ":%u", + (int)mysqld_port); + } + else + { + WSREP_WARN("Guessing address for incoming client connections: " + "address too long."); + inc_addr[0]= '\0'; + } + } + else + { + WSREP_WARN("Guessing address for incoming client connections: " + "too many colons :) ."); + inc_addr[0]= '\0'; + } + } + + if (!strlen(inc_addr)) + { + WSREP_WARN("Guessing address for incoming client connections failed. " + "Try setting wsrep_node_incoming_address explicitly."); + } + } + } + else if (!strchr(wsrep_node_incoming_address, ':')) // no port included + { + if ((int)inc_addr_max <= + snprintf(inc_addr, inc_addr_max, "%s:%u", + wsrep_node_incoming_address,(int)mysqld_port)) + { + WSREP_WARN("Guessing address for incoming client connections: " + "address too long."); + inc_addr[0]= '\0'; + } + } + else + { + size_t const need = strlen (wsrep_node_incoming_address); + if (need >= inc_addr_max) { + WSREP_WARN("wsrep_node_incoming_address too long: %zu", need); + inc_addr[0]= '\0'; + } + else { + memcpy (inc_addr, wsrep_node_incoming_address, need); + } + } + + struct wsrep_init_args wsrep_args; + + struct wsrep_gtid const state_id = { local_uuid, local_seqno }; + + wsrep_args.data_dir = wsrep_data_home_dir; + wsrep_args.node_name = (wsrep_node_name) ? wsrep_node_name : ""; + wsrep_args.node_address = node_addr; + wsrep_args.node_incoming = inc_addr; + wsrep_args.options = (wsrep_provider_options) ? 
+ wsrep_provider_options : ""; + wsrep_args.proto_ver = wsrep_max_protocol_version; + + wsrep_args.state_id = &state_id; + + wsrep_args.logger_cb = wsrep_log_cb; + wsrep_args.view_handler_cb = wsrep_view_handler_cb; + wsrep_args.apply_cb = wsrep_apply_cb; + wsrep_args.commit_cb = wsrep_commit_cb; + wsrep_args.unordered_cb = wsrep_unordered_cb; + wsrep_args.sst_donate_cb = wsrep_sst_donate_cb; + wsrep_args.synced_cb = wsrep_synced_cb; + + rcode = wsrep->init(wsrep, &wsrep_args); + + if (rcode) + { + DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode)); + WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode); + wsrep->free(wsrep); + free(wsrep); + wsrep = NULL; + } else { + wsrep_inited= 1; + } + + return rcode; +} + +void wsrep_init_startup (bool first) +{ + if (wsrep_init()) unireg_abort(1); + + wsrep_thr_lock_init( + (wsrep_thd_is_brute_force_fun)wsrep_thd_is_BF, + (wsrep_abort_thd_fun)wsrep_abort_thd, + wsrep_debug, wsrep_convert_LOCK_to_trx, + (wsrep_on_fun)wsrep_on); + + /* Skip replication start if no cluster address */ + if (!wsrep_cluster_address || strlen(wsrep_cluster_address) == 0) return; + + if (first) wsrep_sst_grab(); // do it so we can wait for SST below + + if (!wsrep_start_replication()) unireg_abort(1); + + wsrep_create_rollbacker(); + wsrep_create_appliers(1); + + if (first && !wsrep_sst_wait()) unireg_abort(1);// wait until SST is completed +} + + +void wsrep_deinit(bool free_options) +{ + DBUG_ASSERT(wsrep_inited == 1); + wsrep_unload(wsrep); + wsrep= 0; + provider_name[0]= '\0'; + provider_version[0]= '\0'; + provider_vendor[0]= '\0'; + wsrep_inited= 0; + + if (free_options) + { + wsrep_sst_auth_free(); + } + + mysql_mutex_destroy(&LOCK_wsrep_ready); + mysql_cond_destroy(&COND_wsrep_ready); + mysql_mutex_destroy(&LOCK_wsrep_sst); + mysql_cond_destroy(&COND_wsrep_sst); + mysql_mutex_destroy(&LOCK_wsrep_sst_init); + mysql_cond_destroy(&COND_wsrep_sst_init); + mysql_mutex_destroy(&LOCK_wsrep_rollback); + 
mysql_cond_destroy(&COND_wsrep_rollback); + mysql_mutex_destroy(&LOCK_wsrep_replaying); + mysql_cond_destroy(&COND_wsrep_replaying); + mysql_mutex_destroy(&LOCK_wsrep_slave_threads); + mysql_mutex_destroy(&LOCK_wsrep_desync); +} + +void wsrep_recover() +{ + if (!memcmp(&local_uuid, &WSREP_UUID_UNDEFINED, sizeof(wsrep_uuid_t)) && + local_seqno == -2) + { + char uuid_str[40]; + wsrep_uuid_print(&local_uuid, uuid_str, sizeof(uuid_str)); + WSREP_INFO("Position %s:%lld given at startup, skipping position recovery", + uuid_str, (long long)local_seqno); + return; + } + XID xid; + memset(&xid, 0, sizeof(xid)); + xid.formatID= -1; + wsrep_get_SE_checkpoint(&xid); + char uuid_str[40]; + wsrep_uuid_print(wsrep_xid_uuid(&xid), uuid_str, sizeof(uuid_str)); + WSREP_INFO("Recovered position: %s:%lld", uuid_str, + (long long)wsrep_xid_seqno(&xid)); +} + + +void wsrep_stop_replication(THD *thd) +{ + WSREP_INFO("Stop replication"); + if (!wsrep) + { + WSREP_INFO("Provider was not loaded, in stop replication"); + return; + } + + /* disconnect from group first to get wsrep_ready == FALSE */ + WSREP_DEBUG("Provider disconnect"); + wsrep->disconnect(wsrep); + + wsrep_connected= FALSE; + + wsrep_close_client_connections(TRUE); + + /* wait until appliers have stopped */ + wsrep_wait_appliers_close(thd); + + return; +} + +bool wsrep_start_replication() +{ + wsrep_status_t rcode; + + /* + if provider is trivial, don't even try to connect, + but resume local node operation + */ + if (!WSREP_PROVIDER_EXISTS) + { + // enable normal operation in case no provider is specified + wsrep_ready_set(TRUE); + return true; + } + + if (!wsrep_cluster_address || strlen(wsrep_cluster_address)== 0) + { + // if provider is non-trivial, but no address is specified, wait for address + wsrep_ready_set(FALSE); + return true; + } + + bool const bootstrap= wsrep_new_cluster; + + WSREP_INFO("Start replication"); + + if (wsrep_new_cluster) + { + WSREP_INFO("'wsrep-new-cluster' option used, bootstrapping the 
cluster"); + wsrep_new_cluster= false; + } + + if ((rcode = wsrep->connect(wsrep, + wsrep_cluster_name, + wsrep_cluster_address, + wsrep_sst_donor, + bootstrap))) + { + if (-ESOCKTNOSUPPORT == rcode) + { + DBUG_PRINT("wsrep",("unrecognized cluster address: '%s', rcode: %d", + wsrep_cluster_address, rcode)); + WSREP_ERROR("unrecognized cluster address: '%s', rcode: %d", + wsrep_cluster_address, rcode); + } + else + { + DBUG_PRINT("wsrep",("wsrep->connect() failed: %d", rcode)); + WSREP_ERROR("wsrep::connect() failed: %d", rcode); + } + + return false; + } + else + { + wsrep_connected= TRUE; + + char* opts= wsrep->options_get(wsrep); + if (opts) + { + wsrep_provider_options_init(opts); + free(opts); + } + else + { + WSREP_WARN("Failed to get wsrep options"); + } + } + + return true; +} + +bool wsrep_sync_wait (THD* thd, uint mask) +{ + if ((thd->variables.wsrep_sync_wait & mask) && + thd->variables.wsrep_on && + !thd->in_active_multi_stmt_transaction() && + thd->wsrep_conflict_state != REPLAYING) + { + WSREP_DEBUG("wsrep_sync_wait: thd->variables.wsrep_sync_wait = %u, mask = %u", + thd->variables.wsrep_sync_wait, mask); + // This allows autocommit SELECTs and a first SELECT after SET AUTOCOMMIT=0 + // TODO: modify to check if thd has locked any rows. + wsrep_gtid_t gtid; + wsrep_status_t ret= wsrep->causal_read (wsrep, &gtid); + + if (unlikely(WSREP_OK != ret)) + { + const char* msg; + int err; + + // Possibly relevant error codes: + // ER_CHECKREAD, ER_ERROR_ON_READ, ER_INVALID_DEFAULT, ER_EMPTY_QUERY, + // ER_FUNCTION_NOT_DEFINED, ER_NOT_ALLOWED_COMMAND, ER_NOT_SUPPORTED_YET, + // ER_FEATURE_DISABLED, ER_QUERY_INTERRUPTED + + switch (ret) + { + case WSREP_NOT_IMPLEMENTED: + msg= "synchronous reads by wsrep backend. 
" + "Please unset wsrep_causal_reads variable."; + err= ER_NOT_SUPPORTED_YET; + break; + default: + msg= "Synchronous wait failed."; + err= ER_LOCK_WAIT_TIMEOUT; // NOTE: the above msg won't be displayed + // with ER_LOCK_WAIT_TIMEOUT + } + + my_error(err, MYF(0), msg); + + return true; + } + } + + return false; +} + +/* + * Helpers to deal with TOI key arrays + */ +typedef struct wsrep_key_arr +{ + wsrep_key_t* keys; + size_t keys_len; +} wsrep_key_arr_t; + + +static void wsrep_keys_free(wsrep_key_arr_t* key_arr) +{ + for (size_t i= 0; i < key_arr->keys_len; ++i) + { + my_free((void*)key_arr->keys[i].key_parts); + } + my_free(key_arr->keys); + key_arr->keys= 0; + key_arr->keys_len= 0; +} + + +/*! + * @param db Database string + * @param table Table string + * @param key Array of wsrep_key_t + * @param key_len In: number of elements in key array, Out: number of + * elements populated + * + * @return true if preparation was successful, otherwise false. + */ + +static bool wsrep_prepare_key_for_isolation(const char* db, + const char* table, + wsrep_buf_t* key, + size_t* key_len) +{ + if (*key_len < 2) return false; + + switch (wsrep_protocol_version) + { + case 0: + *key_len= 0; + break; + case 1: + case 2: + case 3: + { + *key_len= 0; + if (db) + { + // sql_print_information("%s.%s", db, table); + if (db) + { + key[*key_len].ptr= db; + key[*key_len].len= strlen(db); + ++(*key_len); + if (table) + { + key[*key_len].ptr= table; + key[*key_len].len= strlen(table); + ++(*key_len); + } + } + } + break; + } + default: + return false; + } + + return true; +} + +/* Prepare key list from db/table and table_list */ +static bool wsrep_prepare_keys_for_isolation(THD* thd, + const char* db, + const char* table, + const TABLE_LIST* table_list, + wsrep_key_arr_t* ka) +{ + ka->keys= 0; + ka->keys_len= 0; + + extern TABLE* find_temporary_table(THD*, const TABLE_LIST*); + + if (db || table) + { + TABLE_LIST tmp_table; + MDL_request mdl_request; + + memset(&tmp_table, 0, 
sizeof(tmp_table)); + tmp_table.table_name= (char*)table; + tmp_table.db= (char*)db; + tmp_table.mdl_request.init(MDL_key::GLOBAL, (db) ? db : "", + (table) ? table : "", + MDL_INTENTION_EXCLUSIVE, MDL_STATEMENT); + + if (!table || !find_temporary_table(thd, &tmp_table)) + { + if (!(ka->keys= (wsrep_key_t*)my_malloc(sizeof(wsrep_key_t), MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_array"); + goto err; + } + ka->keys_len= 1; + if (!(ka->keys[0].key_parts= (wsrep_buf_t*) + my_malloc(sizeof(wsrep_buf_t)*2, MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_parts"); + goto err; + } + ka->keys[0].key_parts_num= 2; + if (!wsrep_prepare_key_for_isolation( + db, table, + (wsrep_buf_t*)ka->keys[0].key_parts, + &ka->keys[0].key_parts_num)) + { + WSREP_ERROR("Preparing keys for isolation failed"); + goto err; + } + } + } + + for (const TABLE_LIST* table= table_list; table; table= table->next_global) + { + if (!find_temporary_table(thd, table)) + { + wsrep_key_t* tmp; + tmp= (wsrep_key_t*)my_realloc( + ka->keys, (ka->keys_len + 1) * sizeof(wsrep_key_t), + MYF(MY_ALLOW_ZERO_PTR)); + + if (!tmp) + { + WSREP_ERROR("Can't allocate memory for key_array"); + goto err; + } + ka->keys= tmp; + if (!(ka->keys[ka->keys_len].key_parts= (wsrep_buf_t*) + my_malloc(sizeof(wsrep_buf_t)*2, MYF(0)))) + { + WSREP_ERROR("Can't allocate memory for key_parts"); + goto err; + } + ka->keys[ka->keys_len].key_parts_num= 2; + ++ka->keys_len; + if (!wsrep_prepare_key_for_isolation( + table->db, table->table_name, + (wsrep_buf_t*)ka->keys[ka->keys_len - 1].key_parts, + &ka->keys[ka->keys_len - 1].key_parts_num)) + { + WSREP_ERROR("Preparing keys for isolation failed"); + goto err; + } + } + } + return true; +err: + wsrep_keys_free(ka); + return false; +} + + +bool wsrep_prepare_key(const uchar* cache_key, size_t cache_key_len, + const uchar* row_id, size_t row_id_len, + wsrep_buf_t* key, size_t* key_len) +{ + if (*key_len < 3) return false; + + *key_len= 0; + switch 
(wsrep_protocol_version) + { + case 0: + { + key[0].ptr = cache_key; + key[0].len = cache_key_len; + + *key_len = 1; + break; + } + case 1: + case 2: + case 3: + { + key[0].ptr = cache_key; + key[0].len = strlen( (char*)cache_key ); + + key[1].ptr = cache_key + strlen( (char*)cache_key ) + 1; + key[1].len = strlen( (char*)(key[1].ptr) ); + + *key_len = 2; + break; + } + default: + return false; + } + + key[*key_len].ptr = row_id; + key[*key_len].len = row_id_len; + ++(*key_len); + + return true; +} + + +/* + * Construct Query_log_Event from thd query and serialize it + * into buffer. + * + * Return 0 in case of success, 1 in case of error. + */ +int wsrep_to_buf_helper( + THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len) +{ + IO_CACHE tmp_io_cache; + if (open_cached_file(&tmp_io_cache, mysql_tmpdir, TEMP_PREFIX, + 65536, MYF(MY_WME))) + return 1; + int ret(0); + +#ifdef GTID_SUPPORT + if (thd->variables.gtid_next.type == GTID_GROUP) + { + Gtid_log_event gtid_ev(thd, FALSE, &thd->variables.gtid_next); + if (!gtid_ev.is_valid()) ret= 0; + if (!ret && gtid_ev.write(&tmp_io_cache)) ret= 1; + } +#endif /* GTID_SUPPORT */ + + /* if there is prepare query, add event for it */ + if (!ret && thd->wsrep_TOI_pre_query) + { + Query_log_event ev(thd, thd->wsrep_TOI_pre_query, + thd->wsrep_TOI_pre_query_len, + FALSE, FALSE, FALSE, 0); + if (ev.write(&tmp_io_cache)) ret= 1; + } + + /* continue to append the actual query */ + Query_log_event ev(thd, query, query_len, FALSE, FALSE, FALSE, 0); + if (!ret && ev.write(&tmp_io_cache)) ret= 1; + if (!ret && wsrep_write_cache_buf(&tmp_io_cache, buf, buf_len)) ret= 1; + close_cached_file(&tmp_io_cache); + return ret; +} + +static int +wsrep_alter_query_string(THD *thd, String *buf) +{ + /* Append the "ALTER" part of the query */ + if (buf->append(STRING_WITH_LEN("ALTER "))) + return 1; + /* Append definer */ + append_definer(thd, buf, &(thd->lex->definer->user), &(thd->lex->definer->host)); + /* Append the left 
part of thd->query after event name part */ + if (buf->append(thd->lex->stmt_definition_begin, + thd->lex->stmt_definition_end - + thd->lex->stmt_definition_begin)) + return 1; + + return 0; +} + +int wsrep_alter_event_query(THD *thd, uchar** buf, size_t* buf_len) +{ + String log_query; + + if (wsrep_alter_query_string(thd, &log_query)) + { + WSREP_WARN("events alter string failed: %s", thd->query()); + return 1; + } + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} + +#include "sql_show.h" +static int +create_view_query(THD *thd, uchar** buf, size_t* buf_len) +{ + LEX *lex= thd->lex; + SELECT_LEX *select_lex= &lex->select_lex; + TABLE_LIST *first_table= select_lex->table_list.first; + TABLE_LIST *views = first_table; + + String buff; + const LEX_STRING command[3]= + {{ C_STRING_WITH_LEN("CREATE ") }, + { C_STRING_WITH_LEN("ALTER ") }, + { C_STRING_WITH_LEN("CREATE OR REPLACE ") }}; + + buff.append(command[thd->lex->create_view_mode].str, + command[thd->lex->create_view_mode].length); + + if (!lex->definer) + { + /* + DEFINER-clause is missing; we have to create default definer in + persistent arena to be PS/SP friendly. + If this is an ALTER VIEW then the current user should be set as + the definer. 
+ */ + + if (!(lex->definer= create_default_definer(thd, false))) + { + WSREP_WARN("view default definer issue"); + } + } + + views->algorithm = lex->create_view_algorithm; + views->definer.user = lex->definer->user; + views->definer.host = lex->definer->host; + views->view_suid = lex->create_view_suid; + views->with_check = lex->create_view_check; + + view_store_options(thd, views, &buff); + buff.append(STRING_WITH_LEN("VIEW ")); + /* Test if user supplied a db (ie: we did not use thd->db) */ + if (views->db && views->db[0] && + (thd->db == NULL || strcmp(views->db, thd->db))) + { + append_identifier(thd, &buff, views->db, + views->db_length); + buff.append('.'); + } + append_identifier(thd, &buff, views->table_name, + views->table_name_length); + if (lex->view_list.elements) + { + List_iterator_fast<LEX_STRING> names(lex->view_list); + LEX_STRING *name; + int i; + + for (i= 0; (name= names++); i++) + { + buff.append(i ? ", " : "("); + append_identifier(thd, &buff, name->str, name->length); + } + buff.append(')'); + } + buff.append(STRING_WITH_LEN(" AS ")); + //buff.append(views->source.str, views->source.length); + buff.append(thd->lex->create_view_select.str, + thd->lex->create_view_select.length); + //int errcode= query_error_code(thd, TRUE); + //if (thd->binlog_query(THD::STMT_QUERY_TYPE, + // buff.ptr(), buff.length(), FALSE, FALSE, FALSE, errcod + return wsrep_to_buf_helper(thd, buff.ptr(), buff.length(), buf, buf_len); +} + +static int wsrep_TOI_begin(THD *thd, char *db_, char *table_, + const TABLE_LIST* table_list) +{ + wsrep_status_t ret(WSREP_WARNING); + uchar* buf(0); + size_t buf_len(0); + int buf_err; + + WSREP_DEBUG("TO BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, thd->query() ); + switch (thd->lex->sql_command) + { + case SQLCOM_CREATE_VIEW: + buf_err= create_view_query(thd, &buf, &buf_len); + break; + case SQLCOM_CREATE_PROCEDURE: + case SQLCOM_CREATE_SPFUNCTION: + buf_err= wsrep_create_sp(thd, &buf, 
&buf_len); + break; + case SQLCOM_CREATE_TRIGGER: + buf_err= wsrep_create_trigger_query(thd, &buf, &buf_len); + break; + case SQLCOM_CREATE_EVENT: + buf_err= wsrep_create_event_query(thd, &buf, &buf_len); + break; + case SQLCOM_ALTER_EVENT: + buf_err= wsrep_alter_event_query(thd, &buf, &buf_len); + break; + default: + buf_err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(), &buf, + &buf_len); + break; + } + + wsrep_key_arr_t key_arr= {0, 0}; + struct wsrep_buf buff = { buf, buf_len }; + if (!buf_err && + wsrep_prepare_keys_for_isolation(thd, db_, table_, table_list, &key_arr)&& + WSREP_OK == (ret = wsrep->to_execute_start(wsrep, thd->thread_id, + key_arr.keys, key_arr.keys_len, + &buff, 1, + &thd->wsrep_trx_meta))) + { + thd->wsrep_exec_mode= TOTAL_ORDER; + wsrep_to_isolation++; + my_free(buf); + wsrep_keys_free(&key_arr); + WSREP_DEBUG("TO BEGIN: %lld, %d",(long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode); + } + else { + /* jump to error handler in mysql_execute_command() */ + WSREP_WARN("TO isolation failed for: %d, sql: %s. Check wsrep " + "connection state and retry the query.", + ret, (thd->query()) ? thd->query() : "void"); + my_error(ER_LOCK_DEADLOCK, MYF(0), "WSREP replication failed. Check " + "your wsrep connection state and retry the query."); + if (buf) my_free(buf); + wsrep_keys_free(&key_arr); + return -1; + } + return 0; +} + +static void wsrep_TOI_end(THD *thd) { + wsrep_status_t ret; + wsrep_to_isolation--; + + WSREP_DEBUG("TO END: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, (thd->query()) ? 
thd->query() : "void"); + + XID xid; + wsrep_xid_init(&xid, &thd->wsrep_trx_meta.gtid.uuid, + thd->wsrep_trx_meta.gtid.seqno); + wsrep_set_SE_checkpoint(&xid); + WSREP_DEBUG("TO END: %lld, update seqno", + (long long)wsrep_thd_trx_seqno(thd)); + + if (WSREP_OK == (ret = wsrep->to_execute_end(wsrep, thd->thread_id))) { + WSREP_DEBUG("TO END: %lld", (long long)wsrep_thd_trx_seqno(thd)); + } + else { + WSREP_WARN("TO isolation end failed for: %d, sql: %s", + ret, (thd->query()) ? thd->query() : "void"); + } +} + +static int wsrep_RSU_begin(THD *thd, char *db_, char *table_) +{ + wsrep_status_t ret(WSREP_WARNING); + WSREP_DEBUG("RSU BEGIN: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, thd->query() ); + + ret = wsrep->desync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("RSU desync failed %d for %s", ret, thd->query()); + my_error(ER_LOCK_DEADLOCK, MYF(0)); + return(ret); + } + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying++; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + if (wsrep_wait_committing_connections_close(5000)) + { + /* no can do, bail out from DDL */ + WSREP_WARN("RSU failed due to pending transactions, %s", thd->query()); + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + ret = wsrep->resync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resync failed %d for %s", ret, thd->query()); + } + my_error(ER_LOCK_DEADLOCK, MYF(0)); + return(1); + } + + wsrep_seqno_t seqno = wsrep->pause(wsrep); + if (seqno == WSREP_SEQNO_UNDEFINED) + { + WSREP_WARN("pause failed %lld for %s", (long long)seqno, thd->query()); + return(1); + } + WSREP_DEBUG("paused at %lld", (long long)seqno); + thd->variables.wsrep_on = 0; + return 0; +} + +static void wsrep_RSU_end(THD *thd) +{ + wsrep_status_t ret(WSREP_WARNING); + WSREP_DEBUG("RSU END: %lld, %d : %s", (long long)wsrep_thd_trx_seqno(thd), + thd->wsrep_exec_mode, thd->query() ); + + + 
mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + mysql_mutex_unlock(&LOCK_wsrep_replaying); + + ret = wsrep->resume(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resume failed %d for %s", ret, thd->query()); + } + ret = wsrep->resync(wsrep); + if (ret != WSREP_OK) + { + WSREP_WARN("resync failed %d for %s", ret, thd->query()); + return; + } + thd->variables.wsrep_on = 1; +} + +int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_, + const TABLE_LIST* table_list) +{ + int ret= 0; + + /* + No isolation for applier or replaying threads. + */ + if (thd->wsrep_exec_mode == REPL_RECV) + return 0; + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + if (thd->wsrep_conflict_state == MUST_ABORT) + { + WSREP_INFO("thread: %lu, %s has been aborted due to multi-master conflict", + thd->thread_id, thd->query()); + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + return WSREP_TRX_FAIL; + } + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE); + DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED); + + if (thd->global_read_lock.can_acquire_protection()) + { + WSREP_DEBUG("Aborting TOI: Global Read-Lock (FTWRL) in place: %s %lu", + thd->query(), thd->thread_id); + return -1; + } + + if (wsrep_debug && thd->mdl_context.has_locks()) + { + WSREP_DEBUG("thread holds MDL locks at TI begin: %s %lu", + thd->query(), thd->thread_id); + } + + /* + It makes sense to set auto_increment_* to defaults in TOI operations. + Must be done before wsrep_TOI_begin() since Query_log_event encapsulating + TOI statement and auto inc variables for wsrep replication is constructed + there. Variables are reset back in THD::reset_for_next_command() before + processing of next command. 
+ */ + if (wsrep_auto_increment_control) + { + thd->variables.auto_increment_offset = 1; + thd->variables.auto_increment_increment = 1; + } + + if (thd->variables.wsrep_on && thd->wsrep_exec_mode==LOCAL_STATE) + { + switch (wsrep_OSU_method_options) { + case WSREP_OSU_TOI: ret = wsrep_TOI_begin(thd, db_, table_, + table_list); break; + case WSREP_OSU_RSU: ret = wsrep_RSU_begin(thd, db_, table_); break; + } + if (!ret) + { + thd->wsrep_exec_mode= TOTAL_ORDER; + } + } + return ret; +} + +void wsrep_to_isolation_end(THD *thd) +{ + if (thd->wsrep_exec_mode == TOTAL_ORDER) + { + switch(wsrep_OSU_method_options) + { + case WSREP_OSU_TOI: wsrep_TOI_end(thd); break; + case WSREP_OSU_RSU: wsrep_RSU_end(thd); break; + } + wsrep_cleanup_transaction(thd); + } +} + +#define WSREP_MDL_LOG(severity, msg, req, gra) \ + WSREP_##severity( \ + "%s\n" \ + "request: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)\n" \ + "granted: (%lu \tseqno %lld \twsrep (%d, %d, %d) cmd %d %d \t%s)", \ + msg, \ + req->thread_id, (long long)wsrep_thd_trx_seqno(req), \ + req->wsrep_exec_mode, req->wsrep_query_state, req->wsrep_conflict_state, \ + req->get_command(), req->lex->sql_command, req->query(), \ + gra->thread_id, (long long)wsrep_thd_trx_seqno(gra), \ + gra->wsrep_exec_mode, gra->wsrep_query_state, gra->wsrep_conflict_state, \ + gra->get_command(), gra->lex->sql_command, gra->query()); + +bool +wsrep_grant_mdl_exception(MDL_context *requestor_ctx, + MDL_ticket *ticket +) { + if (!WSREP_ON) return FALSE; + + THD *request_thd = requestor_ctx->get_thd(); + THD *granted_thd = ticket->get_ctx()->get_thd(); + bool ret = FALSE; + + mysql_mutex_lock(&request_thd->LOCK_wsrep_thd); + if (request_thd->wsrep_exec_mode == TOTAL_ORDER || + request_thd->wsrep_exec_mode == REPL_RECV) + { + mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd); + WSREP_MDL_LOG(DEBUG, "MDL conflict ", request_thd, granted_thd); + ticket->wsrep_report(wsrep_debug); + + mysql_mutex_lock(&granted_thd->LOCK_wsrep_thd); + if 
(granted_thd->wsrep_exec_mode == TOTAL_ORDER || + granted_thd->wsrep_exec_mode == REPL_RECV) + { + WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", request_thd, granted_thd); + ticket->wsrep_report(true); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + ret = TRUE; + } + else if (granted_thd->lex->sql_command == SQLCOM_FLUSH) + { + WSREP_DEBUG("mdl granted over FLUSH BF"); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + ret = TRUE; + } + else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) + { + WSREP_DEBUG("DROP caused BF abort"); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + ret = FALSE; + } + else if (granted_thd->wsrep_query_state == QUERY_COMMITTING) + { + WSREP_DEBUG("mdl granted, but commiting thd abort scheduled"); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + ret = FALSE; + } + else + { + WSREP_MDL_LOG(DEBUG, "MDL conflict-> BF abort", request_thd, granted_thd); + ticket->wsrep_report(wsrep_debug); + mysql_mutex_unlock(&granted_thd->LOCK_wsrep_thd); + wsrep_abort_thd((void*)request_thd, (void*)granted_thd, 1); + ret = FALSE; + } + } + else + { + mysql_mutex_unlock(&request_thd->LOCK_wsrep_thd); + } + return ret; +} + + +pthread_handler_t start_wsrep_THD(void *arg) +{ + THD *thd; + rpl_sql_thread_info sql_info(NULL); + wsrep_thd_processor_fun processor= (wsrep_thd_processor_fun)arg; + + if (my_thread_init()) + { + WSREP_ERROR("Could not initialize thread"); + return(NULL); + } + + if (!(thd= new THD(true))) + { + return(NULL); + } + mysql_mutex_lock(&LOCK_thread_count); + thd->thread_id=thread_id++; + + thd->real_id=pthread_self(); // Keep purify happy + thread_count++; + thread_created++; + threads.append(thd); + + my_net_init(&thd->net,(st_vio*) 0, MYF(0)); + + DBUG_PRINT("wsrep",(("creating 
thread %lld"), (long long)thd->thread_id)); + thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer(); + (void) mysql_mutex_unlock(&LOCK_thread_count); + + /* from bootstrap()... */ + thd->bootstrap=1; + thd->max_client_packet_length= thd->net.max_packet; + thd->security_ctx->master_access= ~(ulong)0; + thd->system_thread_info.rpl_sql_info= &sql_info; + + /* from handle_one_connection... */ + pthread_detach_this_thread(); + + mysql_thread_set_psi_id(thd->thread_id); + thd->thr_create_utime= microsecond_interval_timer(); + if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0)) + { + close_connection(thd, ER_OUT_OF_RESOURCES); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + + return(NULL); + } + +// </5.1.17> + /* + handle_one_connection() is normally the only way a thread would + start and would always be on the very high end of the stack , + therefore, the thread stack always starts at the address of the + first local variable of handle_one_connection, which is thd. We + need to know the start of the stack so that we could check for + stack overruns. + */ + DBUG_PRINT("wsrep", ("handle_one_connection called by thread %lld\n", + (long long)thd->thread_id)); + /* now that we've called my_thread_init(), it is safe to call DBUG_* */ + + thd->thread_stack= (char*) &thd; + if (thd->store_globals()) + { + close_connection(thd, ER_OUT_OF_RESOURCES); + statistic_increment(aborted_connects,&LOCK_status); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); + delete thd; + + return(NULL); + } + + thd->system_thread= SYSTEM_THREAD_SLAVE_SQL; + thd->security_ctx->skip_grants(); + + /* handle_one_connection() again... 
*/ + //thd->version= refresh_version; + thd->proc_info= 0; + thd->set_command(COM_SLEEP); + thd->set_time(); + thd->init_for_queries(); + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads++; + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + processor(thd); + + close_connection(thd, 0); + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_running_threads--; + WSREP_DEBUG("wsrep running threads now: %lu", wsrep_running_threads); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + // Note: We can't call THD destructor without crashing + // if plugins have not been initialized. However, in most of the + // cases this means that pre SE initialization SST failed and + // we are going to exit anyway. + if (plugins_are_initialized) + { + net_end(&thd->net); + MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 1)); + } + else + { + // TODO: lightweight cleanup to get rid of: + // 'Error in my_thread_global_end(): 2 threads didn't exit' + // at server shutdown + } + + my_thread_end(); + if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) + { + mysql_mutex_lock(&LOCK_thread_count); + delete thd; + thread_count--; + mysql_mutex_unlock(&LOCK_thread_count); + } + return(NULL); +} + + +/**/ +static bool abort_replicated(THD *thd) +{ + bool ret_code= false; + if (thd->wsrep_query_state== QUERY_COMMITTING) + { + WSREP_DEBUG("aborting replicated trx: %lu", thd->real_id); + + (void)wsrep_abort_thd(thd, thd, TRUE); + ret_code= true; + } + return ret_code; +} + + +/**/ +static inline bool is_client_connection(THD *thd) +{ + return (thd->wsrep_client_thread && thd->variables.wsrep_on); +} + + +static inline bool is_replaying_connection(THD *thd) +{ + bool ret; + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + ret= (thd->wsrep_conflict_state == REPLAYING) ? 
true : false; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + return ret; +} + + +static inline bool is_committing_connection(THD *thd) +{ + bool ret; + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + ret= (thd->wsrep_query_state == QUERY_COMMITTING) ? true : false; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + return ret; +} + + +static bool have_client_connections() +{ + THD *tmp; + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + if (is_client_connection(tmp) && tmp->killed == KILL_CONNECTION) + { + (void)abort_replicated(tmp); + return true; + } + } + return false; +} + +static void wsrep_close_thread(THD *thd) +{ + thd->killed= KILL_CONNECTION; + MYSQL_CALLBACK(thread_scheduler, post_kill_notification, (thd)); + if (thd->mysys_var) + { + thd->mysys_var->abort=1; + mysql_mutex_lock(&thd->mysys_var->mutex); + if (thd->mysys_var->current_cond) + { + mysql_mutex_lock(thd->mysys_var->current_mutex); + mysql_cond_broadcast(thd->mysys_var->current_cond); + mysql_mutex_unlock(thd->mysys_var->current_mutex); + } + mysql_mutex_unlock(&thd->mysys_var->mutex); + } +} + + +static my_bool have_committing_connections() +{ + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + if (!is_client_connection(tmp)) + continue; + + if (is_committing_connection(tmp)) + { + return TRUE; + } + } + mysql_mutex_unlock(&LOCK_thread_count); + return FALSE; +} + + +int wsrep_wait_committing_connections_close(int wait_time) +{ + int sleep_time= 100; + + while (have_committing_connections() && wait_time > 0) + { + WSREP_DEBUG("wait for committing transaction to close: %d", wait_time); + my_sleep(sleep_time); + wait_time -= sleep_time; + } + if (have_committing_connections()) + { + return 1; + } + return 0; +} + + +void wsrep_close_client_connections(my_bool wait_to_end) +{ + /* + First signal all threads that 
it's time to die + */ + + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + bool kill_cached_threads_saved= kill_cached_threads; + kill_cached_threads= true; // prevent future threads caching + mysql_cond_broadcast(&COND_thread_cache); // tell cached threads to die + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + /* We skip slave threads & scheduler on this first loop through. */ + if (!is_client_connection(tmp)) + continue; + + if (is_replaying_connection(tmp)) + { + tmp->killed= KILL_CONNECTION; + continue; + } + + /* replicated transactions must be skipped */ + if (abort_replicated(tmp)) + continue; + + WSREP_DEBUG("closing connection %ld", tmp->thread_id); + wsrep_close_thread(tmp); + } + mysql_mutex_unlock(&LOCK_thread_count); + + if (thread_count) + sleep(2); // Give threads time to die + + mysql_mutex_lock(&LOCK_thread_count); + /* + Force remaining threads to die by closing the connection to the client + */ + + I_List_iterator<THD> it2(threads); + while ((tmp=it2++)) + { +#ifndef __bsdi__ // Bug in BSDI kernel + if (is_client_connection(tmp) && + !abort_replicated(tmp) && + !is_replaying_connection(tmp)) + { + WSREP_INFO("killing local connection: %ld",tmp->thread_id); + close_connection(tmp,0); + } +#endif + } + + DBUG_PRINT("quit",("Waiting for threads to die (count=%u)",thread_count)); + WSREP_DEBUG("waiting for client connections to close: %u", thread_count); + + while (wait_to_end && have_client_connections()) + { + mysql_cond_wait(&COND_thread_count, &LOCK_thread_count); + DBUG_PRINT("quit",("One thread died (count=%u)", thread_count)); + } + + kill_cached_threads= kill_cached_threads_saved; + + mysql_mutex_unlock(&LOCK_thread_count); + + /* All client connection threads have now been aborted */ +} + + +void wsrep_close_applier(THD *thd) +{ + WSREP_DEBUG("closing applier %ld", thd->thread_id); + wsrep_close_thread(thd); +} 
+ + +void wsrep_close_threads(THD *thd) +{ + THD *tmp; + mysql_mutex_lock(&LOCK_thread_count); // For unlink from list + + I_List_iterator<THD> it(threads); + while ((tmp=it++)) + { + DBUG_PRINT("quit",("Informing thread %ld that it's time to die", + tmp->thread_id)); + /* We skip slave threads & scheduler on this first loop through. */ + if (tmp->wsrep_applier && tmp != thd) + { + WSREP_DEBUG("closing wsrep thread %ld", tmp->thread_id); + wsrep_close_thread (tmp); + } + } + + mysql_mutex_unlock(&LOCK_thread_count); +} + +void wsrep_wait_appliers_close(THD *thd) +{ + /* Wait for wsrep appliers to gracefully exit */ + mysql_mutex_lock(&LOCK_thread_count); + while (wsrep_running_threads > 1) + // 1 is for rollbacker thread which needs to be killed explicitly. + // This gotta be fixed in a more elegant manner if we gonna have arbitrary + // number of non-applier wsrep threads. + { + if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) + { + mysql_mutex_unlock(&LOCK_thread_count); + my_sleep(100); + mysql_mutex_lock(&LOCK_thread_count); + } + else + mysql_cond_wait(&COND_thread_count,&LOCK_thread_count); + DBUG_PRINT("quit",("One applier died (count=%u)",thread_count)); + } + mysql_mutex_unlock(&LOCK_thread_count); + /* Now kill remaining wsrep threads: rollbacker */ + wsrep_close_threads (thd); + /* and wait for them to die */ + mysql_mutex_lock(&LOCK_thread_count); + while (wsrep_running_threads > 0) + { + if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) + { + mysql_mutex_unlock(&LOCK_thread_count); + my_sleep(100); + mysql_mutex_lock(&LOCK_thread_count); + } + else + mysql_cond_wait(&COND_thread_count,&LOCK_thread_count); + DBUG_PRINT("quit",("One thread died (count=%u)",thread_count)); + } + mysql_mutex_unlock(&LOCK_thread_count); + + /* All wsrep applier threads have now been aborted. However, if this thread + is also applier, we are still running... 
+ */ +} + + +void wsrep_kill_mysql(THD *thd) +{ + if (mysqld_server_started) + { + if (!shutdown_in_progress) + { + WSREP_INFO("starting shutdown"); + kill_mysql(); + } + } + else + { + unireg_abort(1); + } +} + + +int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len) +{ + String log_query; + sp_head *sp = thd->lex->sphead; + ulong saved_mode= thd->variables.sql_mode; + String retstr(64); + retstr.set_charset(system_charset_info); + + log_query.set_charset(system_charset_info); + + if (sp->m_type == TYPE_ENUM_FUNCTION) + { + sp_returns_type(thd, retstr, sp); + } + + if (!show_create_sp(thd, &log_query, + sp->m_type, + (sp->m_explicit_name ? sp->m_db.str : NULL), + (sp->m_explicit_name ? sp->m_db.length : 0), + sp->m_name.str, sp->m_name.length, + sp->m_params.str, sp->m_params.length, + retstr.c_ptr(), retstr.length(), + sp->m_body.str, sp->m_body.length, + sp->m_chistics, &(thd->lex->definer->user), + &(thd->lex->definer->host), + saved_mode)) + { + WSREP_WARN("SP create string failed: %s", thd->query()); + return 1; + } + + return wsrep_to_buf_helper(thd, log_query.ptr(), log_query.length(), buf, buf_len); +} + + +extern int wsrep_on(THD *thd) +{ + return (int)(WSREP(thd)); +} + + +extern "C" bool wsrep_thd_is_wsrep_on(THD *thd) +{ + return thd->variables.wsrep_on; +} + + +bool wsrep_consistency_check(THD *thd) +{ + return thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING; +} + + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode) +{ + thd->wsrep_exec_mode= mode; +} + + +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state) +{ + thd->wsrep_query_state= state; +} + + +void wsrep_thd_set_conflict_state(THD *thd, enum wsrep_conflict_state state) +{ + thd->wsrep_conflict_state= state; +} + + +enum wsrep_exec_mode wsrep_thd_exec_mode(THD *thd) +{ + return thd->wsrep_exec_mode; +} + + +const char *wsrep_thd_exec_mode_str(THD *thd) +{ + return + (!thd) ? 
"void" : + (thd->wsrep_exec_mode == LOCAL_STATE) ? "local" : + (thd->wsrep_exec_mode == REPL_RECV) ? "applier" : + (thd->wsrep_exec_mode == TOTAL_ORDER) ? "total order" : + (thd->wsrep_exec_mode == LOCAL_COMMIT) ? "local commit" : "void"; +} + + +enum wsrep_query_state wsrep_thd_query_state(THD *thd) +{ + return thd->wsrep_query_state; +} + + +const char *wsrep_thd_query_state_str(THD *thd) +{ + return + (!thd) ? "void" : + (thd->wsrep_query_state == QUERY_IDLE) ? "idle" : + (thd->wsrep_query_state == QUERY_EXEC) ? "executing" : + (thd->wsrep_query_state == QUERY_COMMITTING) ? "committing" : + (thd->wsrep_query_state == QUERY_EXITING) ? "exiting" : + (thd->wsrep_query_state == QUERY_ROLLINGBACK) ? "rolling back" : "void"; +} + + +enum wsrep_conflict_state wsrep_thd_get_conflict_state(THD *thd) +{ + return thd->wsrep_conflict_state; +} + + +const char *wsrep_thd_conflict_state_str(THD *thd) +{ + return + (!thd) ? "void" : + (thd->wsrep_conflict_state == NO_CONFLICT) ? "no conflict" : + (thd->wsrep_conflict_state == MUST_ABORT) ? "must abort" : + (thd->wsrep_conflict_state == ABORTING) ? "aborting" : + (thd->wsrep_conflict_state == MUST_REPLAY) ? "must replay" : + (thd->wsrep_conflict_state == REPLAYING) ? "replaying" : + (thd->wsrep_conflict_state == RETRY_AUTOCOMMIT) ? "retrying" : + (thd->wsrep_conflict_state == CERT_FAILURE) ? "cert failure" : "void"; +} + + +wsrep_ws_handle_t* wsrep_thd_ws_handle(THD *thd) +{ + return &thd->wsrep_ws_handle; +} + + +void wsrep_thd_LOCK(THD *thd) +{ + mysql_mutex_lock(&thd->LOCK_wsrep_thd); +} + + +void wsrep_thd_UNLOCK(THD *thd) +{ + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); +} + + +extern "C" time_t wsrep_thd_query_start(THD *thd) +{ + return thd->query_start(); +} + + +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd) +{ + return thd->wsrep_rand; +} + +longlong wsrep_thd_trx_seqno(THD *thd) +{ + return (thd) ? 
thd->wsrep_trx_meta.gtid.seqno : WSREP_SEQNO_UNDEFINED; +} + + +extern "C" query_id_t wsrep_thd_query_id(THD *thd) +{ + return thd->query_id; +} + + +char *wsrep_thd_query(THD *thd) +{ + return (thd) ? thd->query() : NULL; +} + + +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd) +{ + return thd->wsrep_last_query_id; +} + + +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id) +{ + thd->wsrep_last_query_id= id; +} + + +extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) +{ + if (signal) + { + mysql_mutex_lock(&thd->LOCK_thd_data); + thd->awake(KILL_QUERY); + mysql_mutex_unlock(&thd->LOCK_thd_data); + } + else + { + mysql_mutex_lock(&LOCK_wsrep_replaying); + mysql_cond_broadcast(&COND_wsrep_replaying); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } +} + + +int wsrep_thd_retry_counter(THD *thd) +{ + return(thd->wsrep_retry_counter); +} + + +extern int +wsrep_trx_order_before(THD *thd1, THD *thd2) +{ + if (wsrep_thd_trx_seqno(thd1) < wsrep_thd_trx_seqno(thd2)) { + WSREP_DEBUG("BF conflict, order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno(thd1), + (long long)wsrep_thd_trx_seqno(thd2)); + return 1; + } + WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n", + (long long)wsrep_thd_trx_seqno(thd1), + (long long)wsrep_thd_trx_seqno(thd2)); + return 0; +} + + +int wsrep_trx_is_aborting(THD *thd_ptr) +{ + if (thd_ptr) { + if ((((THD *)thd_ptr)->wsrep_conflict_state == MUST_ABORT) || + (((THD *)thd_ptr)->wsrep_conflict_state == ABORTING)) { + return 1; + } + } + return 0; +} + + +void wsrep_copy_query(THD *thd) +{ + thd->wsrep_retry_command = thd->get_command(); + thd->wsrep_retry_query_len = thd->query_length(); + if (thd->wsrep_retry_query) { + my_free(thd->wsrep_retry_query); + } + thd->wsrep_retry_query = (char *)my_malloc( + thd->wsrep_retry_query_len + 1, MYF(0)); + strncpy(thd->wsrep_retry_query, thd->query(), thd->wsrep_retry_query_len); + thd->wsrep_retry_query[thd->wsrep_retry_query_len] = '\0'; +} + + +bool 
wsrep_is_show_query(enum enum_sql_command command) +{ + DBUG_ASSERT(command >= 0 && command <= SQLCOM_END); + return (sql_command_flags[command] & CF_STATUS_COMMAND) != 0; +} + +bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, + TABLE_LIST* src_table, + HA_CREATE_INFO *create_info) +{ + TABLE *tmp_table; + bool is_tmp_table= FALSE; + + for (tmp_table= thd->temporary_tables; tmp_table; tmp_table=tmp_table->next) + { + if (!strcmp(src_table->db, tmp_table->s->db.str) && + !strcmp(src_table->table_name, tmp_table->s->table_name.str)) + { + is_tmp_table= TRUE; + break; + } + } + if (create_info->options & HA_LEX_CREATE_TMP_TABLE) + { + + /* CREATE TEMPORARY TABLE LIKE must be skipped from replication */ + WSREP_DEBUG("CREATE TEMPORARY TABLE LIKE... skipped replication\n %s", + thd->query()); + } + else if (!is_tmp_table) + { + /* this is straight CREATE TABLE LIKE... eith no tmp tables */ + WSREP_TO_ISOLATION_BEGIN(table->db, table->table_name, NULL); + } + else + { + /* here we have CREATE TABLE LIKE <temporary table> + the temporary table definition will be needed in slaves to + enable the create to succeed + */ + TABLE_LIST tbl; + bzero((void*) &tbl, sizeof(tbl)); + tbl.db= src_table->db; + tbl.table_name= tbl.alias= src_table->table_name; + tbl.table= tmp_table; + char buf[2048]; + String query(buf, sizeof(buf), system_charset_info); + query.length(0); // Have to zero it since constructor doesn't + + (void) store_create_info(thd, &tbl, &query, NULL, TRUE, FALSE); + WSREP_DEBUG("TMP TABLE: %s", query.ptr()); + + thd->wsrep_TOI_pre_query= query.ptr(); + thd->wsrep_TOI_pre_query_len= query.length(); + + WSREP_TO_ISOLATION_BEGIN(table->db, table->table_name, NULL); + + thd->wsrep_TOI_pre_query= NULL; + thd->wsrep_TOI_pre_query_len= 0; + } + + return(false); + +error: + thd->wsrep_TOI_pre_query= NULL; + return (true); +} + + +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len) +{ + LEX *lex= thd->lex; + String stmt_query; + + LEX_STRING 
definer_user; + LEX_STRING definer_host; + + if (!lex->definer) + { + if (!thd->slave_thread) + { + if (!(lex->definer= create_default_definer(thd, false))) + return 1; + } + } + + if (lex->definer) + { + /* SUID trigger. */ + + definer_user= lex->definer->user; + definer_host= lex->definer->host; + } + else + { + /* non-SUID trigger. */ + + definer_user.str= 0; + definer_user.length= 0; + + definer_host.str= 0; + definer_host.length= 0; + } + + stmt_query.append(STRING_WITH_LEN("CREATE ")); + + append_definer(thd, &stmt_query, &definer_user, &definer_host); + + LEX_STRING stmt_definition; + stmt_definition.str= (char*) thd->lex->stmt_definition_begin; + stmt_definition.length= thd->lex->stmt_definition_end + - thd->lex->stmt_definition_begin; + trim_whitespace(thd->charset(), & stmt_definition); + + stmt_query.append(stmt_definition.str, stmt_definition.length); + + return wsrep_to_buf_helper(thd, stmt_query.c_ptr(), stmt_query.length(), + buf, buf_len); +} + +/***** callbacks for wsrep service ************/ + +my_bool get_wsrep_debug() +{ + return wsrep_debug; +} + +my_bool get_wsrep_load_data_splitting() +{ + return wsrep_load_data_splitting; +} + +long get_wsrep_protocol_version() +{ + return wsrep_protocol_version; +} + +my_bool get_wsrep_drupal_282555_workaround() +{ + return wsrep_drupal_282555_workaround; +} + +my_bool get_wsrep_log_conflicts() +{ + return wsrep_log_conflicts; +} + +wsrep_t *get_wsrep() +{ + return wsrep; +} + +my_bool get_wsrep_certify_nonPK() +{ + return wsrep_certify_nonPK; +} + +void wsrep_lock_rollback() +{ + mysql_mutex_lock(&LOCK_wsrep_rollback); +} + +void wsrep_unlock_rollback() +{ + mysql_cond_signal(&COND_wsrep_rollback); + mysql_mutex_unlock(&LOCK_wsrep_rollback); +} + +my_bool wsrep_aborting_thd_contains(THD *thd) +{ + wsrep_aborting_thd_t abortees = wsrep_aborting_thd; + while (abortees) + { + if (abortees->aborting_thd == thd) + return true; + abortees = abortees->next; + } + return false; +} + +void 
wsrep_aborting_thd_enqueue(THD *thd) +{ + wsrep_aborting_thd_t aborting = (wsrep_aborting_thd_t) + my_malloc(sizeof(struct wsrep_aborting_thd), MYF(0)); + aborting->aborting_thd = thd; + aborting->next = wsrep_aborting_thd; + wsrep_aborting_thd = aborting; +} diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h new file mode 100644 index 00000000000..ce22dc473fb --- /dev/null +++ b/sql/wsrep_mysqld.h @@ -0,0 +1,336 @@ +/* Copyright 2008-2013 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <wsrep.h> + +#ifndef WSREP_MYSQLD_H +#define WSREP_MYSQLD_H + +#include <mysql/plugin.h> +#include <mysql/service_wsrep.h> + +#ifdef WITH_WSREP + +typedef struct st_mysql_show_var SHOW_VAR; +#include <sql_priv.h> +//#include "rpl_gtid.h" +#include "../wsrep/wsrep_api.h" +#include "mdl.h" +#include "mysqld.h" +#include "sql_table.h" + +#define WSREP_UNDEFINED_TRX_ID ULONGLONG_MAX + +class set_var; +class THD; + +enum wsrep_consistency_check_mode { + NO_CONSISTENCY_CHECK, + CONSISTENCY_CHECK_DECLARED, + CONSISTENCY_CHECK_RUNNING, +}; + +struct wsrep_thd_shadow { + ulonglong options; + uint server_status; + enum wsrep_exec_mode wsrep_exec_mode; + Vio *vio; + ulong tx_isolation; + char *db; + size_t db_length; +}; + +// Global wsrep parameters +extern wsrep_t* wsrep; + +// MySQL wsrep options +extern const char* wsrep_provider; +extern 
const char* wsrep_provider_options; +extern const char* wsrep_cluster_name; +extern const char* wsrep_cluster_address; +extern const char* wsrep_node_name; +extern const char* wsrep_node_address; +extern const char* wsrep_node_incoming_address; +extern const char* wsrep_data_home_dir; +extern const char* wsrep_dbug_option; +extern long wsrep_slave_threads; +extern int wsrep_slave_count_change; +extern my_bool wsrep_convert_LOCK_to_trx; +extern ulong wsrep_retry_autocommit; +extern my_bool wsrep_auto_increment_control; +extern my_bool wsrep_incremental_data_collection; +extern const char* wsrep_start_position; +extern ulong wsrep_max_ws_size; +extern ulong wsrep_max_ws_rows; +extern const char* wsrep_notify_cmd; +extern long wsrep_max_protocol_version; +extern ulong wsrep_forced_binlog_format; +extern ulong wsrep_OSU_method_options; +extern my_bool wsrep_desync; +extern my_bool wsrep_recovery; +extern my_bool wsrep_replicate_myisam; +extern ulong wsrep_mysql_replication_bundle; +extern my_bool wsrep_restart_slave; +extern my_bool wsrep_restart_slave_activated; +extern my_bool wsrep_slave_FK_checks; +extern my_bool wsrep_slave_UK_checks; +extern ulong wsrep_running_threads; +extern bool wsrep_new_cluster; + +enum enum_wsrep_OSU_method { WSREP_OSU_TOI, WSREP_OSU_RSU }; +enum enum_wsrep_sync_wait { + WSREP_SYNC_WAIT_NONE = 0x0, + // show, select, begin + WSREP_SYNC_WAIT_BEFORE_READ = 0x1, + WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE = 0x2, + WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE = 0x4, + WSREP_SYNC_WAIT_MAX = 0x7 +}; + +// MySQL status variables +extern my_bool wsrep_connected; +extern my_bool wsrep_ready; +extern const char* wsrep_cluster_state_uuid; +extern long long wsrep_cluster_conf_id; +extern const char* wsrep_cluster_status; +extern long wsrep_cluster_size; +extern long wsrep_local_index; +extern long long wsrep_local_bf_aborts; +extern const char* wsrep_provider_name; +extern const char* wsrep_provider_version; +extern const char* wsrep_provider_vendor; + +int 
wsrep_show_status(THD *thd, SHOW_VAR *var, char *buff); +int wsrep_init(); +void wsrep_deinit(bool free_options); +void wsrep_recover(); +bool wsrep_before_SE(); // initialize wsrep before storage + // engines (true) or after (false) +/* wsrep initialization sequence at startup + * @param before wsrep_before_SE() value */ +void wsrep_init_startup(bool before); + +// Other wsrep global variables +extern my_bool wsrep_inited; // whether wsrep is initialized ? + + +extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode); +extern "C" void wsrep_thd_set_query_state( + THD *thd, enum wsrep_query_state state); + +extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id); + +extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd); +extern "C" time_t wsrep_thd_query_start(THD *thd); +extern "C" query_id_t wsrep_thd_query_id(THD *thd); +extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); +extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); + + +extern void wsrep_close_client_connections(my_bool wait_to_end); +extern int wsrep_wait_committing_connections_close(int wait_time); +extern void wsrep_close_applier(THD *thd); +extern void wsrep_wait_appliers_close(THD *thd); +extern void wsrep_close_applier_threads(int count); +extern void wsrep_kill_mysql(THD *thd); + +/* new defines */ +extern void wsrep_stop_replication(THD *thd); +extern bool wsrep_start_replication(); +extern bool wsrep_sync_wait(THD* thd, uint mask = WSREP_SYNC_WAIT_BEFORE_READ); +extern int wsrep_check_opts(); +extern void wsrep_prepend_PATH (const char* path); +/* some inline functions are defined in wsrep_mysqld_inl.h */ + +/* Other global variables */ +extern wsrep_seqno_t wsrep_locked_seqno; + +#define WSREP_ON \ + (global_system_variables.wsrep_on) + +#define WSREP(thd) \ + (WSREP_ON && (thd && thd->variables.wsrep_on)) + +#define WSREP_CLIENT(thd) \ + (WSREP(thd) && thd->wsrep_client_thread) + +#define WSREP_EMULATE_BINLOG(thd) \ + (WSREP(thd) 
&& wsrep_emulate_bin_log) + +#define WSREP_FORMAT(my_format) \ + ((wsrep_forced_binlog_format != BINLOG_FORMAT_UNSPEC) \ + ? wsrep_forced_binlog_format : (ulong)(my_format)) + +// prefix all messages with "WSREP" +#define WSREP_LOG(fun, ...) \ + do { \ + char msg[1024] = {'\0'}; \ + snprintf(msg, sizeof(msg) - 1, ## __VA_ARGS__); \ + fun("WSREP: %s", msg); \ + } while(0) + +#define WSREP_LOG_CONFLICT_THD(thd, role) \ + WSREP_LOG(sql_print_information, \ + "%s: \n " \ + " THD: %lu, mode: %s, state: %s, conflict: %s, seqno: %lld\n " \ + " SQL: %s", \ + role, thd_get_thread_id(thd), wsrep_thd_exec_mode_str(thd), \ + wsrep_thd_query_state_str(thd), \ + wsrep_thd_conflict_state_str(thd), (long long)wsrep_thd_trx_seqno(thd), \ + wsrep_thd_query(thd) \ + ); + +#define WSREP_LOG_CONFLICT(bf_thd, victim_thd, bf_abort) \ + if (wsrep_debug || wsrep_log_conflicts) \ + { \ + WSREP_LOG(sql_print_information, "cluster conflict due to %s for threads:",\ + (bf_abort) ? "high priority abort" : "certification failure" \ + ); \ + if (bf_thd != NULL) WSREP_LOG_CONFLICT_THD(bf_thd, "Winning thread"); \ + if (victim_thd) WSREP_LOG_CONFLICT_THD(victim_thd, "Victim thread"); \ + } + +#define WSREP_PROVIDER_EXISTS \ + (wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN)) + +extern void wsrep_ready_wait(); + +class Ha_trx_info; +struct THD_TRANS; +void wsrep_register_hton(THD* thd, bool all); +void wsrep_brute_force_killer(THD *thd); +int wsrep_hire_brute_force_killer(THD *thd, uint64_t trx_id); + +/* this is visible for client build so that innodb plugin gets this */ +typedef struct wsrep_aborting_thd { + struct wsrep_aborting_thd *next; + THD *aborting_thd; +} *wsrep_aborting_thd_t; + +extern mysql_mutex_t LOCK_wsrep_ready; +extern mysql_cond_t COND_wsrep_ready; +extern mysql_mutex_t LOCK_wsrep_sst; +extern mysql_cond_t COND_wsrep_sst; +extern mysql_mutex_t LOCK_wsrep_sst_init; +extern mysql_cond_t COND_wsrep_sst_init; +extern mysql_mutex_t LOCK_wsrep_rollback; +extern 
mysql_cond_t COND_wsrep_rollback; +extern int wsrep_replaying; +extern mysql_mutex_t LOCK_wsrep_replaying; +extern mysql_cond_t COND_wsrep_replaying; +extern mysql_mutex_t LOCK_wsrep_slave_threads; +extern mysql_mutex_t LOCK_wsrep_desync; +extern wsrep_aborting_thd_t wsrep_aborting_thd; +extern my_bool wsrep_emulate_bin_log; +extern int wsrep_to_isolation; +#ifdef GTID_SUPPORT +extern rpl_sidno wsrep_sidno; +#endif /* GTID_SUPPORT */ +extern my_bool wsrep_preordered_opt; +extern handlerton *wsrep_hton; + +#ifdef HAVE_PSI_INTERFACE +extern PSI_mutex_key key_LOCK_wsrep_thd; +extern PSI_cond_key key_COND_wsrep_thd; +extern PSI_mutex_key key_LOCK_wsrep_ready; +extern PSI_mutex_key key_COND_wsrep_ready; +extern PSI_mutex_key key_LOCK_wsrep_sst; +extern PSI_cond_key key_COND_wsrep_sst; +extern PSI_mutex_key key_LOCK_wsrep_sst_init; +extern PSI_cond_key key_COND_wsrep_sst_init; +extern PSI_mutex_key key_LOCK_wsrep_sst_thread; +extern PSI_cond_key key_COND_wsrep_sst_thread; +extern PSI_mutex_key key_LOCK_wsrep_rollback; +extern PSI_cond_key key_COND_wsrep_rollback; +extern PSI_mutex_key key_LOCK_wsrep_replaying; +extern PSI_cond_key key_COND_wsrep_replaying; +extern PSI_mutex_key key_LOCK_wsrep_slave_threads; +extern PSI_mutex_key key_LOCK_wsrep_desync; +#endif /* HAVE_PSI_INTERFACE */ +struct TABLE_LIST; +int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_, + const TABLE_LIST* table_list); +void wsrep_to_isolation_end(THD *thd); +void wsrep_cleanup_transaction(THD *thd); +int wsrep_to_buf_helper( + THD* thd, const char *query, uint query_len, uchar** buf, size_t* buf_len); +int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_create_event_query(THD *thd, uchar** buf, size_t* buf_len); +int wsrep_alter_event_query(THD *thd, uchar** buf, size_t* buf_len); + +struct xid_t; +void wsrep_get_SE_checkpoint(xid_t*); +void wsrep_set_SE_checkpoint(xid_t*); +void 
wsrep_init_sidno(const wsrep_uuid_t&); +void wsrep_xid_init(xid_t*, const wsrep_uuid_t*, wsrep_seqno_t); +const wsrep_uuid_t* wsrep_xid_uuid(const xid_t*); +wsrep_seqno_t wsrep_xid_seqno(const xid_t*); + +extern bool +wsrep_grant_mdl_exception(MDL_context *requestor_ctx, + MDL_ticket *ticket); +IO_CACHE * get_trans_log(THD * thd); +bool wsrep_trans_cache_is_empty(THD *thd); +void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end); +void thd_binlog_rollback_stmt(THD * thd); +void thd_binlog_trx_reset(THD * thd); + +typedef void (*wsrep_thd_processor_fun)(THD *); +pthread_handler_t start_wsrep_THD(void *arg); +int wsrep_wait_committing_connections_close(int wait_time); +void wsrep_close_client_connections(my_bool wait_to_end); +void wsrep_close_applier(THD *thd); +void wsrep_close_applier_threads(int count); +void wsrep_wait_appliers_close(THD *thd); +void wsrep_kill_mysql(THD *thd); +void wsrep_close_threads(THD *thd); +int wsrep_create_sp(THD *thd, uchar** buf, size_t* buf_len); +void wsrep_copy_query(THD *thd); +bool wsrep_is_show_query(enum enum_sql_command command); +void wsrep_replay_transaction(THD *thd); +bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, + TABLE_LIST* src_table, + HA_CREATE_INFO *create_info); +int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); + +#else /* WITH_WSREP */ + +#define WSREP(T) (0) +#define WSREP_ON (0) +#define WSREP_EMULATE_BINLOG(thd) (0) +#define WSREP_CLIENT(thd) (0) +#define WSREP_FORMAT(my_format) ((ulong)my_format) +#define wsrep_emulate_bin_log (0) +#define wsrep_xid_seqno(X) (0) +#define wsrep_to_isolation (0) +#define wsrep_recovery (0) +#define wsrep_init() (1) +#define wsrep_prepend_PATH(X) +#define wsrep_before_SE() (0) +#define wsrep_init_startup(X) +#define wsrep_sync_wait(...) (0) +#define wsrep_to_isolation_begin(...) (0) +#define wsrep_register_hton(...) 
do { } while(0) +#define wsrep_check_opts() (0) +#define wsrep_stop_replication(X) do { } while(0) +#define wsrep_inited (0) +#define wsrep_deinit(X) do { } while(0) +#define wsrep_recover() do { } while(0) +#define wsrep_slave_threads (1) +#define wsrep_replicate_myisam (0) + +#endif /* WITH_WSREP */ +#endif /* WSREP_MYSQLD_H */ diff --git a/sql/wsrep_notify.cc b/sql/wsrep_notify.cc new file mode 100644 index 00000000000..6eefb961b62 --- /dev/null +++ b/sql/wsrep_notify.cc @@ -0,0 +1,111 @@ +/* Copyright 2010 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <mysqld.h> +#include "wsrep_priv.h" +#include "wsrep_utils.h" + +const char* wsrep_notify_cmd=""; + +static const char* _status_str(wsrep_member_status_t status) +{ + switch (status) + { + case WSREP_MEMBER_UNDEFINED: return "Undefined"; + case WSREP_MEMBER_JOINER: return "Joiner"; + case WSREP_MEMBER_DONOR: return "Donor"; + case WSREP_MEMBER_JOINED: return "Joined"; + case WSREP_MEMBER_SYNCED: return "Synced"; + default: return "Error(?)"; + } +} + +void wsrep_notify_status (wsrep_member_status_t status, + const wsrep_view_info_t* view) +{ + if (!wsrep_notify_cmd || 0 == strlen(wsrep_notify_cmd)) + { + WSREP_INFO("wsrep_notify_cmd is not defined, skipping notification."); + return; + } + + char cmd_buf[1 << 16]; // this can be long + long cmd_len = sizeof(cmd_buf) 
- 1; + char* cmd_ptr = cmd_buf; + long cmd_off = 0; + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, "%s", + wsrep_notify_cmd); + + if (status >= WSREP_MEMBER_UNDEFINED && status < WSREP_MEMBER_ERROR) + { + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --status %s", + _status_str(status)); + } + else + { + /* here we preserve provider error codes */ + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --status 'Error(%d)'", status); + } + + if (0 != view) + { + char uuid_str[40]; + + wsrep_uuid_print (&view->state_id.uuid, uuid_str, sizeof(uuid_str)); + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --uuid %s", uuid_str); + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --primary %s", view->view >= 0 ? "yes" : "no"); + + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + " --index %d", view->my_idx); + + if (view->memb_num) + { + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, " --members"); + + for (int i = 0; i < view->memb_num; i++) + { + wsrep_uuid_print (&view->members[i].id, uuid_str, sizeof(uuid_str)); + cmd_off += snprintf (cmd_ptr + cmd_off, cmd_len - cmd_off, + "%c%s/%s/%s", i > 0 ? ',' : ' ', + uuid_str, view->members[i].name, + view->members[i].incoming); + } + } + } + + if (cmd_off == cmd_len) + { + WSREP_ERROR("Notification buffer too short (%ld). 
Aborting notification.", + cmd_len); + return; + } + + wsp::process p(cmd_ptr, "r"); + + p.wait(); + int err = p.error(); + + if (err) + { + WSREP_ERROR("Notification command failed: %d (%s): \"%s\"", + err, strerror(err), cmd_ptr); + } +} + diff --git a/sql/wsrep_priv.h b/sql/wsrep_priv.h new file mode 100644 index 00000000000..5c66587d757 --- /dev/null +++ b/sql/wsrep_priv.h @@ -0,0 +1,53 @@ +/* Copyright 2010 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//! @file declares symbols private to wsrep integration layer + +#ifndef WSREP_PRIV_H +#define WSREP_PRIV_H + +#include "wsrep_mysqld.h" +#include "../wsrep/wsrep_api.h" + +#include <log.h> +#include <pthread.h> +#include <cstdio> + +void wsrep_ready_set (my_bool x); + +ssize_t wsrep_sst_prepare (void** msg); +wsrep_cb_status wsrep_sst_donate_cb (void* app_ctx, + void* recv_ctx, + const void* msg, size_t msg_len, + const wsrep_gtid_t* current_id, + const char* state, size_t state_len, + bool bypass); +extern unsigned int wsrep_check_ip (const char* addr); +extern size_t wsrep_guess_ip (char* buf, size_t buf_len); +extern size_t wsrep_guess_address(char* buf, size_t buf_len); + +extern wsrep_uuid_t local_uuid; +extern wsrep_seqno_t local_seqno; + +// a helper function +extern void wsrep_sst_received(wsrep_t*, const wsrep_uuid_t*, wsrep_seqno_t, + const void*, size_t); +/*! 
SST thread signals init thread about sst completion */ +extern void wsrep_sst_complete(const wsrep_uuid_t*, wsrep_seqno_t, bool); + +void wsrep_notify_status (wsrep_member_status_t new_status, + const wsrep_view_info_t* view = 0); +#endif /* WSREP_PRIV_H */ diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc new file mode 100644 index 00000000000..17943308c37 --- /dev/null +++ b/sql/wsrep_sst.cc @@ -0,0 +1,1152 @@ +/* Copyright 2008-2012 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "wsrep_sst.h" + +#include <mysqld.h> +#include <m_ctype.h> +#include <my_sys.h> +#include <strfunc.h> +#include <sql_class.h> +#include <set_var.h> +#include <sql_acl.h> +#include <sql_reload.h> +#include <sql_parse.h> +#include "wsrep_priv.h" +#include "wsrep_utils.h" +#include <cstdio> +#include <cstdlib> + +char wsrep_defaults_file[FN_REFLEN * 2 + 10 + + sizeof(WSREP_SST_OPT_CONF) + + sizeof(WSREP_SST_OPT_EXTRA_CONF)] = {0}; + +const char* wsrep_sst_method = WSREP_SST_DEFAULT; +const char* wsrep_sst_receive_address = WSREP_SST_ADDRESS_AUTO; +const char* wsrep_sst_donor = ""; + char* wsrep_sst_auth = NULL; + +// container for real auth string +static const char* sst_auth_real = NULL; +my_bool wsrep_sst_donor_rejects_queries = FALSE; + +bool wsrep_sst_method_check (sys_var *self, THD* thd, set_var* var) +{ + if ((! 
var->save_result.string_value.str) || + (var->save_result.string_value.length == 0 )) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; + } + + return 0; +} + +bool wsrep_sst_method_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + + +static void make_wsrep_defaults_file() +{ + if (!wsrep_defaults_file[0]) + { + char *ptr= wsrep_defaults_file; + char *end= ptr + sizeof(wsrep_defaults_file); + if (my_defaults_file) + ptr= strxnmov(ptr, end - ptr, + WSREP_SST_OPT_CONF, " '", my_defaults_file, "' ", NULL); + + if (my_defaults_extra_file) + ptr= strxnmov(ptr, end - ptr, + WSREP_SST_OPT_EXTRA_CONF, " '", my_defaults_extra_file, "' ", NULL); + } +} + + +// TODO: Improve address verification. +static bool sst_receive_address_check (const char* str) +{ + if (!strncasecmp(str, "127.0.0.1", strlen("127.0.0.1")) || + !strncasecmp(str, "localhost", strlen("localhost"))) + { + return 1; + } + + return 0; +} + +bool wsrep_sst_receive_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + { + goto err; + } + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + if (sst_receive_address_check(addr_buf)) + { + goto err; + } + + return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? 
+ var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_sst_receive_address_update (sys_var *self, THD* thd, + enum_var_type type) +{ + return 0; +} + +bool wsrep_sst_auth_check (sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +static bool sst_auth_real_set (const char* value) +{ + const char* v= NULL; + + if (value) + { + v= my_strdup(value, MYF(0)); + } + else // its NULL + { + wsrep_sst_auth_free(); + return 0; + } + + if (v) + { + // set sst_auth_real + if (sst_auth_real) { my_free((void *) sst_auth_real); } + sst_auth_real = v; + + // mask wsrep_sst_auth + if (strlen(sst_auth_real)) + { + if (wsrep_sst_auth) { my_free((void*) wsrep_sst_auth); } + wsrep_sst_auth= my_strdup(WSREP_SST_AUTH_MASK, MYF(0)); + } + return 0; + } + return 1; +} + +void wsrep_sst_auth_free() +{ + if (wsrep_sst_auth) { my_free((void *) wsrep_sst_auth); } + if (sst_auth_real) { my_free((void *) sst_auth_real); } + wsrep_sst_auth= NULL; + sst_auth_real= NULL; +} + +bool wsrep_sst_auth_update (sys_var *self, THD* thd, enum_var_type type) +{ + return sst_auth_real_set (wsrep_sst_auth); +} + +void wsrep_sst_auth_init (const char* value) +{ + if (wsrep_sst_auth == value) wsrep_sst_auth = NULL; + if (value) sst_auth_real_set (value); +} + +bool wsrep_sst_donor_check (sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +bool wsrep_sst_donor_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +static wsrep_uuid_t cluster_uuid = WSREP_UUID_UNDEFINED; + +bool wsrep_before_SE() +{ + return (wsrep_provider != NULL + && strcmp (wsrep_provider, WSREP_NONE) + && strcmp (wsrep_sst_method, WSREP_SST_SKIP) + && strcmp (wsrep_sst_method, WSREP_SST_MYSQLDUMP)); +} + +static bool sst_complete = false; +static bool sst_needed = false; + +void wsrep_sst_grab () +{ + WSREP_INFO("wsrep_sst_grab()"); + if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + sst_complete = false; + mysql_mutex_unlock (&LOCK_wsrep_sst); +} + +// Wait for end of SST +bool 
wsrep_sst_wait () +{ + if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + while (!sst_complete) + { + WSREP_INFO("Waiting for SST to complete."); + mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst); + } + + if (local_seqno >= 0) + { + WSREP_INFO("SST complete, seqno: %lld", (long long) local_seqno); + } + else + { + WSREP_ERROR("SST failed: %d (%s)", + int(-local_seqno), strerror(-local_seqno)); + } + + mysql_mutex_unlock (&LOCK_wsrep_sst); + + return (local_seqno >= 0); +} + +// Signal end of SST +void wsrep_sst_complete (const wsrep_uuid_t* sst_uuid, + wsrep_seqno_t sst_seqno, + bool needed) +{ + if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); + if (!sst_complete) + { + sst_complete = true; + sst_needed = needed; + local_uuid = *sst_uuid; + local_seqno = sst_seqno; + mysql_cond_signal (&COND_wsrep_sst); + } + else + { + /* This can happen when called from wsrep_synced_cb(). + At the moment there is no way to check there + if main thread is still waiting for signal, + so wsrep_sst_complete() is called from there + each time wsrep_ready changes from FALSE -> TRUE. + */ + WSREP_DEBUG("Nobody is waiting for SST."); + } + mysql_mutex_unlock (&LOCK_wsrep_sst); +} + +void wsrep_sst_received (wsrep_t* const wsrep, + const wsrep_uuid_t* const uuid, + wsrep_seqno_t const seqno, + const void* const state, + size_t const state_len) +{ + int const rcode(seqno < 0 ? seqno : 0); + wsrep_gtid_t const state_id = { + *uuid, (rcode ? 
WSREP_SEQNO_UNDEFINED : seqno) + }; +#ifdef GTID_SUPPORT + wsrep_init_sidno(state_id.uuid); +#endif /* GTID_SUPPORT */ + wsrep->sst_received(wsrep, &state_id, state, state_len, rcode); +} + +// Let applier threads to continue +void wsrep_sst_continue () +{ + if (sst_needed) + { + WSREP_INFO("Signalling provider to continue."); + wsrep_sst_received (wsrep, &local_uuid, local_seqno, NULL, 0); + } +} + +struct sst_thread_arg +{ + const char* cmd; + int err; + char* ret_str; + mysql_mutex_t lock; + mysql_cond_t cond; + + sst_thread_arg (const char* c) : cmd(c), err(-1), ret_str(0) + { + mysql_mutex_init(key_LOCK_wsrep_sst_thread, &lock, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_wsrep_sst_thread, &cond, NULL); + } + + ~sst_thread_arg() + { + mysql_cond_destroy (&cond); + mysql_mutex_unlock (&lock); + mysql_mutex_destroy (&lock); + } +}; + +static int sst_scan_uuid_seqno (const char* str, + wsrep_uuid_t* uuid, wsrep_seqno_t* seqno) +{ + int offt = wsrep_uuid_scan (str, strlen(str), uuid); + if (offt > 0 && strlen(str) > (unsigned int)offt && ':' == str[offt]) + { + *seqno = strtoll (str + offt + 1, NULL, 10); + if (*seqno != LLONG_MAX || errno != ERANGE) + { + return 0; + } + } + + WSREP_ERROR("Failed to parse uuid:seqno pair: '%s'", str); + return EINVAL; +} + +// get rid of trailing \n +static char* my_fgets (char* buf, size_t buf_len, FILE* stream) +{ + char* ret= fgets (buf, buf_len, stream); + + if (ret) + { + size_t len = strlen(ret); + if (len > 0 && ret[len - 1] == '\n') ret[len - 1] = '\0'; + } + + return ret; +} + +/* + Generate opt_binlog_opt_val for sst_donate_other(), sst_prepare_other(). + + Returns zero on success, negative error code otherwise. + + String containing binlog name is stored in param ret if binlog is enabled + and GTID mode is on, otherwise empty string. Returned string should be + freed with my_free(). 
+ */ +static int generate_binlog_opt_val(char** ret) +{ + DBUG_ASSERT(ret); + *ret= NULL; + if (opt_bin_log) + { + assert(opt_bin_logname); + *ret= strcmp(opt_bin_logname, "0") ? + my_strdup(opt_bin_logname, MYF(0)) : my_strdup("", MYF(0)); + } + else + { + *ret= my_strdup("", MYF(0)); + } + if (!*ret) return -ENOMEM; + return 0; +} + +static void* sst_joiner_thread (void* a) +{ + sst_thread_arg* arg= (sst_thread_arg*) a; + int err= 1; + + { + const char magic[] = "ready"; + const size_t magic_len = sizeof(magic) - 1; + const size_t out_len = 512; + char out[out_len]; + + WSREP_INFO("Running: '%s'", arg->cmd); + + wsp::process proc (arg->cmd, "r"); + + if (proc.pipe() && !proc.error()) + { + const char* tmp= my_fgets (out, out_len, proc.pipe()); + + if (!tmp || strlen(tmp) < (magic_len + 2) || + strncasecmp (tmp, magic, magic_len)) + { + WSREP_ERROR("Failed to read '%s <addr>' from: %s\n\tRead: '%s'", + magic, arg->cmd, tmp); + proc.wait(); + if (proc.error()) err = proc.error(); + } + else + { + err = 0; + } + } + else + { + err = proc.error(); + WSREP_ERROR("Failed to execute: %s : %d (%s)", + arg->cmd, err, strerror(err)); + } + + // signal sst_prepare thread with ret code, + // it will go on sending SST request + mysql_mutex_lock (&arg->lock); + if (!err) + { + arg->ret_str = strdup (out + magic_len + 1); + if (!arg->ret_str) err = ENOMEM; + } + arg->err = -err; + mysql_cond_signal (&arg->cond); + mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. + + if (err) return NULL; /* lp:808417 - return immediately, don't signal + * initializer thread to ensure single thread of + * shutdown. 
*/ + + wsrep_uuid_t ret_uuid = WSREP_UUID_UNDEFINED; + wsrep_seqno_t ret_seqno = WSREP_SEQNO_UNDEFINED; + + // in case of successfull receiver start, wait for SST completion/end + char* tmp = my_fgets (out, out_len, proc.pipe()); + + proc.wait(); + err= EINVAL; + + if (!tmp) + { + WSREP_ERROR("Failed to read uuid:seqno from joiner script."); + if (proc.error()) err = proc.error(); + } + else + { + err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno); + } + + if (err) + { + ret_uuid= WSREP_UUID_UNDEFINED; + ret_seqno= -err; + } + + // Tell initializer thread that SST is complete + wsrep_sst_complete (&ret_uuid, ret_seqno, true); + } + + return NULL; +} + +static ssize_t sst_prepare_other (const char* method, + const char* addr_in, + const char** addr_out) +{ + ssize_t cmd_len= 1024; + char cmd_str[cmd_len]; + const char* sst_dir= mysql_real_data_home; + const char* binlog_opt= ""; + char* binlog_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_prepare_other(): generate_binlog_opt_val() failed: %d", + ret); + return ret; + } + if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG; + + make_wsrep_defaults_file(); + + ret= snprintf (cmd_str, cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE" 'joiner' " + WSREP_SST_OPT_ADDR" '%s' " + WSREP_SST_OPT_AUTH" '%s' " + WSREP_SST_OPT_DATA" '%s' " + " %s " + WSREP_SST_OPT_PARENT" '%d'" + " %s '%s' ", + method, addr_in, (sst_auth_real) ? sst_auth_real : "", + sst_dir, wsrep_defaults_file, (int)getpid(), + binlog_opt, binlog_opt_val); + my_free(binlog_opt_val); + + if (ret < 0 || ret >= cmd_len) + { + WSREP_ERROR("sst_prepare_other(): snprintf() failed: %d", ret); + return (ret < 0 ? 
ret : -EMSGSIZE); + } + + pthread_t tmp; + sst_thread_arg arg(cmd_str); + mysql_mutex_lock (&arg.lock); + ret = pthread_create (&tmp, NULL, sst_joiner_thread, &arg); + if (ret) + { + WSREP_ERROR("sst_prepare_other(): pthread_create() failed: %d (%s)", + ret, strerror(ret)); + return ret; + } + mysql_cond_wait (&arg.cond, &arg.lock); + + *addr_out= arg.ret_str; + + if (!arg.err) + ret = strlen(*addr_out); + else + { + assert (arg.err < 0); + ret = arg.err; + } + + pthread_detach (tmp); + + return ret; +} + +extern uint mysqld_port; + +/*! Just tells donor where to send mysqldump */ +static ssize_t sst_prepare_mysqldump (const char* addr_in, + const char** addr_out) +{ + ssize_t ret = strlen (addr_in); + + if (!strrchr(addr_in, ':')) + { + ssize_t s = ret + 7; + char* tmp = (char*) malloc (s); + + if (tmp) + { + ret= snprintf (tmp, s, "%s:%u", addr_in, mysqld_port); + + if (ret > 0 && ret < s) + { + *addr_out= tmp; + return ret; + } + if (ret > 0) /* buffer too short */ ret = -EMSGSIZE; + free (tmp); + } + else { + ret= -ENOMEM; + } + + WSREP_ERROR ("Could not prepare state transfer request: " + "adding default port failed: %zd.", ret); + } + else { + *addr_out= addr_in; + } + + return ret; +} + +static bool SE_initialized = false; + +ssize_t wsrep_sst_prepare (void** msg) +{ + const ssize_t ip_max= 256; + char ip_buf[ip_max]; + const char* addr_in= NULL; + const char* addr_out= NULL; + + if (!strcmp(wsrep_sst_method, WSREP_SST_SKIP)) + { + ssize_t ret = strlen(WSREP_STATE_TRANSFER_TRIVIAL) + 1; + *msg = strdup(WSREP_STATE_TRANSFER_TRIVIAL); + if (!msg) + { + WSREP_ERROR("Could not allocate %zd bytes for state request", ret); + unireg_abort(1); + } + return ret; + } + + // Figure out SST address. 
Common for all SST methods + if (wsrep_sst_receive_address && + strcmp (wsrep_sst_receive_address, WSREP_SST_ADDRESS_AUTO)) + { + addr_in= wsrep_sst_receive_address; + } + else if (wsrep_node_address && strlen(wsrep_node_address)) + { + const char* const colon= strchr (wsrep_node_address, ':'); + if (colon) + { + ptrdiff_t const len= colon - wsrep_node_address; + strncpy (ip_buf, wsrep_node_address, len); + ip_buf[len]= '\0'; + addr_in= ip_buf; + } + else + { + addr_in= wsrep_node_address; + } + } + else + { + ssize_t ret= wsrep_guess_ip (ip_buf, ip_max); + + if (ret && ret < ip_max) + { + addr_in= ip_buf; + } + else + { + WSREP_ERROR("Could not prepare state transfer request: " + "failed to guess address to accept state transfer at. " + "wsrep_sst_receive_address must be set manually."); + unireg_abort(1); + } + } + + ssize_t addr_len= -ENOSYS; + if (!strcmp(wsrep_sst_method, WSREP_SST_MYSQLDUMP)) + { + addr_len= sst_prepare_mysqldump (addr_in, &addr_out); + if (addr_len < 0) unireg_abort(1); + } + else + { + /*! A heuristic workaround until we learn how to stop and start engines */ + if (SE_initialized) + { + // we already did SST at initializaiton, now engines are running + // sql_print_information() is here because the message is too long + // for WSREP_INFO. + sql_print_information ("WSREP: " + "You have configured '%s' state snapshot transfer method " + "which cannot be performed on a running server. " + "Wsrep provider won't be able to fall back to it " + "if other means of state transfer are unavailable. " + "In that case you will need to restart the server.", + wsrep_sst_method); + *msg = 0; + return 0; + } + + addr_len = sst_prepare_other (wsrep_sst_method, addr_in, &addr_out); + if (addr_len < 0) + { + WSREP_ERROR("Failed to prepare for '%s' SST. 
Unrecoverable.", + wsrep_sst_method); + unireg_abort(1); + } + } + + size_t const method_len(strlen(wsrep_sst_method)); + size_t const msg_len (method_len + addr_len + 2 /* + auth_len + 1*/); + + *msg = malloc (msg_len); + if (NULL != *msg) { + char* const method_ptr(reinterpret_cast<char*>(*msg)); + strcpy (method_ptr, wsrep_sst_method); + char* const addr_ptr(method_ptr + method_len + 1); + strcpy (addr_ptr, addr_out); + + WSREP_INFO ("Prepared SST request: %s|%s", method_ptr, addr_ptr); + } + else { + WSREP_ERROR("Failed to allocate SST request of size %zu. Can't continue.", + msg_len); + unireg_abort(1); + } + + if (addr_out != addr_in) /* malloc'ed */ free ((char*)addr_out); + + return msg_len; +} + +// helper method for donors +static int sst_run_shell (const char* cmd_str, int max_tries) +{ + int ret = 0; + + for (int tries=1; tries <= max_tries; tries++) + { + wsp::process proc (cmd_str, "r"); + + if (NULL != proc.pipe()) + { + proc.wait(); + } + + if ((ret = proc.error())) + { + WSREP_ERROR("Try %d/%d: '%s' failed: %d (%s)", + tries, max_tries, proc.cmd(), ret, strerror(ret)); + sleep (1); + } + else + { + WSREP_DEBUG("SST script successfully completed."); + break; + } + } + + return -ret; +} + +static void sst_reject_queries(my_bool close_conn) +{ + wsrep_ready_set (FALSE); // this will be resotred when donor becomes synced + WSREP_INFO("Rejecting client queries for the duration of SST."); + if (TRUE == close_conn) wsrep_close_client_connections(FALSE); +} + +static int sst_mysqldump_check_addr (const char* user, const char* pswd, + const char* host, const char* port) +{ + return 0; +} + +static int sst_donate_mysqldump (const char* addr, + const wsrep_uuid_t* uuid, + const char* uuid_str, + wsrep_seqno_t seqno, + bool bypass) +{ + size_t host_len; + const char* port = strchr (addr, ':'); + + if (port) + { + port += 1; + host_len = port - addr; + } + else + { + port = ""; + host_len = strlen (addr) + 1; + } + + char host[host_len]; + + strncpy (host, 
addr, host_len - 1); + host[host_len - 1] = '\0'; + + const char* auth = sst_auth_real; + const char* pswd = (auth) ? strchr (auth, ':') : NULL; + size_t user_len; + + if (pswd) + { + pswd += 1; + user_len = pswd - auth; + } + else + { + pswd = ""; + user_len = (auth) ? strlen (auth) + 1 : 1; + } + + char user[user_len]; + + strncpy (user, (auth) ? auth : "", user_len - 1); + user[user_len - 1] = '\0'; + + int ret = sst_mysqldump_check_addr (user, pswd, host, port); + if (!ret) + { + size_t cmd_len= 1024; + char cmd_str[cmd_len]; + + if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(TRUE); + + make_wsrep_defaults_file(); + + snprintf (cmd_str, cmd_len, + "wsrep_sst_mysqldump " + WSREP_SST_OPT_USER" '%s' " + WSREP_SST_OPT_PSWD" '%s' " + WSREP_SST_OPT_HOST" '%s' " + WSREP_SST_OPT_PORT" '%s' " + WSREP_SST_OPT_LPORT" '%u' " + WSREP_SST_OPT_SOCKET" '%s' " + " %s " + WSREP_SST_OPT_GTID" '%s:%lld'" + "%s", + user, pswd, host, port, mysqld_port, mysqld_unix_port, + wsrep_defaults_file, uuid_str, + (long long)seqno, bypass ? " "WSREP_SST_OPT_BYPASS : ""); + + WSREP_DEBUG("Running: '%s'", cmd_str); + + ret= sst_run_shell (cmd_str, 3); + } + + wsrep_gtid_t const state_id = { *uuid, (ret ? WSREP_SEQNO_UNDEFINED : seqno)}; + + wsrep->sst_sent (wsrep, &state_id, ret); + + return ret; +} + +wsrep_seqno_t wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; + +static int run_sql_command(THD *thd, const char *query) +{ + thd->set_query((char *)query, strlen(query)); + + Parser_state ps; + if (ps.init(thd, thd->query(), thd->query_length())) + { + WSREP_ERROR("SST query: %s failed", query); + return -1; + } + + mysql_parse(thd, thd->query(), thd->query_length(), &ps); + if (thd->is_error()) + { + int const err= thd->get_stmt_da()->sql_errno(); + WSREP_WARN ("error executing '%s': %d (%s)%s", + query, err, thd->get_stmt_da()->message(), + err == ER_UNKNOWN_SYSTEM_VARIABLE ? + ". Was mysqld built with --with-innodb-disallow-writes ?" 
: ""); + thd->clear_error(); + return -1; + } + return 0; +} + +static int sst_flush_tables(THD* thd) +{ + WSREP_INFO("Flushing tables for SST..."); + + int err; + int not_used; + CHARSET_INFO *current_charset; + + current_charset = thd->variables.character_set_client; + + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); + thd->variables.character_set_client = &my_charset_latin1; + WSREP_WARN("For SST temporally setting character set to : %s", + my_charset_latin1.csname); + } + + if (run_sql_command(thd, "FLUSH TABLES WITH READ LOCK")) + { + WSREP_ERROR("Failed to flush and lock tables"); + err = -1; + } + else + { + /* make sure logs are flushed after global read lock acquired */ + err= reload_acl_and_cache(thd, REFRESH_ENGINE_LOG | REFRESH_BINARY_LOG, + (TABLE_LIST*) 0, ¬_used); + } + + thd->variables.character_set_client = current_charset; + + + if (err) + { + WSREP_ERROR("Failed to flush tables: %d (%s)", err, strerror(err)); + } + else + { + WSREP_INFO("Tables flushed."); + const char base_name[]= "tables_flushed"; + ssize_t const full_len= strlen(mysql_real_data_home) + strlen(base_name)+2; + char real_name[full_len]; + sprintf(real_name, "%s/%s", mysql_real_data_home, base_name); + char tmp_name[full_len + 4]; + sprintf(tmp_name, "%s.tmp", real_name); + + FILE* file= fopen(tmp_name, "w+"); + if (0 == file) + { + err= errno; + WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err,strerror(err)); + } + else + { + fprintf(file, "%s:%lld\n", + wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno); + fsync(fileno(file)); + fclose(file); + if (rename(tmp_name, real_name) == -1) + { + err= errno; + WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)", + tmp_name, real_name, err,strerror(err)); + } + } + } + + return err; +} + +static void sst_disallow_writes (THD* thd, bool yes) +{ + 
char query_str[64] = { 0, }; + ssize_t const query_max = sizeof(query_str) - 1; + CHARSET_INFO *current_charset; + + current_charset = thd->variables.character_set_client; + + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); + thd->variables.character_set_client = &my_charset_latin1; + WSREP_WARN("For SST temporally setting character set to : %s", + my_charset_latin1.csname); + } + + snprintf (query_str, query_max, "SET GLOBAL innodb_disallow_writes=%d", + yes ? 1 : 0); + + if (run_sql_command(thd, query_str)) + { + WSREP_ERROR("Failed to disallow InnoDB writes"); + } + thd->variables.character_set_client = current_charset; +} + +static void* sst_donor_thread (void* a) +{ + sst_thread_arg* arg= (sst_thread_arg*)a; + + WSREP_INFO("Running: '%s'", arg->cmd); + + int err= 1; + bool locked= false; + + const char* out= NULL; + const size_t out_len= 128; + char out_buf[out_len]; + + wsrep_uuid_t ret_uuid= WSREP_UUID_UNDEFINED; + wsrep_seqno_t ret_seqno= WSREP_SEQNO_UNDEFINED; // seqno of complete SST + + wsp::thd thd(FALSE); // we turn off wsrep_on for this THD so that it can + // operate with wsrep_ready == OFF + wsp::process proc(arg->cmd, "r"); + + err= proc.error(); + +/* Inform server about SST script startup and release TO isolation */ + mysql_mutex_lock (&arg->lock); + arg->err = -err; + mysql_cond_signal (&arg->cond); + mysql_mutex_unlock (&arg->lock); //! @note arg is unusable after that. 
+ + if (proc.pipe() && !err) + { +wait_signal: + out= my_fgets (out_buf, out_len, proc.pipe()); + + if (out) + { + const char magic_flush[]= "flush tables"; + const char magic_cont[]= "continue"; + const char magic_done[]= "done"; + + if (!strcasecmp (out, magic_flush)) + { + err= sst_flush_tables (thd.ptr); + if (!err) + { + sst_disallow_writes (thd.ptr, true); + locked= true; + goto wait_signal; + } + } + else if (!strcasecmp (out, magic_cont)) + { + if (locked) + { + sst_disallow_writes (thd.ptr, false); + thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr); + locked= false; + } + err= 0; + goto wait_signal; + } + else if (!strncasecmp (out, magic_done, strlen(magic_done))) + { + err= sst_scan_uuid_seqno (out + strlen(magic_done) + 1, + &ret_uuid, &ret_seqno); + } + else + { + WSREP_WARN("Received unknown signal: '%s'", out); + } + } + else + { + WSREP_ERROR("Failed to read from: %s", proc.cmd()); + proc.wait(); + } + if (!err && proc.error()) err= proc.error(); + } + else + { + WSREP_ERROR("Failed to execute: %s : %d (%s)", + proc.cmd(), err, strerror(err)); + } + + if (locked) // don't forget to unlock server before return + { + sst_disallow_writes (thd.ptr, false); + thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr); + } + + // signal to donor that SST is over + struct wsrep_gtid const state_id = { + ret_uuid, err ? 
WSREP_SEQNO_UNDEFINED : ret_seqno + }; + wsrep->sst_sent (wsrep, &state_id, -err); + proc.wait(); + + return NULL; +} + + + +static int sst_donate_other (const char* method, + const char* addr, + const char* uuid, + wsrep_seqno_t seqno, + bool bypass) +{ + ssize_t cmd_len = 4096; + char cmd_str[cmd_len]; + const char* binlog_opt= ""; + char* binlog_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_donate_other(): generate_binlog_opt_val() failed: %d",ret); + return ret; + } + if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG; + + make_wsrep_defaults_file(); + + ret= snprintf (cmd_str, cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE" 'donor' " + WSREP_SST_OPT_ADDR" '%s' " + WSREP_SST_OPT_AUTH" '%s' " + WSREP_SST_OPT_SOCKET" '%s' " + WSREP_SST_OPT_DATA" '%s' " + " %s " + " %s '%s' " + WSREP_SST_OPT_GTID" '%s:%lld'" + "%s", + method, addr, sst_auth_real, mysqld_unix_port, + mysql_real_data_home, wsrep_defaults_file, + binlog_opt, binlog_opt_val, + uuid, (long long) seqno, + bypass ? " "WSREP_SST_OPT_BYPASS : ""); + my_free(binlog_opt_val); + + if (ret < 0 || ret >= cmd_len) + { + WSREP_ERROR("sst_donate_other(): snprintf() failed: %d", ret); + return (ret < 0 ? ret : -EMSGSIZE); + } + + if (!bypass && wsrep_sst_donor_rejects_queries) sst_reject_queries(FALSE); + + pthread_t tmp; + sst_thread_arg arg(cmd_str); + mysql_mutex_lock (&arg.lock); + ret = pthread_create (&tmp, NULL, sst_donor_thread, &arg); + if (ret) + { + WSREP_ERROR("sst_donate_other(): pthread_create() failed: %d (%s)", + ret, strerror(ret)); + return ret; + } + mysql_cond_wait (&arg.cond, &arg.lock); + + WSREP_INFO("sst_donor_thread signaled with %d", arg.err); + return arg.err; +} + +wsrep_cb_status_t wsrep_sst_donate_cb (void* app_ctx, void* recv_ctx, + const void* msg, size_t msg_len, + const wsrep_gtid_t* current_gtid, + const char* state, size_t state_len, + bool bypass) +{ + /* This will be reset when sync callback is called. 
+ * Should we set wsrep_ready to FALSE here too? */ +// wsrep_notify_status(WSREP_MEMBER_DONOR); + local_status.set(WSREP_MEMBER_DONOR); + + const char* method = (char*)msg; + size_t method_len = strlen (method); + const char* data = method + method_len + 1; + + char uuid_str[37]; + wsrep_uuid_print (&current_gtid->uuid, uuid_str, sizeof(uuid_str)); + + int ret; + if (!strcmp (WSREP_SST_MYSQLDUMP, method)) + { + ret = sst_donate_mysqldump(data, &current_gtid->uuid, uuid_str, + current_gtid->seqno, bypass); + } + else + { + ret = sst_donate_other(method, data, uuid_str, current_gtid->seqno,bypass); + } + + return (ret > 0 ? WSREP_CB_SUCCESS : WSREP_CB_FAILURE); +} + +void wsrep_SE_init_grab() +{ + if (mysql_mutex_lock (&LOCK_wsrep_sst_init)) abort(); +} + +void wsrep_SE_init_wait() +{ + while (SE_initialized == false) + { + mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init); + } + mysql_mutex_unlock (&LOCK_wsrep_sst_init); +} + +void wsrep_SE_init_done() +{ + mysql_cond_signal (&COND_wsrep_sst_init); + mysql_mutex_unlock (&LOCK_wsrep_sst_init); +} + +void wsrep_SE_initialized() +{ + SE_initialized = true; +} diff --git a/sql/wsrep_sst.h b/sql/wsrep_sst.h new file mode 100644 index 00000000000..2a6ab406297 --- /dev/null +++ b/sql/wsrep_sst.h @@ -0,0 +1,80 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ + +#include <my_config.h> + +#ifndef WSREP_SST_H +#define WSREP_SST_H + +#ifdef WITH_WSREP + +#include <mysql.h> // my_bool + +#define WSREP_SST_OPT_ROLE "--role" +#define WSREP_SST_OPT_ADDR "--address" +#define WSREP_SST_OPT_AUTH "--auth" +#define WSREP_SST_OPT_DATA "--datadir" +#define WSREP_SST_OPT_CONF "--defaults-file" +#define WSREP_SST_OPT_EXTRA_CONF "--defaults-extra-file" +#define WSREP_SST_OPT_PARENT "--parent" +#define WSREP_SST_OPT_BINLOG "--binlog" + +// mysqldump-specific options +#define WSREP_SST_OPT_USER "--user" +#define WSREP_SST_OPT_PSWD "--password" +#define WSREP_SST_OPT_HOST "--host" +#define WSREP_SST_OPT_PORT "--port" +#define WSREP_SST_OPT_LPORT "--local-port" + +// donor-specific +#define WSREP_SST_OPT_SOCKET "--socket" +#define WSREP_SST_OPT_GTID "--gtid" +#define WSREP_SST_OPT_BYPASS "--bypass" + +#define WSREP_SST_MYSQLDUMP "mysqldump" +#define WSREP_SST_RSYNC "rsync" +#define WSREP_SST_SKIP "skip" +#define WSREP_SST_DEFAULT WSREP_SST_RSYNC +#define WSREP_SST_ADDRESS_AUTO "AUTO" +#define WSREP_SST_AUTH_MASK "********" + +/* system variables */ +extern const char* wsrep_sst_method; +extern const char* wsrep_sst_receive_address; +extern const char* wsrep_sst_donor; +extern char* wsrep_sst_auth; +extern my_bool wsrep_sst_donor_rejects_queries; + +/*! Synchronizes applier thread start with init thread */ +extern void wsrep_sst_grab(); +/*! Init thread waits for SST completion */ +extern bool wsrep_sst_wait(); +/*! Signals wsrep that initialization is complete, writesets can be applied */ +extern void wsrep_sst_continue(); +extern void wsrep_sst_auth_free(); + +extern void wsrep_SE_init_grab(); /*! grab init critical section */ +extern void wsrep_SE_init_wait(); /*! wait for SE init to complete */ +extern void wsrep_SE_init_done(); /*! signal that SE init is complte */ +extern void wsrep_SE_initialized(); /*! 
mark SE initialization complete */ + +#else +#define wsrep_SE_initialized() do { } while(0) +#define wsrep_SE_init_grab() do { } while(0) +#define wsrep_SE_init_done() do { } while(0) +#define wsrep_sst_continue() do { } while(0) + +#endif /* WITH_WSREP */ +#endif /* WSREP_SST_H */ diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc new file mode 100644 index 00000000000..b65eead817d --- /dev/null +++ b/sql/wsrep_thd.cc @@ -0,0 +1,600 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include "wsrep_thd.h" + +#include "transaction.h" +#include "rpl_rli.h" +#include "log_event.h" +#include "sql_parse.h" +//#include "global_threads.h" // LOCK_thread_count, etc. 
+#include "sql_base.h" // close_thread_tables() +#include "mysqld.h" // start_wsrep_THD(); + +#include "slave.h" // opt_log_slave_updates +#include "rpl_filter.h" +#include "rpl_rli.h" +#include "rpl_mi.h" + +#if (__LP64__) +static volatile int64 wsrep_bf_aborts_counter(0); +#define WSREP_ATOMIC_LOAD_LONG my_atomic_load64 +#define WSREP_ATOMIC_ADD_LONG my_atomic_add64 +#else +static volatile int32 wsrep_bf_aborts_counter(0); +#define WSREP_ATOMIC_LOAD_LONG my_atomic_load32 +#define WSREP_ATOMIC_ADD_LONG my_atomic_add32 +#endif + +int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff) +{ + wsrep_local_bf_aborts = WSREP_ATOMIC_LOAD_LONG(&wsrep_bf_aborts_counter); + var->type = SHOW_LONGLONG; + var->value = (char*)&wsrep_local_bf_aborts; + return 0; +} + +/* must have (&thd->LOCK_wsrep_thd) */ +void wsrep_client_rollback(THD *thd) +{ + WSREP_DEBUG("client rollback due to BF abort for (%ld), query: %s", + thd->thread_id, thd->query()); + + WSREP_ATOMIC_ADD_LONG(&wsrep_bf_aborts_counter, 1); + + thd->wsrep_conflict_state= ABORTING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + trans_rollback(thd); + + if (thd->locked_tables_mode && thd->lock) + { + WSREP_DEBUG("unlocking tables for BF abort (%ld)", thd->thread_id); + thd->locked_tables_list.unlock_locked_tables(thd); + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + } + + if (thd->global_read_lock.is_acquired()) + { + WSREP_DEBUG("unlocking GRL for BF abort (%ld)", thd->thread_id); + thd->global_read_lock.unlock_global_read_lock(thd); + } + + /* Release transactional metadata locks. 
*/ + thd->mdl_context.release_transactional_locks(); + + /* release explicit MDL locks */ + thd->mdl_context.release_explicit_locks(); + + if (thd->get_binlog_table_maps()) + { + WSREP_DEBUG("clearing binlog table map for BF abort (%ld)", thd->thread_id); + thd->clear_binlog_table_maps(); + } + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + thd->wsrep_conflict_state= ABORTED; +} + +#define NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR 1 +#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2 +//#include "rpl_info_factory.h" + +#ifdef NOT_USED +static Relay_log_info* wsrep_relay_log_init(const char* log_fname) +{ + + /* MySQL 5.6 version has rli factory: */ +#ifdef MYSQL_56 + uint rli_option = INFO_REPOSITORY_DUMMY; + Relay_log_info *rli= NULL; + rli = Rpl_info_factory::create_rli(rli_option, false); + rli->set_rli_description_event( + new Format_description_log_event(BINLOG_VERSION)); +#endif + Relay_log_info* rli= new Relay_log_info(false); + rli->sql_driver_thd= current_thd; + + rli->no_storage= true; + rli->relay_log.description_event_for_exec= + new Format_description_log_event(4); + + return rli; +} +#endif + +class Master_info; + +static rpl_group_info* wsrep_relay_group_init(const char* log_fname) +{ + Relay_log_info* rli= new Relay_log_info(false); + + rli->no_storage= true; + if (!rli->relay_log.description_event_for_exec) + { + rli->relay_log.description_event_for_exec= + new Format_description_log_event(4); + } + static LEX_STRING dbname= { C_STRING_WITH_LEN("mysql") }; + + rli->mi = new Master_info( &dbname, false); + //rli->mi = new Master_info( &(C_STRING_WITH_LEN("wsrep")), false); + + rli->mi->rpl_filter = new Rpl_filter; + copy_filter_setting(rli->mi->rpl_filter, get_or_create_rpl_filter("", 0)); + + rli->sql_driver_thd= current_thd; + + struct rpl_group_info *rgi= new rpl_group_info(rli); + rgi->thd= current_thd; + + return rgi; +} + +static void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow* shadow) +{ + shadow->options = thd->variables.option_bits; + 
shadow->server_status = thd->server_status; + shadow->wsrep_exec_mode = thd->wsrep_exec_mode; + shadow->vio = thd->net.vio; + + if (opt_log_slave_updates) + thd->variables.option_bits|= OPTION_BIN_LOG; + else + thd->variables.option_bits&= ~(OPTION_BIN_LOG); + + //if (!thd->wsrep_rli) thd->wsrep_rli= wsrep_relay_log_init("wsrep_relay"); + if (!thd->wsrep_rgi) thd->wsrep_rgi= wsrep_relay_group_init("wsrep_relay"); + // thd->wsrep_rli->info_thd = thd; + + thd->wsrep_exec_mode= REPL_RECV; + thd->net.vio= 0; + thd->clear_error(); + + shadow->tx_isolation = thd->variables.tx_isolation; + thd->variables.tx_isolation = ISO_READ_COMMITTED; + thd->tx_isolation = ISO_READ_COMMITTED; + + shadow->db = thd->db; + shadow->db_length = thd->db_length; + thd->reset_db(NULL, 0); +} + +static void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow* shadow) +{ + thd->variables.option_bits = shadow->options; + thd->server_status = shadow->server_status; + thd->wsrep_exec_mode = shadow->wsrep_exec_mode; + thd->net.vio = shadow->vio; + thd->variables.tx_isolation = shadow->tx_isolation; + thd->reset_db(shadow->db, shadow->db_length); + + delete thd->wsrep_rgi->rli->mi->rpl_filter; + delete thd->wsrep_rgi->rli->mi; + delete thd->wsrep_rgi->rli; + delete thd->wsrep_rgi; + thd->wsrep_rgi = NULL; +; +} + +void wsrep_replay_transaction(THD *thd) +{ + /* checking if BF trx must be replayed */ + if (thd->wsrep_conflict_state== MUST_REPLAY) { + DBUG_ASSERT(wsrep_thd_trx_seqno(thd)); + if (thd->wsrep_exec_mode!= REPL_RECV) { + if (thd->get_stmt_da()->is_sent()) + { + WSREP_ERROR("replay issue, thd has reported status already"); + } + thd->get_stmt_da()->reset_diagnostics_area(); + + thd->wsrep_conflict_state= REPLAYING; + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + + mysql_reset_thd_for_next_command(thd); + thd->killed= NOT_KILLED; + close_thread_tables(thd); + if (thd->locked_tables_mode && thd->lock) + { + WSREP_DEBUG("releasing table lock for replaying (%ld)", + thd->thread_id); + 
thd->locked_tables_list.unlock_locked_tables(thd); + thd->variables.option_bits&= ~(OPTION_TABLE_LOCK); + } + thd->mdl_context.release_transactional_locks(); + /* + Replaying will call MYSQL_START_STATEMENT when handling + BEGIN Query_log_event so end statement must be called before + replaying. + */ + MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da()); + thd->m_statement_psi= NULL; + thd_proc_info(thd, "wsrep replaying trx"); + WSREP_DEBUG("replay trx: %s %lld", + thd->query() ? thd->query() : "void", + (long long)wsrep_thd_trx_seqno(thd)); + struct wsrep_thd_shadow shadow; + wsrep_prepare_bf_thd(thd, &shadow); + + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + + int rcode = wsrep->replay_trx(wsrep, + &thd->wsrep_ws_handle, + (void *)thd); + + wsrep_return_from_bf_mode(thd, &shadow); + if (thd->wsrep_conflict_state!= REPLAYING) + WSREP_WARN("lost replaying mode: %d", thd->wsrep_conflict_state ); + + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + switch (rcode) + { + case WSREP_OK: + thd->wsrep_conflict_state= NO_CONFLICT; + wsrep->post_commit(wsrep, &thd->wsrep_ws_handle); + WSREP_DEBUG("trx_replay successful for: %ld %llu", + thd->thread_id, (long long)thd->real_id); + if (thd->get_stmt_da()->is_sent()) + { + WSREP_WARN("replay ok, thd has reported status"); + } + else if (thd->get_stmt_da()->is_set()) + { + if (thd->get_stmt_da()->status() != Diagnostics_area::DA_OK) + { + WSREP_WARN("replay ok, thd has error status %d", + thd->get_stmt_da()->status()); + } + } + else + { + my_ok(thd); + } + break; + case WSREP_TRX_FAIL: + if (thd->get_stmt_da()->is_sent()) + { + WSREP_ERROR("replay failed, thd has reported status"); + } + else + { + WSREP_DEBUG("replay failed, rolling back"); + //my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction"); + } + thd->wsrep_conflict_state= ABORTED; + wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle); + break; + default: + WSREP_ERROR("trx_replay 
failed for: %d, query: %s", + rcode, thd->query() ? thd->query() : "void"); + /* we're now in inconsistent state, must abort */ + unireg_abort(1); + break; + } + + wsrep_cleanup_transaction(thd); + + mysql_mutex_lock(&LOCK_wsrep_replaying); + wsrep_replaying--; + WSREP_DEBUG("replaying decreased: %d, thd: %lu", + wsrep_replaying, thd->thread_id); + mysql_cond_broadcast(&COND_wsrep_replaying); + mysql_mutex_unlock(&LOCK_wsrep_replaying); + } + } +} + +static void wsrep_replication_process(THD *thd) +{ + int rcode; + DBUG_ENTER("wsrep_replication_process"); + + struct wsrep_thd_shadow shadow; + wsrep_prepare_bf_thd(thd, &shadow); + + /* From trans_begin() */ + thd->variables.option_bits|= OPTION_BEGIN; + thd->server_status|= SERVER_STATUS_IN_TRANS; + + rcode = wsrep->recv(wsrep, (void *)thd); + DBUG_PRINT("wsrep",("wsrep_repl returned: %d", rcode)); + + WSREP_INFO("applier thread exiting (code:%d)", rcode); + + switch (rcode) { + case WSREP_OK: + case WSREP_NOT_IMPLEMENTED: + case WSREP_CONN_FAIL: + /* provider does not support slave operations / disconnected from group, + * just close applier thread */ + break; + case WSREP_NODE_FAIL: + /* data inconsistency => SST is needed */ + /* Note: we cannot just blindly restart replication here, + * SST might require server restart if storage engines must be + * initialized after SST */ + WSREP_ERROR("node consistency compromised, aborting"); + wsrep_kill_mysql(thd); + break; + case WSREP_WARNING: + case WSREP_TRX_FAIL: + case WSREP_TRX_MISSING: + /* these suggests a bug in provider code */ + WSREP_WARN("bad return from recv() call: %d", rcode); + /* fall through to node shutdown */ + case WSREP_FATAL: + /* Cluster connectivity is lost. + * + * If applier was killed on purpose (KILL_CONNECTION), we + * avoid mysql shutdown. 
This is because the killer will then handle + * shutdown processing (or replication restarting) + */ + if (thd->killed != KILL_CONNECTION) + { + wsrep_kill_mysql(thd); + } + break; + } + + mysql_mutex_lock(&LOCK_thread_count); + wsrep_close_applier(thd); + mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + TABLE *tmp; + while ((tmp = thd->temporary_tables)) + { + WSREP_WARN("Applier %lu, has temporary tables at exit: %s.%s", + thd->thread_id, + (tmp->s) ? tmp->s->db.str : "void", + (tmp->s) ? tmp->s->table_name.str : "void"); + } + wsrep_return_from_bf_mode(thd, &shadow); + DBUG_VOID_RETURN; +} + +void wsrep_create_appliers(long threads) +{ + if (!wsrep_connected) + { + /* see wsrep_replication_start() for the logic */ + if (wsrep_cluster_address && strlen(wsrep_cluster_address) && + wsrep_provider && strcasecmp(wsrep_provider, "none")) + { + WSREP_ERROR("Trying to launch slave threads before creating " + "connection at '%s'", wsrep_cluster_address); + assert(0); + } + return; + } + + long wsrep_threads=0; + pthread_t hThread; + while (wsrep_threads++ < threads) { + if (pthread_create( + &hThread, &connection_attrib, + start_wsrep_THD, (void*)wsrep_replication_process)) + WSREP_WARN("Can't create thread to manage wsrep replication"); + } +} + +static void wsrep_rollback_process(THD *thd) +{ + DBUG_ENTER("wsrep_rollback_process"); + + mysql_mutex_lock(&LOCK_wsrep_rollback); + wsrep_aborting_thd= NULL; + + while (thd->killed == NOT_KILLED) { + thd_proc_info(thd, "wsrep aborter idle"); + thd->mysys_var->current_mutex= &LOCK_wsrep_rollback; + thd->mysys_var->current_cond= &COND_wsrep_rollback; + + mysql_cond_wait(&COND_wsrep_rollback,&LOCK_wsrep_rollback); + + WSREP_DEBUG("WSREP rollback thread wakes for signal"); + + mysql_mutex_lock(&thd->mysys_var->mutex); + thd_proc_info(thd, "wsrep aborter active"); + thd->mysys_var->current_mutex= 0; + thd->mysys_var->current_cond= 0; + mysql_mutex_unlock(&thd->mysys_var->mutex); + + /* check 
for false alarms */ + if (!wsrep_aborting_thd) + { + WSREP_DEBUG("WSREP rollback thread has empty abort queue"); + } + /* process all entries in the queue */ + while (wsrep_aborting_thd) { + THD *aborting; + wsrep_aborting_thd_t next = wsrep_aborting_thd->next; + aborting = wsrep_aborting_thd->aborting_thd; + my_free(wsrep_aborting_thd); + wsrep_aborting_thd= next; + /* + * must release mutex, appliers my want to add more + * aborting thds in our work queue, while we rollback + */ + mysql_mutex_unlock(&LOCK_wsrep_rollback); + + mysql_mutex_lock(&aborting->LOCK_wsrep_thd); + if (aborting->wsrep_conflict_state== ABORTED) + { + WSREP_DEBUG("WSREP, thd already aborted: %llu state: %d", + (long long)aborting->real_id, + aborting->wsrep_conflict_state); + + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + mysql_mutex_lock(&LOCK_wsrep_rollback); + continue; + } + aborting->wsrep_conflict_state= ABORTING; + + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + + set_current_thd(aborting); + aborting->store_globals(); + + mysql_mutex_lock(&aborting->LOCK_wsrep_thd); + wsrep_client_rollback(aborting); + WSREP_DEBUG("WSREP rollbacker aborted thd: (%lu %llu)", + aborting->thread_id, (long long)aborting->real_id); + mysql_mutex_unlock(&aborting->LOCK_wsrep_thd); + + set_current_thd(thd); + thd->store_globals(); + + mysql_mutex_lock(&LOCK_wsrep_rollback); + } + } + + mysql_mutex_unlock(&LOCK_wsrep_rollback); + sql_print_information("WSREP: rollbacker thread exiting"); + + DBUG_PRINT("wsrep",("wsrep rollbacker thread exiting")); + DBUG_VOID_RETURN; +} + +void wsrep_create_rollbacker() +{ + if (wsrep_provider && strcasecmp(wsrep_provider, "none")) + { + pthread_t hThread; + /* create rollbacker */ + if (pthread_create( &hThread, &connection_attrib, + start_wsrep_THD, (void*)wsrep_rollback_process)) + WSREP_WARN("Can't create thread to manage wsrep rollback"); + } +} + +void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe) +{ + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + 
thd->wsrep_PA_safe = safe; + } +} + +enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd, my_bool sync) +{ + enum wsrep_conflict_state state = NO_CONFLICT; + if (thd) + { + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + state = thd->wsrep_conflict_state; + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return state; +} + +my_bool wsrep_thd_is_wsrep(THD *thd) +{ + my_bool status = FALSE; + if (thd) + { + status = (WSREP(thd) && WSREP_PROVIDER_EXISTS); + } + return status; +} + +my_bool wsrep_thd_is_BF(THD *thd, my_bool sync) +{ + my_bool status = FALSE; + if (thd) + { + // THD can be BF only if provider exists + if (wsrep_thd_is_wsrep(thd)) + { + if (sync) + mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = ((thd->wsrep_exec_mode == REPL_RECV) || + (thd->wsrep_exec_mode == TOTAL_ORDER)); + if (sync) + mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + } + return status; +} + +extern "C" +my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync) +{ + bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = ((thd->wsrep_exec_mode == REPL_RECV) || + (thd->wsrep_exec_mode == TOTAL_ORDER) || + (thd->wsrep_exec_mode == LOCAL_COMMIT)); + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return status; +} + +extern "C" +my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync) +{ + bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = (thd->wsrep_exec_mode == LOCAL_STATE); + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return status; +} + +int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) +{ + THD *victim_thd = (THD *) victim_thd_ptr; + THD *bf_thd = (THD *) bf_thd_ptr; + DBUG_ENTER("wsrep_abort_thd"); + + if ( (WSREP(bf_thd) || + ( (WSREP_ON || wsrep_OSU_method_options == WSREP_OSU_RSU) && + bf_thd->wsrep_exec_mode == TOTAL_ORDER) ) && + 
victim_thd) + { + WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ? + (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id); + ha_abort_transaction(bf_thd, victim_thd, signal); + } + else + { + WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd); + } + + DBUG_RETURN(1); +} + +extern "C" +int wsrep_thd_in_locking_session(void *thd_ptr) +{ + if (thd_ptr && ((THD *)thd_ptr)->in_lock_tables) { + return 1; + } + return 0; +} + diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h new file mode 100644 index 00000000000..0d01d28f01b --- /dev/null +++ b/sql/wsrep_thd.h @@ -0,0 +1,53 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ + +#include <my_config.h> + +#ifndef WSREP_THD_H +#define WSREP_THD_H + +#ifdef WITH_WSREP + +#include "sql_class.h" + +int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff); +void wsrep_client_rollback(THD *thd); +void wsrep_replay_transaction(THD *thd); +void wsrep_create_appliers(long threads); +void wsrep_create_rollbacker(); + +int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, + my_bool signal); + +/* + PA = Parallel Applying (on the slave side) +*/ +extern void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe); +extern my_bool wsrep_thd_is_BF(THD *thd, my_bool sync); +extern my_bool wsrep_thd_is_wsrep(void *thd_ptr); + +enum wsrep_conflict_state wsrep_thd_conflict_state(void *thd_ptr, my_bool sync); +extern "C" my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync); +extern "C" my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync); +extern "C" int wsrep_thd_in_locking_session(void *thd_ptr); + +#else /* WITH_WSREP */ + +#define wsrep_thd_is_BF(T, S) (0) +#define wsrep_abort_thd(X,Y,Z) do { } while(0) +#define wsrep_create_appliers(T) do { } while(0) + +#endif +#endif /* WSREP_THD_H */ diff --git a/sql/wsrep_utils.cc b/sql/wsrep_utils.cc new file mode 100644 index 00000000000..c4a992c751a --- /dev/null +++ b/sql/wsrep_utils.cc @@ -0,0 +1,501 @@ +/* Copyright 2010 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//! @file some utility functions and classes not directly related to replication + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE // POSIX_SPAWN_USEVFORK flag +#endif + +#include "wsrep_utils.h" +#include "wsrep_mysqld.h" + +#include <sql_class.h> + +#include <spawn.h> // posix_spawn() +#include <unistd.h> // pipe() +#include <errno.h> // errno +#include <string.h> // strerror() +#include <sys/wait.h> // waitpid() +#include <sys/types.h> +#include <sys/socket.h> +#include <netdb.h> // getaddrinfo() + +#ifdef HAVE_GETIFADDRS +#include <ifaddrs.h> +#endif + +extern char** environ; // environment variables + +static wsp::string wsrep_PATH; + +void +wsrep_prepend_PATH (const char* path) +{ + int count = 0; + + while (environ[count]) + { + if (strncmp (environ[count], "PATH=", 5)) + { + count++; + continue; + } + + char* const old_path (environ[count]); + + if (strstr (old_path, path)) return; // path already there + + size_t const new_path_len(strlen(old_path) + strlen(":") + + strlen(path) + 1); + + char* const new_path (reinterpret_cast<char*>(malloc(new_path_len))); + + if (new_path) + { + snprintf (new_path, new_path_len, "PATH=%s:%s", path, + old_path + strlen("PATH=")); + + wsrep_PATH.set (new_path); + environ[count] = new_path; + } + else + { + WSREP_ERROR ("Failed to allocate 'PATH' environment variable " + "buffer of size %zu.", new_path_len); + } + + return; + } + + WSREP_ERROR ("Failed to find 'PATH' environment variable. " + "State snapshot transfer may not be working."); +} + +namespace wsp +{ + +#define PIPE_READ 0 +#define PIPE_WRITE 1 +#define STDIN_FD 0 +#define STDOUT_FD 1 + +#ifndef POSIX_SPAWN_USEVFORK +# define POSIX_SPAWN_USEVFORK 0 +#endif + +process::process (const char* cmd, const char* type) + : str_(cmd ? 
strdup(cmd) : strdup("")), io_(NULL), err_(EINVAL), pid_(0) +{ + if (0 == str_) + { + WSREP_ERROR ("Can't allocate command line of size: %zu", strlen(cmd)); + err_ = ENOMEM; + return; + } + + if (0 == strlen(str_)) + { + WSREP_ERROR ("Can't start a process: null or empty command line."); + return; + } + + if (NULL == type || (strcmp (type, "w") && strcmp(type, "r"))) + { + WSREP_ERROR ("type argument should be either \"r\" or \"w\"."); + return; + } + + int pipe_fds[2] = { -1, }; + if (::pipe(pipe_fds)) + { + err_ = errno; + WSREP_ERROR ("pipe() failed: %d (%s)", err_, strerror(err_)); + return; + } + + // which end of pipe will be returned to parent + int const parent_end (strcmp(type,"w") ? PIPE_READ : PIPE_WRITE); + int const child_end (parent_end == PIPE_READ ? PIPE_WRITE : PIPE_READ); + int const close_fd (parent_end == PIPE_READ ? STDOUT_FD : STDIN_FD); + + char* const pargv[4] = { strdup("sh"), strdup("-c"), strdup(str_), NULL }; + if (!(pargv[0] && pargv[1] && pargv[2])) + { + err_ = ENOMEM; + WSREP_ERROR ("Failed to allocate pargv[] array."); + goto cleanup_pipe; + } + + posix_spawnattr_t attr; + err_ = posix_spawnattr_init (&attr); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_init() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_pipe; + } + + err_ = posix_spawnattr_setflags (&attr, POSIX_SPAWN_SETSIGDEF | + POSIX_SPAWN_USEVFORK); + if (err_) + { + WSREP_ERROR ("posix_spawnattr_setflags() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + posix_spawn_file_actions_t fact; + err_ = posix_spawn_file_actions_init (&fact); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_init() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_attr; + } + + // close child's stdout|stdin depending on what we returning + err_ = posix_spawn_file_actions_addclose (&fact, close_fd); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_addclose() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_fact; + } + + // 
substitute our pipe descriptor in place of the closed one + err_ = posix_spawn_file_actions_adddup2 (&fact, + pipe_fds[child_end], close_fd); + if (err_) + { + WSREP_ERROR ("posix_spawn_file_actions_addup2() failed: %d (%s)", + err_, strerror(err_)); + goto cleanup_fact; + } + + err_ = posix_spawnp (&pid_, pargv[0], &fact, &attr, pargv, environ); + if (err_) + { + WSREP_ERROR ("posix_spawnp(%s) failed: %d (%s)", + pargv[2], err_, strerror(err_)); + pid_ = 0; // just to make sure it was not messed up in the call + goto cleanup_fact; + } + + io_ = fdopen (pipe_fds[parent_end], type); + + if (io_) + { + pipe_fds[parent_end] = -1; // skip close on cleanup + } + else + { + err_ = errno; + WSREP_ERROR ("fdopen() failed: %d (%s)", err_, strerror(err_)); + } + +cleanup_fact: + int err; // to preserve err_ code + err = posix_spawn_file_actions_destroy (&fact); + if (err) + { + WSREP_ERROR ("posix_spawn_file_actions_destroy() failed: %d (%s)\n", + err, strerror(err)); + } + +cleanup_attr: + err = posix_spawnattr_destroy (&attr); + if (err) + { + WSREP_ERROR ("posix_spawnattr_destroy() failed: %d (%s)", + err, strerror(err)); + } + +cleanup_pipe: + if (pipe_fds[0] >= 0) close (pipe_fds[0]); + if (pipe_fds[1] >= 0) close (pipe_fds[1]); + + free (pargv[0]); + free (pargv[1]); + free (pargv[2]); +} + +process::~process () +{ + if (io_) + { + assert (pid_); + assert (str_); + + WSREP_WARN("Closing pipe to child process: %s, PID(%ld) " + "which might still be running.", str_, (long)pid_); + + if (fclose (io_) == -1) + { + err_ = errno; + WSREP_ERROR("fclose() failed: %d (%s)", err_, strerror(err_)); + } + } + + if (str_) free (const_cast<char*>(str_)); +} + +int +process::wait () +{ + if (pid_) + { + int status; + if (-1 == waitpid(pid_, &status, 0)) + { + err_ = errno; assert (err_); + WSREP_ERROR("Waiting for process failed: %s, PID(%ld): %d (%s)", + str_, (long)pid_, err_, strerror (err_)); + } + else + { // command completed, check exit status + if (WIFEXITED (status)) { + 
err_ = WEXITSTATUS (status); + } + else { // command didn't complete with exit() + WSREP_ERROR("Process was aborted."); + err_ = errno ? errno : ECHILD; + } + + if (err_) { + switch (err_) /* Translate error codes to more meaningful */ + { + case 126: err_ = EACCES; break; /* Permission denied */ + case 127: err_ = ENOENT; break; /* No such file or directory */ + } + WSREP_ERROR("Process completed with error: %s: %d (%s)", + str_, err_, strerror(err_)); + } + + pid_ = 0; + if (io_) fclose (io_); + io_ = NULL; + } + } + else { + assert (NULL == io_); + WSREP_ERROR("Command did not run: %s", str_); + } + + return err_; +} + +thd::thd (my_bool won) : init(), ptr(new THD) +{ + if (ptr) + { + ptr->thread_stack= (char*) &ptr; + ptr->store_globals(); + ptr->variables.option_bits&= ~OPTION_BIN_LOG; // disable binlog + ptr->variables.wsrep_on = won; + ptr->security_ctx->master_access= ~(ulong)0; + lex_start(ptr); + } +} + +thd::~thd () +{ + if (ptr) + { + delete ptr; + my_pthread_setspecific_ptr (THR_THD, 0); + } +} + +} // namespace wsp + +/* Returns INADDR_NONE, INADDR_ANY, INADDR_LOOPBACK or something else */ +unsigned int wsrep_check_ip (const char* const addr) +{ +#if 0 + if (addr && 0 == strcasecmp(addr, MY_BIND_ALL_ADDRESSES)) return INADDR_ANY; +#endif + + unsigned int ret = INADDR_NONE; + struct addrinfo *res, hints; + + memset (&hints, 0, sizeof(hints)); + hints.ai_flags= AI_PASSIVE/*|AI_ADDRCONFIG*/; + hints.ai_socktype= SOCK_STREAM; + hints.ai_family= AF_UNSPEC; + + int gai_ret = getaddrinfo(addr, NULL, &hints, &res); + if (0 == gai_ret) + { + if (AF_INET == res->ai_family) /* IPv4 */ + { + struct sockaddr_in* a= (struct sockaddr_in*)res->ai_addr; + ret= htonl(a->sin_addr.s_addr); + } + else /* IPv6 */ + { + struct sockaddr_in6* a= (struct sockaddr_in6*)res->ai_addr; + if (IN6_IS_ADDR_UNSPECIFIED(&a->sin6_addr)) + ret= INADDR_ANY; + else if (IN6_IS_ADDR_LOOPBACK(&a->sin6_addr)) + ret= INADDR_LOOPBACK; + else + ret= 0xdeadbeef; + } + freeaddrinfo (res); + } + 
else { + WSREP_ERROR ("getaddrinfo() failed on '%s': %d (%s)", + addr, gai_ret, gai_strerror(gai_ret)); + } + + // uint8_t* b= (uint8_t*)&ret; + // fprintf (stderr, "########## wsrep_check_ip returning: %hhu.%hhu.%hhu.%hhu\n", + // b[0], b[1], b[2], b[3]); + + return ret; +} + +extern char* my_bind_addr_str; + +size_t wsrep_guess_ip (char* buf, size_t buf_len) +{ + size_t ip_len = 0; + + if (my_bind_addr_str && my_bind_addr_str[0] != '\0') + { + unsigned int const ip_type= wsrep_check_ip(my_bind_addr_str); + + if (INADDR_NONE == ip_type) { + WSREP_ERROR("Networking not configured, cannot receive state transfer."); + return 0; + } + + if (INADDR_ANY != ip_type) {; + strncpy (buf, my_bind_addr_str, buf_len); + return strlen(buf); + } + } + + // mysqld binds to all interfaces - try IP from wsrep_node_address + if (wsrep_node_address && wsrep_node_address[0] != '\0') { + const char* const colon_ptr = strchr(wsrep_node_address, ':'); + + if (colon_ptr) + ip_len = colon_ptr - wsrep_node_address; + else + ip_len = strlen(wsrep_node_address); + + if (ip_len >= buf_len) { + WSREP_WARN("default_ip(): buffer too short: %zu <= %zd", buf_len, ip_len); + return 0; + } + + memcpy (buf, wsrep_node_address, ip_len); + buf[ip_len] = '\0'; + return ip_len; + } + +#if HAVE_GETIFADDRS + struct ifaddrs *ifaddr, *ifa; + if (getifaddrs(&ifaddr) == 0) + { + for (ifa= ifaddr; ifa != NULL; ifa = ifa->ifa_next) + { + if (!ifa->ifa_addr || ifa->ifa_addr->sa_family != AF_INET) // TODO AF_INET6 + continue; + + if (vio_getnameinfo(ifa->ifa_addr, buf, buf_len, NULL, 0, NI_NUMERICHOST)) + continue; + + if (strcmp(buf, "127.0.0.1") == 0) // lame + continue; + + freeifaddrs(ifaddr); + return strlen(buf); + } + freeifaddrs(ifaddr); + } +#endif + + return 0; +} + +size_t wsrep_guess_address(char* buf, size_t buf_len) +{ + size_t addr_len = wsrep_guess_ip (buf, buf_len); + + if (addr_len && addr_len < buf_len) { + addr_len += snprintf (buf + addr_len, buf_len - addr_len, + ":%u", mysqld_port); + } + + 
return addr_len; +} + +/* + * WSREPXid + */ + +#define WSREP_XID_PREFIX "WSREPXid" +#define WSREP_XID_PREFIX_LEN MYSQL_XID_PREFIX_LEN +#define WSREP_XID_UUID_OFFSET 8 +#define WSREP_XID_SEQNO_OFFSET (WSREP_XID_UUID_OFFSET + sizeof(wsrep_uuid_t)) +#define WSREP_XID_GTRID_LEN (WSREP_XID_SEQNO_OFFSET + sizeof(wsrep_seqno_t)) + +void wsrep_xid_init(XID* xid, const wsrep_uuid_t* uuid, wsrep_seqno_t seqno) +{ + xid->formatID= 1; + xid->gtrid_length= WSREP_XID_GTRID_LEN; + xid->bqual_length= 0; + memset(xid->data, 0, sizeof(xid->data)); + memcpy(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN); + memcpy(xid->data + WSREP_XID_UUID_OFFSET, uuid, sizeof(wsrep_uuid_t)); + memcpy(xid->data + WSREP_XID_SEQNO_OFFSET, &seqno, sizeof(wsrep_seqno_t)); +} + +const wsrep_uuid_t* wsrep_xid_uuid(const XID* xid) +{ + if (wsrep_is_wsrep_xid(xid)) + return reinterpret_cast<const wsrep_uuid_t*>(xid->data + + WSREP_XID_UUID_OFFSET); + else + return &WSREP_UUID_UNDEFINED; +} + +wsrep_seqno_t wsrep_xid_seqno(const XID* xid) +{ + + if (wsrep_is_wsrep_xid(xid)) + { + wsrep_seqno_t seqno; + memcpy(&seqno, xid->data + WSREP_XID_SEQNO_OFFSET, sizeof(wsrep_seqno_t)); + return seqno; + } + else + { + return WSREP_SEQNO_UNDEFINED; + } +} + +extern +int wsrep_is_wsrep_xid(const XID* xid) +{ + return (xid->formatID == 1 && + xid->gtrid_length == WSREP_XID_GTRID_LEN && + xid->bqual_length == 0 && + !memcmp(xid->data, WSREP_XID_PREFIX, WSREP_XID_PREFIX_LEN)); +} diff --git a/sql/wsrep_utils.h b/sql/wsrep_utils.h new file mode 100644 index 00000000000..337678238f8 --- /dev/null +++ b/sql/wsrep_utils.h @@ -0,0 +1,208 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#ifndef WSREP_UTILS_H +#define WSREP_UTILS_H + +#include "wsrep_priv.h" + +unsigned int wsrep_check_ip (const char* addr); +size_t wsrep_guess_ip (char* buf, size_t buf_len); +size_t wsrep_guess_address(char* buf, size_t buf_len); + +namespace wsp { +class node_status +{ +public: + node_status() : status(WSREP_MEMBER_UNDEFINED) {} + void set(wsrep_member_status_t new_status, + const wsrep_view_info_t* view = 0) + { + if (status != new_status || 0 != view) + { + wsrep_notify_status(new_status, view); + status = new_status; + } + } + wsrep_member_status_t get() const { return status; } +private: + wsrep_member_status_t status; +}; +} /* namespace wsp */ + +extern wsp::node_status local_status; + +namespace wsp { +/* A small class to run external programs. */ +class process +{ +private: + const char* const str_; + FILE* io_; + int err_; + pid_t pid_; + +public: +/*! @arg type is a pointer to a null-terminated string which must contain + either the letter 'r' for reading or the letter 'w' for writing. 
/*
  Minimal owner of a C string that is released with free().

  set() takes over the given pointer and frees the previously held one;
  the destructor frees whatever is still held. The class is not copyable:
  two copies would free() the same buffer.
*/
class string
{
public:
    string() : string_(0) {}

    /* Takes ownership of str (may be 0) and releases the previous string. */
    void set(char* str)
    {
        if (str == string_) return; // already owned, freeing would dangle
        if (string_) free (string_);
        string_ = str;
    }

    ~string() { set (0); }

private:
    // declared, never defined: copying is not allowed
    string (const string&);
    string& operator= (const string&);

    char* string_;
};
critical& operator= (const critical&); +}; +#endif + +} // namespace wsrep + +#endif /* WSREP_UTILS_H */ diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc new file mode 100644 index 00000000000..c6e9b89ca55 --- /dev/null +++ b/sql/wsrep_var.cc @@ -0,0 +1,571 @@ +/* Copyright 2008 Codership Oy <http://www.codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "wsrep_var.h" + +#include <sql_plugin.h> +#include <mysqld.h> +#include <sql_class.h> +#include <set_var.h> +#include <sql_acl.h> +#include "wsrep_priv.h" +#include "wsrep_thd.h" +#include <my_dir.h> +#include <cstdio> +#include <cstdlib> + +const char* wsrep_provider = 0; +const char* wsrep_provider_options = 0; +const char* wsrep_cluster_address = 0; +const char* wsrep_cluster_name = 0; +const char* wsrep_node_name = 0; +const char* wsrep_node_address = 0; +const char* wsrep_node_incoming_address = 0; +const char* wsrep_start_position = 0; +ulong wsrep_OSU_method_options; + +int wsrep_init_vars() +{ + wsrep_provider = my_strdup(WSREP_NONE, MYF(MY_WME)); + wsrep_provider_options= my_strdup("", MYF(MY_WME)); + wsrep_cluster_address = my_strdup("", MYF(MY_WME)); + wsrep_cluster_name = my_strdup(WSREP_CLUSTER_NAME, MYF(MY_WME)); + wsrep_node_name = my_strdup("", MYF(MY_WME)); + wsrep_node_address = my_strdup("", MYF(MY_WME)); + wsrep_node_incoming_address= my_strdup(WSREP_NODE_INCOMING_AUTO, 
MYF(MY_WME)); + wsrep_start_position = my_strdup(WSREP_START_POSITION_ZERO, MYF(MY_WME)); + + global_system_variables.binlog_format=BINLOG_FORMAT_ROW; + return 0; +} + +bool wsrep_on_update (sys_var *self, THD* thd, enum_var_type var_type) +{ + if (var_type == OPT_GLOBAL) { + // FIXME: this variable probably should be changed only per session + thd->variables.wsrep_on = global_system_variables.wsrep_on; + } + return false; +} + +bool wsrep_causal_reads_update (SV *sv) +{ + if (sv->wsrep_causal_reads) { + sv->wsrep_sync_wait |= WSREP_SYNC_WAIT_BEFORE_READ; + } else { + sv->wsrep_sync_wait &= ~WSREP_SYNC_WAIT_BEFORE_READ; + } + return false; +} + +bool wsrep_sync_wait_update (sys_var* self, THD* thd, enum_var_type var_type) +{ + if (var_type == OPT_GLOBAL) + global_system_variables.wsrep_causal_reads = + MY_TEST(global_system_variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ); + else + thd->variables.wsrep_causal_reads = + MY_TEST(thd->variables.wsrep_sync_wait & WSREP_SYNC_WAIT_BEFORE_READ); + return false; +} + +static int wsrep_start_position_verify (const char* start_str) +{ + size_t start_len; + wsrep_uuid_t uuid; + ssize_t uuid_len; + + start_len = strlen (start_str); + if (start_len < 34) + return 1; + + uuid_len = wsrep_uuid_scan (start_str, start_len, &uuid); + if (uuid_len < 0 || (start_len - uuid_len) < 2) + return 1; + + if (start_str[uuid_len] != ':') // separator should follow UUID + return 1; + + char* endptr; + wsrep_seqno_t const seqno __attribute__((unused)) // to avoid GCC warnings + (strtoll(&start_str[uuid_len + 1], &endptr, 10)); + + if (*endptr == '\0') return 0; // remaining string was seqno + + return 1; +} + +bool wsrep_start_position_check (sys_var *self, THD* thd, set_var* var) +{ + char start_pos_buf[FN_REFLEN]; + + if ((! 
var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(start_pos_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + start_pos_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_start_position_verify(start_pos_buf)) return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +void wsrep_set_local_position (const char* value) +{ + size_t value_len = strlen (value); + size_t uuid_len = wsrep_uuid_scan (value, value_len, &local_uuid); + + local_seqno = strtoll (value + uuid_len + 1, NULL, 10); + + XID xid; + wsrep_xid_init(&xid, &local_uuid, local_seqno); + wsrep_set_SE_checkpoint(&xid); + WSREP_INFO ("wsrep_start_position var submitted: '%s'", wsrep_start_position); +} + +bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type type) +{ + // since this value passed wsrep_start_position_check, don't check anything + // here + wsrep_set_local_position (wsrep_start_position); + + if (wsrep) { + wsrep_sst_received (wsrep, &local_uuid, local_seqno, NULL, 0); + } + + return 0; +} + +void wsrep_start_position_init (const char* val) +{ + if (NULL == val || wsrep_start_position_verify (val)) + { + WSREP_ERROR("Bad initial value for wsrep_start_position: %s", + (val ? val : "")); + return; + } + + wsrep_set_local_position (val); +} + +static bool refresh_provider_options() +{ + WSREP_DEBUG("refresh_provider_options: %s", + (wsrep_provider_options) ? 
wsrep_provider_options : "null"); + char* opts= wsrep->options_get(wsrep); + if (opts) + { + if (wsrep_provider_options) my_free((void *)wsrep_provider_options); + wsrep_provider_options = (char*)my_memdup(opts, strlen(opts) + 1, + MYF(MY_WME)); + } + else + { + WSREP_ERROR("Failed to get provider options"); + return true; + } + return false; +} + +static int wsrep_provider_verify (const char* provider_str) +{ + MY_STAT f_stat; + char path[FN_REFLEN]; + + if (!provider_str || strlen(provider_str)== 0) + return 1; + + if (!strcmp(provider_str, WSREP_NONE)) + return 0; + + if (!unpack_filename(path, provider_str)) + return 1; + + /* check that provider file exists */ + bzero(&f_stat, sizeof(MY_STAT)); + if (!my_stat(path, &f_stat, MYF(0))) + { + return 1; + } + return 0; +} + +bool wsrep_provider_check (sys_var *self, THD* thd, set_var* var) +{ + char wsrep_provider_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(wsrep_provider_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + wsrep_provider_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_provider_verify(wsrep_provider_buf)) return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_provider_update (sys_var *self, THD* thd, enum_var_type type) +{ + bool rcode= false; + + bool wsrep_on_saved= thd->variables.wsrep_on; + thd->variables.wsrep_on= false; + + WSREP_DEBUG("wsrep_provider_update: %s", wsrep_provider); + + /* stop replication is heavy operation, and includes closing all client + connections. Closing clients may need to get LOCK_global_system_variables + at least in MariaDB. 
+ + Note: releasing LOCK_global_system_variables may cause race condition, if + there can be several concurrent clients changing wsrep_provider + */ + mysql_mutex_unlock(&LOCK_global_system_variables); + wsrep_stop_replication(thd); + mysql_mutex_lock(&LOCK_global_system_variables); + + if (wsrep_inited == 1) + wsrep_deinit(false); + + char* tmp= strdup(wsrep_provider); // wsrep_init() rewrites provider + //when fails + if (wsrep_init()) + { + my_error(ER_CANT_OPEN_LIBRARY, MYF(0), tmp); + rcode = true; + } + free(tmp); + + // we sure don't want to use old address with new provider + wsrep_cluster_address_init(NULL); + wsrep_provider_options_init(NULL); + + thd->variables.wsrep_on= wsrep_on_saved; + + refresh_provider_options(); + + return rcode; +} + +void wsrep_provider_init (const char* value) +{ + WSREP_DEBUG("wsrep_provider_init: %s -> %s", + (wsrep_provider) ? wsrep_provider : "null", + (value) ? value : "null"); + if (NULL == value || wsrep_provider_verify (value)) + { + WSREP_ERROR("Bad initial value for wsrep_provider: %s", + (value ? value : "")); + return; + } + + if (wsrep_provider) my_free((void *)wsrep_provider); + wsrep_provider = my_strdup(value, MYF(0)); +} + +bool wsrep_provider_options_check(sys_var *self, THD* thd, set_var* var) +{ + return 0; +} + +bool wsrep_provider_options_update(sys_var *self, THD* thd, enum_var_type type) +{ + wsrep_status_t ret= wsrep->options_set(wsrep, wsrep_provider_options); + if (ret != WSREP_OK) + { + WSREP_ERROR("Set options returned %d", ret); + refresh_provider_options(); + return true; + } + return refresh_provider_options(); +} + +void wsrep_provider_options_init(const char* value) +{ + if (wsrep_provider_options && wsrep_provider_options != value) + my_free((void *)wsrep_provider_options); + wsrep_provider_options = (value) ? my_strdup(value, MYF(0)) : NULL; +} + +static int wsrep_cluster_address_verify (const char* cluster_address_str) +{ + /* There is no predefined address format, it depends on provider. 
*/ + return 0; +} + +bool wsrep_cluster_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + if (!wsrep_cluster_address_verify(addr_buf)) return 0; + + err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) +{ + bool wsrep_on_saved= thd->variables.wsrep_on; + thd->variables.wsrep_on= false; + + /* stop replication is heavy operation, and includes closing all client + connections. Closing clients may need to get LOCK_global_system_variables + at least in MariaDB. + + Note: releasing LOCK_global_system_variables may cause race condition, if + there can be several concurrent clients changing wsrep_provider + */ + mysql_mutex_unlock(&LOCK_global_system_variables); + wsrep_stop_replication(thd); + mysql_mutex_lock(&LOCK_global_system_variables); + + if (wsrep_start_replication()) + { + wsrep_create_rollbacker(); + wsrep_create_appliers(wsrep_slave_threads); + } + + thd->variables.wsrep_on= wsrep_on_saved; + + return false; +} + +void wsrep_cluster_address_init (const char* value) +{ + WSREP_DEBUG("wsrep_cluster_address_init: %s -> %s", + (wsrep_cluster_address) ? wsrep_cluster_address : "null", + (value) ? value : "null"); + + if (wsrep_cluster_address) my_free ((void*)wsrep_cluster_address); + wsrep_cluster_address = (value) ? my_strdup(value, MYF(0)) : NULL; +} + +/* wsrep_cluster_name cannot be NULL or an empty string. 
*/ +bool wsrep_cluster_name_check (sys_var *self, THD* thd, set_var* var) +{ + if (!var->save_result.string_value.str || + (var->save_result.string_value.length == 0)) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + (var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL")); + return 1; + } + return 0; +} + +bool wsrep_cluster_name_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +bool wsrep_node_name_check (sys_var *self, THD* thd, set_var* var) +{ + // TODO: for now 'allow' 0-length string to be valid (default) + if (!var->save_result.string_value.str) + { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + (var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL")); + return 1; + } + return 0; +} + +bool wsrep_node_name_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +// TODO: do something more elaborate, like checking connectivity +bool wsrep_node_address_check (sys_var *self, THD* thd, set_var* var) +{ + char addr_buf[FN_REFLEN]; + + if ((! var->save_result.string_value.str) || + (var->save_result.string_value.length > (FN_REFLEN - 1))) // safety + goto err; + + memcpy(addr_buf, var->save_result.string_value.str, + var->save_result.string_value.length); + addr_buf[var->save_result.string_value.length]= 0; + + // TODO: for now 'allow' 0-length string to be valid (default) + return 0; + +err: + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), var->var->name.str, + var->save_result.string_value.str ? + var->save_result.string_value.str : "NULL"); + return 1; +} + +bool wsrep_node_address_update (sys_var *self, THD* thd, enum_var_type type) +{ + return 0; +} + +void wsrep_node_address_init (const char* value) +{ + if (wsrep_node_address && strcmp(wsrep_node_address, value)) + my_free ((void*)wsrep_node_address); + + wsrep_node_address = (value) ? 
my_strdup(value, MYF(0)) : NULL; +} + +bool wsrep_slave_threads_check (sys_var *self, THD* thd, set_var* var) +{ + mysql_mutex_lock(&LOCK_wsrep_slave_threads); + wsrep_slave_count_change += (var->save_result.ulonglong_value - + wsrep_slave_threads); + mysql_mutex_unlock(&LOCK_wsrep_slave_threads); + + return 0; +} + +bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type) +{ + if (wsrep_slave_count_change > 0) + { + wsrep_create_appliers(wsrep_slave_count_change); + wsrep_slave_count_change = 0; + } + return false; +} + +bool wsrep_desync_check (sys_var *self, THD* thd, set_var* var) +{ + bool new_wsrep_desync= (bool) var->save_result.ulonglong_value; + if (wsrep_desync == new_wsrep_desync) { + if (new_wsrep_desync) { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'wsrep_desync' is already ON."); + } else { + push_warning (thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_VALUE_FOR_VAR, + "'wsrep_desync' is already OFF."); + } + } + return 0; +} + +bool wsrep_desync_update (sys_var *self, THD* thd, enum_var_type type) +{ + wsrep_status_t ret(WSREP_WARNING); + if (wsrep_desync) { + ret = wsrep->desync (wsrep); + if (ret != WSREP_OK) { + WSREP_WARN ("SET desync failed %d for %s", ret, thd->query()); + my_error (ER_CANNOT_USER, MYF(0), "'desync'", thd->query()); + return true; + } + } else { + ret = wsrep->resync (wsrep); + if (ret != WSREP_OK) { + WSREP_WARN ("SET resync failed %d for %s", ret, thd->query()); + my_error (ER_CANNOT_USER, MYF(0), "'resync'", thd->query()); + return true; + } + } + return false; +} + +static SHOW_VAR wsrep_status_vars[]= +{ + {"connected", (char*) &wsrep_connected, SHOW_BOOL}, + {"ready", (char*) &wsrep_ready, SHOW_BOOL}, + {"cluster_state_uuid",(char*) &wsrep_cluster_state_uuid,SHOW_CHAR_PTR}, + {"cluster_conf_id", (char*) &wsrep_cluster_conf_id, SHOW_LONGLONG}, + {"cluster_status", (char*) &wsrep_cluster_status, SHOW_CHAR_PTR}, + {"cluster_size", (char*) &wsrep_cluster_size, 
SHOW_LONG_NOFLUSH}, + {"local_index", (char*) &wsrep_local_index, SHOW_LONG_NOFLUSH}, + {"local_bf_aborts", (char*) &wsrep_show_bf_aborts, SHOW_SIMPLE_FUNC}, + {"provider_name", (char*) &wsrep_provider_name, SHOW_CHAR_PTR}, + {"provider_version", (char*) &wsrep_provider_version, SHOW_CHAR_PTR}, + {"provider_vendor", (char*) &wsrep_provider_vendor, SHOW_CHAR_PTR}, + {"thread_count", (char*) &wsrep_running_threads, SHOW_LONG_NOFLUSH} +}; + +static int show_var_cmp(const void *var1, const void *var2) +{ + return strcasecmp(((SHOW_VAR*)var1)->name, ((SHOW_VAR*)var2)->name); +} + +int wsrep_show_status (THD *thd, SHOW_VAR *var, char *buff) +{ + uint i, maxi= SHOW_VAR_FUNC_BUFF_SIZE / sizeof(*var) - 1; + SHOW_VAR *v= (SHOW_VAR *)buff; + + var->type= SHOW_ARRAY; + var->value= buff; + + for (i=0; i < array_elements(wsrep_status_vars); i++) + *v++= wsrep_status_vars[i]; + + DBUG_ASSERT(i < maxi); + + wsrep_stats_var* stats= wsrep->stats_get(wsrep); + for (wsrep_stats_var *sv= stats; i < maxi && sv && sv->name; i++, sv++, v++) + { + v->name = thd->strdup(sv->name); + switch (sv->type) { + case WSREP_VAR_INT64: + v->value = (char*)thd->memdup(&sv->value._int64, sizeof(longlong)); + v->type = SHOW_LONGLONG; + break; + case WSREP_VAR_STRING: + v->value = thd->strdup(sv->value._string); + v->type = SHOW_CHAR; + break; + case WSREP_VAR_DOUBLE: + v->value = (char*)thd->memdup(&sv->value._double, sizeof(double)); + v->type = SHOW_DOUBLE; + break; + } + DBUG_ASSERT(i < maxi); + } + wsrep->stats_free(wsrep, stats); + + my_qsort(buff, i, sizeof(*v), show_var_cmp); + + v->name= 0; // terminator + return 0; +} + diff --git a/sql/wsrep_var.h b/sql/wsrep_var.h new file mode 100644 index 00000000000..6914204148d --- /dev/null +++ b/sql/wsrep_var.h @@ -0,0 +1,101 @@ +/* Copyright (C) 2013 Codership Oy <info@codership.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software 
Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +#include <my_config.h> + +#ifndef WSREP_VAR_H +#define WSREP_VAR_H + +#ifdef WITH_WSREP + +#define WSREP_CLUSTER_NAME "my_wsrep_cluster" +#define WSREP_NODE_INCOMING_AUTO "AUTO" +#define WSREP_START_POSITION_ZERO "00000000-0000-0000-0000-000000000000:-1" + +// MySQL variables funcs + +#include "sql_priv.h" +class sys_var; +class set_var; +class THD; + +int wsrep_init_vars(); + +#define CHECK_ARGS (sys_var *self, THD* thd, set_var *var) +#define UPDATE_ARGS (sys_var *self, THD* thd, enum_var_type type) +#define DEFAULT_ARGS (THD* thd, enum_var_type var_type) +#define INIT_ARGS (const char* opt) + +struct system_variables; +bool wsrep_causal_reads_update(struct system_variables *sv); + +extern bool wsrep_on_update UPDATE_ARGS; +extern bool wsrep_sync_wait_update UPDATE_ARGS; +extern bool wsrep_start_position_check CHECK_ARGS; +extern bool wsrep_start_position_update UPDATE_ARGS; +extern void wsrep_start_position_init INIT_ARGS; + +extern bool wsrep_provider_check CHECK_ARGS; +extern bool wsrep_provider_update UPDATE_ARGS; +extern void wsrep_provider_init INIT_ARGS; + +extern bool wsrep_provider_options_check CHECK_ARGS; +extern bool wsrep_provider_options_update UPDATE_ARGS; +extern void wsrep_provider_options_init INIT_ARGS; + +extern bool wsrep_cluster_address_check CHECK_ARGS; +extern bool wsrep_cluster_address_update UPDATE_ARGS; +extern void wsrep_cluster_address_init INIT_ARGS; + +extern bool wsrep_cluster_name_check CHECK_ARGS; +extern bool wsrep_cluster_name_update 
UPDATE_ARGS; + +extern bool wsrep_node_name_check CHECK_ARGS; +extern bool wsrep_node_name_update UPDATE_ARGS; + +extern bool wsrep_node_address_check CHECK_ARGS; +extern bool wsrep_node_address_update UPDATE_ARGS; +extern void wsrep_node_address_init INIT_ARGS; + +extern bool wsrep_sst_method_check CHECK_ARGS; +extern bool wsrep_sst_method_update UPDATE_ARGS; +extern void wsrep_sst_method_init INIT_ARGS; + +extern bool wsrep_sst_receive_address_check CHECK_ARGS; +extern bool wsrep_sst_receive_address_update UPDATE_ARGS; + +extern bool wsrep_sst_auth_check CHECK_ARGS; +extern bool wsrep_sst_auth_update UPDATE_ARGS; +extern void wsrep_sst_auth_init INIT_ARGS; + +extern bool wsrep_sst_donor_check CHECK_ARGS; +extern bool wsrep_sst_donor_update UPDATE_ARGS; + +extern bool wsrep_slave_threads_check CHECK_ARGS; +extern bool wsrep_slave_threads_update UPDATE_ARGS; + +extern bool wsrep_desync_check CHECK_ARGS; +extern bool wsrep_desync_update UPDATE_ARGS; + +#else /* WITH_WSREP */ + +#define WSREP_NONE +#define wsrep_provider_init(X) +#define wsrep_init_vars() (0) +#define wsrep_start_position_init(X) +#define wsrep_sst_auth_init(X) + +#endif /* WITH_WSREP */ +#endif /* WSREP_VAR_H */ |