diff options
author | unknown <serg@serg.mylan> | 2005-02-14 21:50:09 +0100 |
---|---|---|
committer | unknown <serg@serg.mylan> | 2005-02-14 21:50:09 +0100 |
commit | 789f0a6e897028a03a0b527ef03e18254823daf8 (patch) | |
tree | ea9dd08a255018554104b4f137dea7cfe36ece0a /sql | |
parent | 75bdf7178be7934dd49283cd00f13403bcbc7ba3 (diff) | |
parent | f3855774c5c3a86cb07bcd5976ab0b1eb8a5fa54 (diff) | |
download | mariadb-git-789f0a6e897028a03a0b527ef03e18254823daf8.tar.gz |
manually merged
client/mysqlbinlog.cc:
Auto merged
configure.in:
Auto merged
include/my_global.h:
Auto merged
include/my_pthread.h:
Auto merged
innobase/trx/trx0trx.c:
Auto merged
mysql-test/include/varchar.inc:
Auto merged
mysql-test/r/bdb.result:
Auto merged
mysql-test/r/myisam.result:
Auto merged
mysql-test/r/mysqlbinlog2.result:
Auto merged
mysql-test/t/ctype_ucs.test:
Auto merged
mysql-test/t/user_var.test:
Auto merged
mysys/hash.c:
Auto merged
sql/item_func.cc:
Auto merged
sql/log_event.cc:
Auto merged
sql/log_event.h:
Auto merged
sql/set_var.h:
Auto merged
sql/slave.cc:
Auto merged
sql/slave.h:
Auto merged
sql/sp_head.cc:
Auto merged
sql/sql_base.cc:
Auto merged
sql/sql_class.cc:
Auto merged
sql/sql_lex.h:
Auto merged
sql/sql_repl.cc:
Auto merged
sql/sql_repl.h:
Auto merged
sql/sql_table.cc:
Auto merged
sql/sql_trigger.cc:
Auto merged
mysql-test/r/ctype_ucs.result:
ul
mysql-test/r/drop_temp_table.result:
ul
mysql-test/r/innodb.result:
ul
mysql-test/r/insert_select.result:
ul
mysql-test/r/mix_innodb_myisam_binlog.result:
ul
mysql-test/r/rpl_change_master.result:
ul
mysql-test/r/rpl_charset.result:
ul
mysql-test/r/rpl_error_ignored_table.result:
ul
mysql-test/r/rpl_flush_log_loop.result:
ul
mysql-test/r/rpl_flush_tables.result:
ul
mysql-test/r/rpl_loaddata.result:
ul
mysql-test/r/rpl_loaddata_rule_m.result:
ul
mysql-test/r/rpl_log.result:
ul
mysql-test/r/rpl_max_relay_size.result:
ul
mysql-test/r/rpl_relayrotate.result:
ul
mysql-test/r/rpl_replicate_do.result:
ul
mysql-test/r/rpl_rotate_logs.result:
ul
mysql-test/r/rpl_temporary.result:
ul
mysql-test/r/rpl_timezone.result:
ul
mysql-test/r/rpl_until.result:
ul
mysql-test/r/rpl_user_variables.result:
ul
mysql-test/r/user_var.result:
ul
Diffstat (limited to 'sql')
-rw-r--r-- | sql/ha_berkeley.cc | 144 | ||||
-rw-r--r-- | sql/ha_berkeley.h | 4 | ||||
-rw-r--r-- | sql/ha_innodb.cc | 347 | ||||
-rw-r--r-- | sql/ha_innodb.h | 29 | ||||
-rw-r--r-- | sql/handler.cc | 894 | ||||
-rw-r--r-- | sql/handler.h | 199 | ||||
-rw-r--r-- | sql/item_func.cc | 2 | ||||
-rw-r--r-- | sql/lex.h | 9 | ||||
-rw-r--r-- | sql/log.cc | 1431 | ||||
-rw-r--r-- | sql/log_event.cc | 295 | ||||
-rw-r--r-- | sql/log_event.h | 146 | ||||
-rw-r--r-- | sql/mysql_priv.h | 30 | ||||
-rw-r--r-- | sql/mysqld.cc | 438 | ||||
-rw-r--r-- | sql/set_var.h | 2 | ||||
-rw-r--r-- | sql/share/errmsg.txt | 12 | ||||
-rw-r--r-- | sql/slave.cc | 174 | ||||
-rw-r--r-- | sql/slave.h | 19 | ||||
-rw-r--r-- | sql/sp_head.cc | 2 | ||||
-rw-r--r-- | sql/sql_base.cc | 20 | ||||
-rw-r--r-- | sql/sql_class.cc | 38 | ||||
-rw-r--r-- | sql/sql_class.h | 203 | ||||
-rw-r--r-- | sql/sql_delete.cc | 25 | ||||
-rw-r--r-- | sql/sql_insert.cc | 11 | ||||
-rw-r--r-- | sql/sql_lex.cc | 4 | ||||
-rw-r--r-- | sql/sql_lex.h | 13 | ||||
-rw-r--r-- | sql/sql_load.cc | 22 | ||||
-rw-r--r-- | sql/sql_parse.cc | 418 | ||||
-rw-r--r-- | sql/sql_repl.cc | 288 | ||||
-rw-r--r-- | sql/sql_repl.h | 11 | ||||
-rw-r--r-- | sql/sql_table.cc | 33 | ||||
-rw-r--r-- | sql/sql_trigger.cc | 12 | ||||
-rw-r--r-- | sql/sql_update.cc | 22 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 1286 |
33 files changed, 3939 insertions, 2644 deletions
diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index 915d5dcea26..6c26a9555a6 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -102,11 +102,34 @@ static int write_status(DB *status_block, char *buff, uint length); static void update_status(BDB_SHARE *share, TABLE *table); static void berkeley_noticecall(DB_ENV *db_env, db_notices notice); - +static int berkeley_close_connection(THD *thd); +static int berkeley_commit(THD *thd, bool all); +static int berkeley_rollback(THD *thd, bool all); + +static handlerton berkeley_hton = { + 0, /* slot */ + 0, /* savepoint size */ + berkeley_close_connection, + NULL, /* savepoint_set */ + NULL, /* savepoint_rollback */ + NULL, /* savepoint_release */ + berkeley_commit, + berkeley_rollback, + NULL, /* prepare */ + NULL, /* recover */ + NULL, /* commit_by_xid */ + NULL /* rollback_by_xid */ +}; + +typedef struct st_berkeley_trx_data { + DB_TXN *all; + DB_TXN *stmt; + uint bdb_lock_count; +} berkeley_trx_data; /* General functions */ -bool berkeley_init(void) +handlerton *berkeley_init(void) { DBUG_ENTER("berkeley_init"); @@ -135,7 +158,7 @@ bool berkeley_init(void) berkeley_log_file_size= max(berkeley_log_file_size, 10*1024*1024L); if (db_env_create(&db_env,0)) - DBUG_RETURN(1); /* purecov: inspected */ + DBUG_RETURN(0); db_env->set_errcall(db_env,berkeley_print_error); db_env->set_errpfx(db_env,"bdb"); db_env->set_noticecall(db_env, berkeley_noticecall); @@ -163,16 +186,15 @@ bool berkeley_init(void) DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_THREAD, 0666)) { - db_env->close(db_env,0); /* purecov: inspected */ - db_env=0; /* purecov: inspected */ - goto err; + db_env->close(db_env,0); + db_env=0; + DBUG_RETURN(0); } (void) hash_init(&bdb_open_tables,system_charset_info,32,0,0, (hash_get_key) bdb_get_key,0,0); pthread_mutex_init(&bdb_mutex,MY_MUTEX_INIT_FAST); -err: - DBUG_RETURN(db_env == 0); + DBUG_RETURN(&berkeley_hton); } @@ -190,6 +212,11 @@ bool berkeley_end(void) DBUG_RETURN(error != 0); } 
+static int berkeley_close_connection(THD *thd) +{ + my_free((gptr)thd->ha_data[berkeley_hton.slot], MYF(0)); +} + bool berkeley_flush_logs() { int error; @@ -208,26 +235,29 @@ bool berkeley_flush_logs() DBUG_RETURN(result); } - -int berkeley_commit(THD *thd, void *trans) +static int berkeley_commit(THD *thd, bool all) { DBUG_ENTER("berkeley_commit"); - DBUG_PRINT("trans",("ending transaction %s", - trans == thd->transaction.stmt.bdb_tid ? "stmt" : "all")); - int error=txn_commit((DB_TXN*) trans,0); + DBUG_PRINT("trans",("ending transaction %s", all ? "all" : "stmt")); + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + DB_TXN **txn= all ? &trx->all : &trx->stmt; + int error=txn_commit(*txn,0); + *txn=0; #ifndef DBUG_OFF if (error) - DBUG_PRINT("error",("error: %d",error)); /* purecov: inspected */ + DBUG_PRINT("error",("error: %d",error)); #endif DBUG_RETURN(error); } -int berkeley_rollback(THD *thd, void *trans) +static int berkeley_rollback(THD *thd, bool all) { DBUG_ENTER("berkeley_rollback"); - DBUG_PRINT("trans",("aborting transaction %s", - trans == thd->transaction.stmt.bdb_tid ? "stmt" : "all")); - int error=txn_abort((DB_TXN*) trans); + DBUG_PRINT("trans",("aborting transaction %s", all ? "all" : "stmt")); + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + DB_TXN **txn= all ? 
&trx->all : &trx->stmt; + int error=txn_abort(*txn); + *txn=0; DBUG_RETURN(error); } @@ -1842,62 +1872,65 @@ int ha_berkeley::reset(void) int ha_berkeley::external_lock(THD *thd, int lock_type) { int error=0; + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; DBUG_ENTER("ha_berkeley::external_lock"); + if (!trx) + { + thd->ha_data[berkeley_hton.slot]= trx= (berkeley_trx_data *) + my_malloc(sizeof(*trx), MYF(MY_ZEROFILL)); + if (!trx) + DBUG_RETURN(1); + } if (lock_type != F_UNLCK) { - if (!thd->transaction.bdb_lock_count++) + if (!trx->bdb_lock_count++) { - DBUG_ASSERT(thd->transaction.stmt.bdb_tid == 0); + DBUG_ASSERT(trx->stmt == 0); transaction=0; // Safety /* First table lock, start transaction */ if ((thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN | - OPTION_TABLE_LOCK)) && - !thd->transaction.all.bdb_tid) + OPTION_TABLE_LOCK)) && !trx->all) { /* We have to start a master transaction */ DBUG_PRINT("trans",("starting transaction all: options: 0x%lx", (ulong) thd->options)); - if ((error=txn_begin(db_env, 0, - (DB_TXN**) &thd->transaction.all.bdb_tid, - 0))) + if ((error=txn_begin(db_env, 0, &trx->all, 0))) { - thd->transaction.bdb_lock_count--; // We didn't get the lock /* purecov: inspected */ - DBUG_RETURN(error); /* purecov: inspected */ + trx->bdb_lock_count--; // We didn't get the lock + DBUG_RETURN(error); } + trans_register_ha(thd, TRUE, &berkeley_hton); if (thd->in_lock_tables) DBUG_RETURN(0); // Don't create stmt trans } DBUG_PRINT("trans",("starting transaction stmt")); - if ((error=txn_begin(db_env, - (DB_TXN*) thd->transaction.all.bdb_tid, - (DB_TXN**) &thd->transaction.stmt.bdb_tid, - 0))) + if ((error=txn_begin(db_env, trx->all, &trx->stmt, 0))) { /* We leave the possible master transaction open */ - thd->transaction.bdb_lock_count--; // We didn't get the lock /* purecov: inspected */ - DBUG_RETURN(error); /* purecov: inspected */ + trx->bdb_lock_count--; // We didn't get the lock + DBUG_RETURN(error); } + 
trans_register_ha(thd, FALSE, &berkeley_hton); } - transaction= (DB_TXN*) thd->transaction.stmt.bdb_tid; + transaction= trx->stmt; } else { lock.type=TL_UNLOCK; // Unlocked thread_safe_add(share->rows, changed_rows, &share->mutex); changed_rows=0; - if (!--thd->transaction.bdb_lock_count) + if (!--trx->bdb_lock_count) { - if (thd->transaction.stmt.bdb_tid) + if (trx->stmt) { /* - F_UNLOCK is done without a transaction commit / rollback. + F_UNLCK is done without a transaction commit / rollback. This happens if the thread didn't update any rows We must in this case commit the work to keep the row locks */ DBUG_PRINT("trans",("commiting non-updating transaction")); - error=txn_commit((DB_TXN*) thd->transaction.stmt.bdb_tid,0); - thd->transaction.stmt.bdb_tid=0; - transaction=0; + error= txn_commit(trx->stmt,0); + trx->stmt= transaction= 0; } } } @@ -1915,14 +1948,20 @@ int ha_berkeley::start_stmt(THD *thd) { int error=0; DBUG_ENTER("ha_berkeley::start_stmt"); - if (!thd->transaction.stmt.bdb_tid) + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + DBUG_ASSERT(trx); + /* + note that trx->stmt may have been already initialized as start_stmt() + is called for *each table* not for each storage engine, + and there could be many bdb tables referenced in the query + */ + if (!trx->stmt) { DBUG_PRINT("trans",("starting transaction stmt")); - error=txn_begin(db_env, (DB_TXN*) thd->transaction.all.bdb_tid, - (DB_TXN**) &thd->transaction.stmt.bdb_tid, - 0); + error=txn_begin(db_env, trx->all, &trx->stmt, 0); + trans_register_ha(thd, FALSE, &berkeley_hton); } - transaction= (DB_TXN*) thd->transaction.stmt.bdb_tid; + transaction= trx->stmt; DBUG_RETURN(error); } @@ -2258,6 +2297,8 @@ int ha_berkeley::analyze(THD* thd, HA_CHECK_OPT* check_opt) uint i; DB_BTREE_STAT *stat=0; DB_TXN_STAT *txn_stat_ptr= 0; + berkeley_trx_data *trx=(berkeley_trx_data *)thd->ha_data[berkeley_hton.slot]; + DBUG_ASSERT(trx); /* Original bdb documentation says: @@ -2272,13 
+2313,10 @@ int ha_berkeley::analyze(THD* thd, HA_CHECK_OPT* check_opt) txn_stat_ptr && txn_stat_ptr->st_nactive>=2) { DB_TXN_ACTIVE *atxn_stmt= 0, *atxn_all= 0; - - DB_TXN *txn_all= (DB_TXN*) thd->transaction.all.bdb_tid; - u_int32_t all_id= txn_all->id(txn_all); - - DB_TXN *txn_stmt= (DB_TXN*) thd->transaction.stmt.bdb_tid; - u_int32_t stmt_id= txn_stmt->id(txn_stmt); - + + u_int32_t all_id= trx->all->id(trx->all); + u_int32_t stmt_id= trx->stmt->id(trx->stmt); + DB_TXN_ACTIVE *cur= txn_stat_ptr->st_txnarray; DB_TXN_ACTIVE *end= cur + txn_stat_ptr->st_nactive; for (; cur!=end && (!atxn_stmt || !atxn_all); cur++) @@ -2286,7 +2324,7 @@ int ha_berkeley::analyze(THD* thd, HA_CHECK_OPT* check_opt) if (cur->txnid==all_id) atxn_all= cur; if (cur->txnid==stmt_id) atxn_stmt= cur; } - + if (atxn_stmt && atxn_all && log_compare(&atxn_stmt->lsn,&atxn_all->lsn)) { diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index e485b12bdb4..10e61455867 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -168,9 +168,7 @@ extern char *berkeley_home, *berkeley_tmpdir, *berkeley_logdir; extern long berkeley_lock_scan_time; extern TYPELIB berkeley_lock_typelib; -bool berkeley_init(void); +handlerton *berkeley_init(void); bool berkeley_end(void); bool berkeley_flush_logs(void); -int berkeley_commit(THD *thd, void *trans); -int berkeley_rollback(THD *thd, void *trans); int berkeley_show_logs(Protocol *protocol); diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index e36fed9b283..ee9009ebf27 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -14,14 +14,12 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* This file defines the InnoDB handler: the interface between MySQL and -InnoDB +/* This file defines the InnoDB handler: the interface between MySQL and InnoDB NOTE: You can only use noninlined InnoDB functions in this file, because we have disables the InnoDB inlining in this file. 
*/ -/* TODO list for the InnoDB handler in 4.1: - - Remove the flag innodb_active_trans from thd and replace it with a - function call innodb_active_trans(thd), which looks at the InnoDB +/* TODO list for the InnoDB handler in 5.0: + - Remove the flag trx->active_trans and look at the InnoDB trx struct state field - Find out what kind of problems the OS X case-insensitivity causes to table and database names; should we 'normalize' the names like we do @@ -141,8 +139,6 @@ ulong innobase_active_counter = 0; char* innobase_home = NULL; -char innodb_dummy_stmt_trx_handle = 'D'; - static HASH innobase_open_tables; #ifdef __NETWARE__ /* some special cleanup for NetWare */ @@ -153,6 +149,26 @@ static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length, my_bool not_used __attribute__((unused))); static INNOBASE_SHARE *get_share(const char *table_name); static void free_share(INNOBASE_SHARE *share); +static int innobase_close_connection(THD* thd); +static int innobase_commit(THD* thd, bool all); +static int innobase_rollback(THD* thd, bool all); +static int innobase_rollback_to_savepoint(THD* thd, void *savepoint); +static int innobase_savepoint(THD* thd, void *savepoint); + +static handlerton innobase_hton = { + 0, /* slot */ + sizeof(trx_named_savept_t), /* savepoint size. TODO: use it */ + innobase_close_connection, + innobase_savepoint, + innobase_rollback_to_savepoint, + innobase_release_savepoint + innobase_commit, /* commit */ + innobase_rollback, /* rollback */ + innobase_xa_prepare, /* prepare */ + innobase_xa_recover, /* recover */ + innobase_commit_by_xid, /* commit_by_xid */ + innobase_rollback_by_xid /* rollback_by_xid */ +}; /********************************************************************* Commits a transaction in an InnoDB database. 
*/ @@ -250,7 +266,7 @@ struct show_var_st innodb_status_variables[]= { {"rows_updated", (char*) &export_vars.innodb_rows_updated, SHOW_LONG}, {NullS, NullS, SHOW_LONG}}; - + /* General functions */ /********************************************************************** @@ -317,9 +333,11 @@ documentation, see handler.cc. */ void innobase_release_temporary_latches( /*===============================*/ - void* innobase_tid) + THD *thd) { - innobase_release_stat_resources((trx_t*)innobase_tid); + trx_t *trx= (trx_t*) thd->ha_data[innobase_hton.slot]; + if (trx) + innobase_release_stat_resources(trx); } /************************************************************************ @@ -652,25 +670,17 @@ check_trx_exists( ut_ad(thd == current_thd); - trx = (trx_t*) thd->transaction.all.innobase_tid; + trx = (trx_t*) thd->ha_data[innobase_hton.slot]; if (trx == NULL) { DBUG_ASSERT(thd != NULL); trx = trx_allocate_for_mysql(); trx->mysql_thd = thd; - trx->mysql_query_str = &((*thd).query); - - thd->transaction.all.innobase_tid = trx; + trx->mysql_query_str = &(thd->query); + trx->active_trans = 0; - /* The execution of a single SQL statement is denoted by - a 'transaction' handle which is a dummy pointer: InnoDB - remembers internally where the latest SQL statement - started, and if error handling requires rolling back the - latest statement, InnoDB does a rollback to a savepoint. */ - - thd->transaction.stmt.innobase_tid = - (void*)&innodb_dummy_stmt_trx_handle; + thd->ha_data[innobase_hton.slot] = trx; } else { if (trx->magic_n != TRX_MAGIC_N) { mem_analyze_corruption((byte*)trx); @@ -707,7 +717,7 @@ ha_innobase::update_thd( { row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt; trx_t* trx; - + trx = check_trx_exists(thd); if (prebuilt->trx != trx) { @@ -720,6 +730,24 @@ ha_innobase::update_thd( return(0); } +/************************************************************************* +Registers the InnoDB transaction in MySQL, to receive commit/rollback +events. 
This function must be called every time InnoDB starts a +transaction internally. */ +static +void +register_trans( +/*============*/ + THD* thd) /* in: thd to use the handle */ +{ + /* register the start of the statement */ + trans_register_ha(thd, FALSE, &innobase_hton); + if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { + + /* no autocommit mode, register for a transaction */ + trans_register_ha(thd, TRUE, &innobase_hton); + } +} /* BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB ------------------------------------------------------------ @@ -813,12 +841,7 @@ innobase_query_caching_of_table_permitted( return((my_bool)FALSE); } - trx = (trx_t*) thd->transaction.all.innobase_tid; - - if (trx == NULL) { - trx = check_trx_exists(thd); - } - + trx = check_trx_exists(thd); if (trx->has_search_latch) { ut_print_timestamp(stderr); fprintf(stderr, @@ -871,7 +894,11 @@ innobase_query_caching_of_table_permitted( /* The call of row_search_.. will start a new transaction if it is not yet started */ - thd->transaction.all.innodb_active_trans = 1; + if (trx->active_trans == 0) { + + register_trans(thd); + trx->active_trans = 1; + } if (row_search_check_if_query_cache_permitted(trx, norm_name)) { @@ -983,7 +1010,12 @@ ha_innobase::init_table_handle_for_HANDLER(void) /* Set the MySQL flag to mark that there is an active transaction */ - current_thd->transaction.all.innodb_active_trans = 1; + if (prebuilt->trx->active_trans == 0) { + + register_trans(current_thd); + + prebuilt->trx->active_trans = 1; + } /* We did the necessary inits in this function, no need to repeat them in row_search_for_mysql */ @@ -1013,7 +1045,7 @@ ha_innobase::init_table_handle_for_HANDLER(void) /************************************************************************* Opens an InnoDB database. 
*/ -bool +handlerton * innobase_init(void) /*===============*/ /* out: TRUE if error */ @@ -1090,7 +1122,7 @@ innobase_init(void) "InnoDB: syntax error in innodb_data_file_path"); my_free(internal_innobase_data_file_path, MYF(MY_ALLOW_ZERO_PTR)); - DBUG_RETURN(TRUE); + DBUG_RETURN(0); } /* -------------- Log files ---------------------------*/ @@ -1122,7 +1154,7 @@ innobase_init(void) my_free(internal_innobase_data_file_path, MYF(MY_ALLOW_ZERO_PTR)); - DBUG_RETURN(TRUE); + DBUG_RETURN(0); } /* --------------------------------------------------*/ @@ -1212,7 +1244,7 @@ innobase_init(void) if (err != DB_SUCCESS) { my_free(internal_innobase_data_file_path, MYF(MY_ALLOW_ZERO_PTR)); - DBUG_RETURN(1); + DBUG_RETURN(0); } (void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0, @@ -1235,7 +1267,7 @@ innobase_init(void) glob_mi.pos = trx_sys_mysql_master_log_pos; } */ - DBUG_RETURN(0); + DBUG_RETURN(&innobase_hton); } /*********************************************************************** @@ -1359,7 +1391,12 @@ innobase_start_trx_and_assign_read_view( /* Set the MySQL flag to mark that there is an active transaction */ - current_thd->transaction.all.innodb_active_trans = 1; + if (trx->active_trans == 0) { + + register_trans(current_thd); + + trx->active_trans = 1; + } DBUG_RETURN(0); } @@ -1368,15 +1405,14 @@ innobase_start_trx_and_assign_read_view( Commits a transaction in an InnoDB database or marks an SQL statement ended. 
*/ -int +static int innobase_commit( /*============*/ /* out: 0 */ THD* thd, /* in: MySQL thread handle of the user for whom the transaction should be committed */ - void* trx_handle)/* in: InnoDB trx handle or - &innodb_dummy_stmt_trx_handle: the latter means - that the current SQL statement ended */ + bool all) /* in: TRUE - commit transaction + FALSE - the current SQL statement ended */ { trx_t* trx; @@ -1391,7 +1427,7 @@ innobase_commit( innobase_release_stat_resources(trx); - /* The flag thd->transaction.all.innodb_active_trans is set to 1 in + /* The flag trx->active_trans is set to 1 in 1. ::external_lock(), 2. ::start_stmt(), @@ -1406,23 +1442,22 @@ innobase_commit( For the time being, we play safe and do the cleanup though there should be nothing to clean up. */ - if (thd->transaction.all.innodb_active_trans == 0 + if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) { fprintf(stderr, -"InnoDB: Error: thd->transaction.all.innodb_active_trans == 0\n" +"InnoDB: Error: trx->active_trans == 0\n" "InnoDB: but trx->conc_state != TRX_NOT_STARTED\n"); } - if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle - || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) { + if (all || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) { /* We were instructed to commit the whole transaction, or this is an SQL statement end and autocommit is on */ innobase_commit_low(trx); - thd->transaction.all.innodb_active_trans = 0; + trx->active_trans = 0; } else { /* We just mark the SQL statement ended and do not do a transaction commit */ @@ -1448,6 +1483,11 @@ innobase_commit( DBUG_RETURN(0); } +/* + don't delete it - it may be re-enabled later + as an optimization for the most common case InnoDB+binlog +*/ +#if 0 /********************************************************************* This is called when MySQL writes the binlog entry for the current transaction. 
Writes to the InnoDB tablespace info which tells where the @@ -1473,18 +1513,51 @@ innobase_report_binlog_offset_and_commit( ut_a(trx != NULL); - trx->mysql_log_file_name = log_file_name; + trx->mysql_log_file_name = log_file_name; trx->mysql_log_offset = (ib_longlong)end_offset; - + trx->flush_log_later = TRUE; - innobase_commit(thd, trx_handle); + innobase_commit(thd, trx_handle); trx->flush_log_later = FALSE; return(0); } +/*********************************************************************** +This function stores the binlog offset and flushes logs. */ + +void +innobase_store_binlog_offset_and_flush_log( +/*=======================================*/ + char *binlog_name, /* in: binlog name */ + longlong offset) /* in: binlog offset */ +{ + mtr_t mtr; + + assert(binlog_name != NULL); + + /* Start a mini-transaction */ + mtr_start_noninline(&mtr); + + /* Update the latest MySQL binlog name and offset info + in trx sys header */ + + trx_sys_update_mysql_binlog_offset( + binlog_name, + offset, + TRX_SYS_MYSQL_LOG_INFO, &mtr); + + /* Commits the mini-transaction */ + mtr_commit(&mtr); + + /* Syncronous flush of the log buffer to disk */ + log_buffer_flush_to_disk(); +} + +#endif + /********************************************************************* This is called after MySQL has written the binlog entry for the current transaction. Flushes the InnoDB log files to disk if required. 
*/ @@ -1493,20 +1566,23 @@ int innobase_commit_complete( /*=====================*/ /* out: 0 */ - void* trx_handle) /* in: InnoDB trx handle */ + THD* thd) /* in: user thread */ { trx_t* trx; - if (srv_flush_log_at_trx_commit == 0) { + trx = (trx_t*) thd->ha_data[innobase_hton.slot]; - return(0); - } + if (trx && trx->active_trans) { - trx = (trx_t*)trx_handle; + trx->active_trans = 0; - ut_a(trx != NULL); + if (srv_flush_log_at_trx_commit == 0) { - trx_commit_complete_for_mysql(trx); + return(0); + } + + trx_commit_complete_for_mysql(trx); + } return(0); } @@ -1514,15 +1590,14 @@ innobase_commit_complete( /********************************************************************* Rolls back a transaction or the latest SQL statement. */ -int +static int innobase_rollback( /*==============*/ /* out: 0 or error number */ THD* thd, /* in: handle to the MySQL thread of the user whose transaction should be rolled back */ - void* trx_handle)/* in: InnoDB trx handle or a dummy stmt handle; - the latter means we roll back the latest SQL - statement */ + bool all) /* in: TRUE - commit transaction + FALSE - the current SQL statement ended */ { int error = 0; trx_t* trx; @@ -1546,11 +1621,10 @@ innobase_rollback( row_unlock_table_autoinc_for_mysql(trx); } - if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle - || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) { + if (all || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) { error = trx_rollback_for_mysql(trx); - thd->transaction.all.innodb_active_trans = 0; + trx->active_trans = 0; } else { error = trx_rollback_last_sql_stat_for_mysql(trx); } @@ -1594,17 +1668,14 @@ innobase_rollback_trx( /********************************************************************* Rolls back a transaction to a savepoint. 
*/ -int +static int innobase_rollback_to_savepoint( /*===========================*/ /* out: 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the given name */ THD* thd, /* in: handle to the MySQL thread of the user whose transaction should be rolled back */ - char* savepoint_name, /* in: savepoint name */ - my_off_t* binlog_cache_pos)/* out: position which corresponds to the - savepoint in the binlog cache of this - transaction, not defined if error */ + void *savepoint) /* in: savepoint data */ { ib_longlong mysql_binlog_cache_pos; int error = 0; @@ -1620,10 +1691,10 @@ innobase_rollback_to_savepoint( innobase_release_stat_resources(trx); - error = trx_rollback_to_savepoint_for_mysql(trx, savepoint_name, + /* TODO: use provided savepoint data area to store savepoint data */ + char name[16]; sprintf(name, "s_%08lx", savepoint); + error = trx_rollback_to_savepoint_for_mysql(trx, name, &mysql_binlog_cache_pos); - *binlog_cache_pos = (my_off_t)mysql_binlog_cache_pos; - DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); } @@ -1631,23 +1702,24 @@ innobase_rollback_to_savepoint( Release transaction savepoint name. 
*/ int -innobase_release_savepoint_name( +innobase_release_savepoint( /*===========================*/ /* out: 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the given name */ THD* thd, /* in: handle to the MySQL thread of the user whose transaction should be rolled back */ - char* savepoint_name) /* in: savepoint name */ + void *savepoint) /* in: savepoint data */ { - ib_longlong mysql_binlog_cache_pos; int error = 0; trx_t* trx; - DBUG_ENTER("innobase_release_savepoint_name"); + DBUG_ENTER("innobase_release_savepoint"); trx = check_trx_exists(thd); - error = trx_release_savepoint_for_mysql(trx, savepoint_name); + /* TODO: use provided savepoint data area to store savepoint data */ + char name[16]; sprintf(name, "s_%08lx", savepoint); + error = trx_release_savepoint_for_mysql(trx, name); DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); } @@ -1655,17 +1727,12 @@ innobase_release_savepoint_name( /********************************************************************* Sets a transaction savepoint. 
*/ -int +static int innobase_savepoint( /*===============*/ /* out: always 0, that is, always succeeds */ THD* thd, /* in: handle to the MySQL thread */ - char* savepoint_name, /* in: savepoint name */ - my_off_t binlog_cache_pos)/* in: offset up to which the current - transaction has cached log entries to its - binlog cache, not defined if no transaction - active, or we are in the autocommit state, or - binlogging is not switched on */ + void *savepoint) /* in: savepoint data */ { int error = 0; trx_t* trx; @@ -1686,14 +1753,12 @@ innobase_savepoint( innobase_release_stat_resources(trx); - /* Setting a savepoint starts a transaction inside InnoDB since - it allocates resources for it (memory to store the savepoint name, - for example) */ - - thd->transaction.all.innodb_active_trans = 1; + /* cannot happen outside of transaction */ + DBUG_ASSERT(trx->active_trans); - error = trx_savepoint_for_mysql(trx, savepoint_name, - (ib_longlong)binlog_cache_pos); + /* TODO: use provided savepoint data area to store savepoint data */ + char name[16]; sprintf(name, "s_%08lx", savepoint); + error = trx_savepoint_for_mysql(trx, name, (ib_longlong)0); DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); } @@ -1701,25 +1766,14 @@ innobase_savepoint( /********************************************************************* Frees a possible InnoDB trx object associated with the current THD. 
*/ -int +static int innobase_close_connection( /*======================*/ /* out: 0 or error number */ THD* thd) /* in: handle to the MySQL thread of the user - whose transaction should be rolled back */ + whose resources should be free'd */ { - trx_t* trx; - - trx = (trx_t*)thd->transaction.all.innobase_tid; - - if (NULL != trx) { - innobase_rollback(thd, (void*)trx); - - trx_free_for_mysql(trx); - - thd->transaction.all.innobase_tid = NULL; - } - + trx_free_for_mysql((trx_t*)thd->ha_data[innobase_hton.slot]); return(0); } @@ -2535,19 +2589,19 @@ ha_innobase::write_row( DBUG_ENTER("ha_innobase::write_row"); if (prebuilt->trx != - (trx_t*) current_thd->transaction.all.innobase_tid) { + (trx_t*) current_thd->ha_data[innobase_hton.slot]) { fprintf(stderr, "InnoDB: Error: the transaction object for the table handle is at\n" "InnoDB: %p, but for the current thread it is at %p\n", prebuilt->trx, - current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr); ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200); fputs("\n" "InnoDB: Dump of 200 bytes around transaction.all: ", stderr); ut_print_buf(stderr, - ((byte*)(&(current_thd->transaction.all))) - 100, 200); + ((byte*)(&(current_thd->ha_data[innobase_hton.slot]))) - 100, 200); putc('\n', stderr); ut_error; } @@ -2599,7 +2653,7 @@ ha_innobase::write_row( /* Altering to InnoDB format */ innobase_commit(user_thd, prebuilt->trx); /* Note that this transaction is still active. */ - user_thd->transaction.all.innodb_active_trans = 1; + prebuilt->trx->active_trans = 1; /* We will need an IX lock on the destination table. */ prebuilt->sql_stat_start = TRUE; } else { @@ -2614,7 +2668,7 @@ ha_innobase::write_row( locks, so they have to be acquired again. */ innobase_commit(user_thd, prebuilt->trx); /* Note that this transaction is still active. 
*/ - user_thd->transaction.all.innodb_active_trans = 1; + prebuilt->trx->active_trans = 1; /* Re-acquire the table lock on the source table. */ row_lock_table_for_mysql(prebuilt, src_table, mode); /* We will need an IX lock on the destination table. */ @@ -2902,7 +2956,7 @@ ha_innobase::update_row( DBUG_ENTER("ha_innobase::update_row"); ut_ad(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) table->timestamp_field->set_time(); @@ -2963,7 +3017,7 @@ ha_innobase::delete_row( DBUG_ENTER("ha_innobase::delete_row"); ut_ad(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); if (last_query_id != user_thd->query_id) { prebuilt->sql_stat_start = TRUE; @@ -3173,7 +3227,7 @@ ha_innobase::index_read( DBUG_ENTER("index_read"); ut_ad(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status); @@ -3288,7 +3342,7 @@ ha_innobase::change_active_index( ut_ad(user_thd == current_thd); ut_ad(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); active_index = keynr; @@ -3376,7 +3430,7 @@ ha_innobase::general_fetch( DBUG_ENTER("general_fetch"); ut_ad(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); innodb_srv_conc_enter_innodb(prebuilt->trx); @@ -3602,7 +3656,7 @@ ha_innobase::rnd_pos( &LOCK_status); ut_ad(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); if (prebuilt->clust_index_was_generated) { /* No primary key was defined for the table and we @@ -3651,7 +3705,7 @@ ha_innobase::position( uint len; 
ut_ad(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); if (prebuilt->clust_index_was_generated) { /* No primary key was defined for the table and we @@ -4141,7 +4195,7 @@ ha_innobase::discard_or_import_tablespace( ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N); ut_a(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); dict_table = prebuilt->table; trx = prebuilt->trx; @@ -4847,7 +4901,7 @@ ha_innobase::check( ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N); ut_a(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) current_thd->ha_data[innobase_hton.slot]); if (prebuilt->mysql_template == NULL) { /* Build the template; we will use a dummy template @@ -5285,7 +5339,11 @@ ha_innobase::start_stmt( } /* Set the MySQL flag to mark that there is an active transaction */ - thd->transaction.all.innodb_active_trans = 1; + if (trx->active_trans == 0) { + + register_trans(thd); + trx->active_trans = 1; + } return(0); } @@ -5353,7 +5411,11 @@ ha_innobase::external_lock( /* Set the MySQL flag to mark that there is an active transaction */ - thd->transaction.all.innodb_active_trans = 1; + if (trx->active_trans == 0) { + + register_trans(thd); + trx->active_trans = 1; + } trx->n_mysql_tables_in_use++; prebuilt->mysql_has_locked = TRUE; @@ -5433,8 +5495,8 @@ ha_innobase::external_lock( innobase_release_stat_resources(trx); if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { - if (thd->transaction.all.innodb_active_trans != 0) { - innobase_commit(thd, trx); + if (trx->active_trans != 0) { + innobase_commit(thd, TRUE); } } else { if (trx->isolation_level <= TRX_ISO_READ_COMMITTED @@ -5917,7 +5979,7 @@ ha_innobase::innobase_read_and_init_auto_inc( ut_a(prebuilt); ut_a(prebuilt->trx == - (trx_t*) current_thd->transaction.all.innobase_tid); + (trx_t*) 
current_thd->ha_data[innobase_hton.slot]); ut_a(prebuilt->table); /* In case MySQL calls this in the middle of a SELECT query, release @@ -6027,37 +6089,6 @@ ha_innobase::get_auto_increment() return((ulonglong) nr); } -/*********************************************************************** -This function stores the binlog offset and flushes logs. */ - -void -innobase_store_binlog_offset_and_flush_log( -/*=======================================*/ - char *binlog_name, /* in: binlog name */ - longlong offset) /* in: binlog offset */ -{ - mtr_t mtr; - - assert(binlog_name != NULL); - - /* Start a mini-transaction */ - mtr_start_noninline(&mtr); - - /* Update the latest MySQL binlog name and offset info - in trx sys header */ - - trx_sys_update_mysql_binlog_offset( - binlog_name, - offset, - TRX_SYS_MYSQL_LOG_INFO, &mtr); - - /* Commits the mini-transaction */ - mtr_commit(&mtr); - - /* Syncronous flush of the log buffer to disk */ - log_buffer_flush_to_disk(); -} - int ha_innobase::cmp_ref( @@ -6252,9 +6283,7 @@ innobase_xa_prepare( trx = check_trx_exists(thd); - /* TODO: Get X/Open XA Transaction Identification from MySQL*/ - memset(&trx->xid, 0, sizeof(trx->xid)); - trx->xid.formatID = -1; + trx->xid=thd->transaction.xid; /* Release a possible FIFO ticket and search latch. 
Since we will reserve the kernel mutex, we have to release the search system latch @@ -6265,7 +6294,7 @@ innobase_xa_prepare( if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) { fprintf(stderr, -"InnoDB: Error: thd->transaction.all.innodb_active_trans == 0\n" +"InnoDB: Error: trx->active_trans == 0\n" "InnoDB: but trx->conc_state != TRX_NOT_STARTED\n"); } diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index 0672485f4fe..e5a43a64df0 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -245,38 +245,31 @@ extern ulong srv_thread_concurrency; extern TYPELIB innobase_lock_typelib; -bool innobase_init(void); +handlerton *innobase_init(void); bool innobase_end(void); bool innobase_flush_logs(void); uint innobase_get_free_space(void); -int innobase_commit(THD *thd, void* trx_handle); +/* + don't delete it - it may be re-enabled later + as an optimization for the most common case InnoDB+binlog +*/ +#if 0 int innobase_report_binlog_offset_and_commit( THD* thd, void* trx_handle, char* log_file_name, my_off_t end_offset); -int innobase_commit_complete( - void* trx_handle); -int innobase_rollback(THD *thd, void* trx_handle); -int innobase_rollback_to_savepoint( - THD* thd, - char* savepoint_name, - my_off_t* binlog_cache_pos); -int innobase_savepoint( - THD* thd, - char* savepoint_name, - my_off_t binlog_cache_pos); -int innobase_release_savepoint_name( - THD* thd, - char* savepoint_name); -int innobase_close_connection(THD *thd); +int innobase_commit_complete(void* trx_handle); +void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); +#endif + int innobase_drop_database(char *path); bool innodb_show_status(THD* thd); bool innodb_mutex_show_status(THD* thd); void innodb_export_status(void); -void innobase_release_temporary_latches(void* innobase_tid); +void innobase_release_temporary_latches(THD *thd); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); diff --git a/sql/handler.cc b/sql/handler.cc 
index b4fed363e87..c3144d16ec0 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -57,10 +57,18 @@ static int NEAR_F delete_file(const char *name,const char *ext,int extflag); -ulong ha_read_count, ha_discover_count; - static SHOW_COMP_OPTION have_yes= SHOW_OPTION_YES; +/* list of all available storage engines (of their handlertons) */ +handlerton *handlertons[MAX_HA]={0}; + +/* number of entries in handlertons[] */ +ulong total_ha; +/* number of storage engines (from handlertons[]) that support 2pc */ +ulong total_ha_2pc; +/* size of savepoint storage area (see ha_init) */ +ulong savepoint_alloc_size; + struct show_table_type_st sys_table_types[]= { {"MyISAM", &have_yes, @@ -119,7 +127,7 @@ enum db_type ha_resolve_by_name(const char *name, uint namelen) if (thd && !my_strcasecmp(&my_charset_latin1, name, "DEFAULT")) { return (enum db_type) thd->variables.table_type; } - + show_table_type_st *types; for (types= sys_table_types; types->type; types++) { @@ -137,7 +145,7 @@ const char *ha_get_storage_engine(enum db_type db_type) if (db_type == types->db_type) return types->type; } - + return "none"; } @@ -234,8 +242,6 @@ handler *get_new_handler(TABLE *table, enum db_type db_type) } } - - /* Register handler error messages for use with my_error(). 
@@ -321,59 +327,89 @@ static int ha_finish_errors(void) my_free((gptr) errmsgs, MYF(0)); return 0; } + +static inline void ha_was_inited_ok(handlerton **ht) +{ + uint tmp= (*ht)->savepoint_offset; + (*ht)->savepoint_offset= savepoint_alloc_size; + savepoint_alloc_size+= tmp; + (*ht)->slot= total_ha++; + if ((*ht)->prepare) + total_ha_2pc++; +} int ha_init() { int error= 0; + handlerton **ht= handlertons; + total_ha= savepoint_alloc_size= 0; + if (ha_init_errors()) return 1; + + if (opt_bin_log) + { + if (!(*ht= binlog_init())) + { + mysql_bin_log.close(LOG_CLOSE_INDEX); + opt_bin_log= 0; + error= 1; + } + else + ha_was_inited_ok(ht++); + } #ifdef HAVE_BERKELEY_DB if (have_berkeley_db == SHOW_OPTION_YES) { - if (berkeley_init()) + if (!(*ht= berkeley_init())) { have_berkeley_db= SHOW_OPTION_DISABLED; // If we couldn't use handler error= 1; } else - opt_using_transactions=1; + ha_was_inited_ok(ht++); } #endif #ifdef HAVE_INNOBASE_DB if (have_innodb == SHOW_OPTION_YES) { - if (innobase_init()) + if (!(*ht= innobase_init())) { have_innodb= SHOW_OPTION_DISABLED; // If we couldn't use handler error= 1; } else - opt_using_transactions=1; + ha_was_inited_ok(ht++); } #endif #ifdef HAVE_NDBCLUSTER_DB if (have_ndbcluster == SHOW_OPTION_YES) { - if (ndbcluster_init()) + if (!(*ht= ndbcluster_init())) { have_ndbcluster= SHOW_OPTION_DISABLED; error= 1; } else - opt_using_transactions=1; + ha_was_inited_ok(ht++); } #endif #ifdef HAVE_ARCHIVE_DB if (have_archive_db == SHOW_OPTION_YES) { - if (archive_db_init()) + if (!(*ht= archive_db_init())) { have_archive_db= SHOW_OPTION_DISABLED; error= 1; } + else + ha_was_inited_ok(ht++); } #endif + DBUG_ASSERT(total_ha < MAX_HA); + opt_using_transactions= total_ha>opt_bin_log; + savepoint_alloc_size+= sizeof(SAVEPOINT); return error; } @@ -426,16 +462,220 @@ void ha_drop_database(char* path) #endif } +/* don't bother to rollback here, it's done already */ void ha_close_connection(THD* thd) { -#ifdef HAVE_INNOBASE_DB - if (have_innodb == 
SHOW_OPTION_YES) - innobase_close_connection(thd); + for (uint i=0; i < total_ha; i++) + if (thd->ha_data[i]) + (*handlertons[i]->close_connection)(thd); +} + +/* ======================================================================== + ======================= TRANSACTIONS ===================================*/ + +void trans_register_ha(THD *thd, bool all, handlerton *ht_arg) +{ + THD_TRANS *trans; + if (all) + { + trans= &thd->transaction.all; + thd->server_status|= SERVER_STATUS_IN_TRANS; + } + else + trans= &thd->transaction.stmt; + +#ifndef DBUG_OFF + handlerton **ht=trans->ht; + while (*ht) + { + DBUG_ASSERT(*ht != ht_arg); + ht++; + } #endif -#ifdef HAVE_NDBCLUSTER_DB - if (have_ndbcluster == SHOW_OPTION_YES) - ndbcluster_close_connection(thd); + trans->ht[trans->nht++]=ht_arg; + trans->no_2pc|=(ht_arg->prepare==0); + if (thd->transaction.xid.is_null()) + thd->transaction.xid.set(thd->query_id); +} + +/* + RETURN + -1 - cannot prepare + 0 - ok + 1 - error, transaction was rolled back +*/ +int ha_prepare(THD *thd) +{ + int error=0, all=1; + THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; + handlerton **ht=trans->ht; + DBUG_ENTER("ha_prepare"); +#ifdef USING_TRANSACTIONS + if (trans->nht) + { + if (trans->no_2pc) + return -1; + for (; *ht; ht++) + { + int err; + statistic_increment(thd->status_var.ha_prepare_count,&LOCK_status); + if ((err= (*(*ht)->prepare)(thd, all))) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + ha_rollback_trans(thd, all); + error=1; + break; + } + } + } +#endif /* USING_TRANSACTIONS */ + DBUG_RETURN(error); +} + +/* + RETURN + 0 - ok + 1 - transaction was rolled back + 2 - error during commit, data may be inconsistent +*/ +int ha_commit_trans(THD *thd, bool all) +{ + int error= 0, cookie= 0; + THD_TRANS *trans= all ? 
&thd->transaction.all : &thd->transaction.stmt; + bool is_real_trans= all || thd->transaction.all.nht == 0; + handlerton **ht= trans->ht; + my_xid xid= thd->transaction.xid.get_my_xid(); + DBUG_ENTER("ha_commit_trans"); +#ifdef USING_TRANSACTIONS + if (trans->nht) + { + if (!trans->no_2pc && trans->nht > 1) + { + for (; *ht && !error; ht++) + { + int err; + if ((err= (*(*ht)->prepare)(thd, all))) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + error=1; + } + statistic_increment(thd->status_var.ha_prepare_count,&LOCK_status); + } + if (error || (is_real_trans && xid && + (error= !(cookie= tc_log->log(thd, xid))))) + { + ha_rollback_trans(thd, all); + return 1; + } + } + error=ha_commit_one_phase(thd, all) ? cookie ? 2 : 1 : 0; + if (cookie) + tc_log->unlog(cookie, xid); + } +#endif /* USING_TRANSACTIONS */ + DBUG_RETURN(error); +} + +int ha_commit_one_phase(THD *thd, bool all) +{ + int error=0; + THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; + bool is_real_trans=all || thd->transaction.all.nht == 0; + handlerton **ht=trans->ht; + DBUG_ENTER("ha_commit_one_phase"); +#ifdef USING_TRANSACTIONS + if (trans->nht) + { + bool need_start_waiters= 0; + if (is_real_trans) + { + if ((error= wait_if_global_read_lock(thd, 0, 0))) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), error); + error= 1; + } + else + need_start_waiters= 1; + } + + for (ht=trans->ht; *ht; ht++) + { + int err; + if ((err= (*(*ht)->commit)(thd, all))) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + error=1; + } + statistic_increment(thd->status_var.ha_commit_count,&LOCK_status); + *ht= 0; + } + trans->nht=0; + trans->no_2pc=0; + if (is_real_trans) + thd->transaction.xid.null(); + if (all) + { +#ifdef HAVE_QUERY_CACHE + if (thd->transaction.changed_tables) + query_cache.invalidate(thd->transaction.changed_tables); #endif + thd->variables.tx_isolation=thd->session_tx_isolation; + thd->transaction.cleanup(); + } + if (need_start_waiters) + 
start_waiting_global_read_lock(thd); + } +#endif /* USING_TRANSACTIONS */ + DBUG_RETURN(error); +} + + +int ha_rollback_trans(THD *thd, bool all) +{ + int error=0; + THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt; + bool is_real_trans=all || thd->transaction.all.nht == 0; + DBUG_ENTER("ha_rollback_trans"); +#ifdef USING_TRANSACTIONS + if (trans->nht) + { + for (handlerton **ht=trans->ht; *ht; ht++) + { + int err; + if ((err= (*(*ht)->rollback)(thd, all))) + { // cannot happen + my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); + error=1; + } + statistic_increment(thd->status_var.ha_rollback_count,&LOCK_status); + *ht= 0; + } + trans->nht=0; + trans->no_2pc=0; + if (is_real_trans) + thd->transaction.xid.null(); + if (all) + { + thd->variables.tx_isolation=thd->session_tx_isolation; + thd->transaction.cleanup(); + } + } +#endif /* USING_TRANSACTIONS */ + /* + If a non-transactional table was updated, warn; don't warn if this is a + slave thread (because when a slave thread executes a ROLLBACK, it has + been read from the binary log, so it's 100% sure and normal to produce + error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the + slave SQL thread, it would not stop the thread but just be printed in + the error log; but we don't want users to wonder why they have this + message in the error log, so we don't send it. 
+ */ + if (is_real_trans && (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) && + !thd->slave_thread) + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARNING_NOT_COMPLETE_ROLLBACK, + ER(ER_WARNING_NOT_COMPLETE_ROLLBACK)); + DBUG_RETURN(error); } /* @@ -451,7 +691,7 @@ int ha_autocommit_or_rollback(THD *thd, int error) { DBUG_ENTER("ha_autocommit_or_rollback"); #ifdef USING_TRANSACTIONS - if (opt_using_transactions) + if (thd->transaction.stmt.nht) { if (!error) { @@ -467,83 +707,137 @@ int ha_autocommit_or_rollback(THD *thd, int error) DBUG_RETURN(error); } -/* - This function is called when MySQL writes the log segment of a - transaction to the binlog. It is called when the LOCK_log mutex is - reserved. Here we communicate to transactional table handlers what - binlog position corresponds to the current transaction. The handler - can store it and in recovery print to the user, so that the user - knows from what position in the binlog to start possible - roll-forward, for example, if the crashed server was a slave in - replication. This function also calls the commit of the table - handler, because the order of transactions in the log of the table - handler must be the same as in the binlog. - NOTE that to eliminate the bottleneck of the group commit, we do not - flush the handler log files here, but only later in a call of - ha_commit_complete(). +int ha_commit_or_rollback_by_xid(LEX_STRING *ident, bool commit) +{ + XID xid; + handlerton **ht= handlertons, **end_ht=ht+total_ha; + int res= 1; + + xid.set(ident); + for ( ; ht < end_ht ; ht++) + if ((*ht)->recover) + res= res && + (*(commit ? 
(*ht)->commit_by_xid : (*ht)->rollback_by_xid))(&xid); + return res; +} - arguments: - thd: the thread handle of the current connection - log_file_name: latest binlog file name - end_offset: the offset in the binlog file up to which we wrote - return value: 0 if success, 1 if error +/* + recover() step of xa */ - -int ha_report_binlog_offset_and_commit(THD *thd, - char *log_file_name, - my_off_t end_offset) +int ha_recover(HASH *commit_list) { - int error= 0; -#ifdef HAVE_INNOBASE_DB - THD_TRANS *trans; - trans = &thd->transaction.all; - if (trans->innodb_active_trans) + int error= 0, len, got; + handlerton **ht= handlertons, **end_ht=ht+total_ha; + XID *list=0; + DBUG_ENTER("ha_recover"); + + DBUG_ASSERT(total_ha_2pc); + DBUG_ASSERT(commit_list || tc_heuristic_recover); + + for (len=commit_list ? commit_list->records : MAX_XID_LIST_SIZE ; + list==0 && len > MIN_XID_LIST_SIZE; len/=2) { - /* - If we updated some InnoDB tables (innodb_active_trans is true), the - binlog coords will be reported into InnoDB during the InnoDB commit - (innobase_report_binlog_offset_and_commit). But if we updated only - non-InnoDB tables, we need an explicit call to report it. - */ - if ((error=innobase_report_binlog_offset_and_commit(thd, - trans->innobase_tid, - log_file_name, - end_offset))) + list=(XID *)my_malloc(len*sizeof(XID), MYF(0)); + } + if (!list) + { + my_error(ER_OUTOFMEMORY, MYF(0), len); + DBUG_RETURN(1); + } + + for ( ; ht < end_ht ; ht++) + { + if (!(*ht)->recover) + continue; + while ((got=(*(*ht)->recover)(list, len)) > 0 ) { - my_error(ER_ERROR_DURING_COMMIT, MYF(0), error); - error=1; + for (int i=0; i < got; i ++) + { + my_xid x=list[i].get_my_xid(); + if (!x) // not "mine" - that is generated by external TM + continue; + if (commit_list ? 
+ hash_search(commit_list, (char *)&x, sizeof(x)) != 0 : + tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT) + (*(*ht)->commit_by_xid)(list+i); + else + (*(*ht)->rollback_by_xid)(list+i); + } + if (got < len) + break; } } - else if (opt_innodb_safe_binlog) // Don't report if not useful - innobase_store_binlog_offset_and_flush_log(log_file_name, end_offset); -#endif - return error; + my_free((gptr)list, MYF(0)); + DBUG_RETURN(0); } /* - Flushes the handler log files (if my.cnf settings do not free us from it) - after we have called ha_report_binlog_offset_and_commit(). To eliminate - the bottleneck from the group commit, this should be called when - LOCK_log has been released in log.cc. + return the list of XID's to a client, the same way SHOW commands do - arguments: - thd: the thread handle of the current connection - return value: always 0 + NOTE + I didn't find in XA specs that an RM cannot return the same XID twice, + so mysql_xa_recover does not filter XID's to ensure uniqueness. + It can be easily fixed later, if necessary. 
*/ - -int ha_commit_complete(THD *thd) +bool mysql_xa_recover(THD *thd) { -#ifdef HAVE_INNOBASE_DB - THD_TRANS *trans; - trans = &thd->transaction.all; - if (trans->innobase_tid) + List<Item> field_list; + Protocol *protocol= thd->protocol; + handlerton **ht= handlertons, **end_ht=ht+total_ha; + bool error=TRUE; + int len, got; + XID *list=0; + DBUG_ENTER("mysql_xa_recover"); + + field_list.push_back(new Item_int("formatID",0,11)); + field_list.push_back(new Item_int("gtrid_length",0,11)); + field_list.push_back(new Item_int("bqual_length",0,11)); + field_list.push_back(new Item_empty_string("data",XIDDATASIZE)); + + if (protocol->send_fields(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) + DBUG_RETURN(TRUE); + + for (len= MAX_XID_LIST_SIZE ; list==0 && len > MIN_XID_LIST_SIZE; len/=2) + { + list=(XID *)my_malloc(len*sizeof(XID), MYF(0)); + } + if (!list) { - innobase_commit_complete(trans->innobase_tid); + my_error(ER_OUTOFMEMORY, MYF(0), len); + DBUG_RETURN(1); + } - trans->innodb_active_trans=0; + for ( ; ht < end_ht ; ht++) + { + if (!(*ht)->recover) + continue; + while ((got=(*(*ht)->recover)(list, len)) > 0 ) + { + XID *xid, *end; + for (xid=list, end=list+got; xid < end; xid++) + { + if (xid->get_my_xid()) + continue; // skip "our" xids + protocol->prepare_for_resend(); + protocol->store_long((longlong)xid->formatID); + protocol->store_long((longlong)xid->gtrid_length); + protocol->store_long((longlong)xid->bqual_length); + protocol->store(xid->data, xid->gtrid_length+xid->bqual_length, + &my_charset_bin); + if (protocol->write()) + goto err; + } + if (got < len) + break; + } } -#endif - return 0; + + error=FALSE; + send_eof(thd); +err: + my_free((gptr)list, MYF(0)); + DBUG_RETURN(error); } /* @@ -566,10 +860,7 @@ int ha_commit_complete(THD *thd) int ha_release_temporary_latches(THD *thd) { #ifdef HAVE_INNOBASE_DB - THD_TRANS *trans; - trans = &thd->transaction.all; - if (trans->innobase_tid) - 
innobase_release_temporary_latches(trans->innobase_tid); + innobase_release_temporary_latches(thd); #endif return 0; } @@ -589,336 +880,105 @@ int ha_update_statistics() return 0; } -int ha_commit_trans(THD *thd, THD_TRANS* trans) +int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) { int error=0; - DBUG_ENTER("ha_commit_trans"); -#ifdef USING_TRANSACTIONS - if (opt_using_transactions) - { - bool transaction_commited= 0; - bool operation_done= 0, need_start_waiters= 0; + THD_TRANS *trans=&thd->transaction.all; + handlerton **ht=trans->ht, **end_ht; + DBUG_ENTER("ha_rollback_to_savepoint"); + DBUG_ASSERT(thd->transaction.stmt.ht[0] == 0); - /* If transaction has done some updates to tables */ - if (trans == &thd->transaction.all && mysql_bin_log.is_open() && - my_b_tell(&thd->transaction.trans_log)) - { - if ((error= wait_if_global_read_lock(thd, 0, 0))) - { - /* - Note that ROLLBACK [TO SAVEPOINT] does not have this test; it's - because ROLLBACK never updates data, so needn't wait on the lock. - */ - my_error(ER_ERROR_DURING_COMMIT, MYF(0), error); - error= 1; - } - else - need_start_waiters= 1; - if (mysql_bin_log.is_open()) - { - mysql_bin_log.write(thd, &thd->transaction.trans_log, 1); - statistic_increment(binlog_cache_use, &LOCK_status); - if (thd->transaction.trans_log.disk_writes != 0) - { - /* - We have to do this after addition of trans_log to main binlog since - this operation can cause flushing of end of trans_log to disk. 
- */ - statistic_increment(binlog_cache_disk_use, &LOCK_status); - thd->transaction.trans_log.disk_writes= 0; - } - reinit_io_cache(&thd->transaction.trans_log, - WRITE_CACHE, (my_off_t) 0, 0, 1); - thd->transaction.trans_log.end_of_file= max_binlog_cache_size; - } - } -#ifdef HAVE_NDBCLUSTER_DB - if (trans->ndb_tid) - { - if ((error=ndbcluster_commit(thd,trans->ndb_tid))) - { - if (error == -1) - my_message(ER_ERROR_DURING_COMMIT, ER(ER_ERROR_DURING_COMMIT), - MYF(0)); - error=1; - } - if (trans == &thd->transaction.all) - operation_done= transaction_commited= 1; - trans->ndb_tid=0; - } -#endif -#ifdef HAVE_BERKELEY_DB - if (trans->bdb_tid) - { - if ((error=berkeley_commit(thd,trans->bdb_tid))) - { - my_error(ER_ERROR_DURING_COMMIT, MYF(0), error); - error=1; - } - else - if (!(thd->options & OPTION_BEGIN)) - transaction_commited= 1; - trans->bdb_tid=0; - } -#endif -#ifdef HAVE_INNOBASE_DB - if (trans->innobase_tid) - { - if ((error=innobase_commit(thd,trans->innobase_tid))) - { - my_error(ER_ERROR_DURING_COMMIT, MYF(0), error); - error=1; - } - trans->innodb_active_trans=0; - if (trans == &thd->transaction.all) - operation_done= transaction_commited= 1; - } -#endif -#ifdef HAVE_QUERY_CACHE - if (transaction_commited && thd->transaction.changed_tables) - query_cache.invalidate(thd->transaction.changed_tables); -#endif /*HAVE_QUERY_CACHE*/ - if (error && trans == &thd->transaction.all && mysql_bin_log.is_open()) - sql_print_error("Got error during commit; Binlog is not up to date!"); - thd->variables.tx_isolation=thd->session_tx_isolation; - if (operation_done) - { - statistic_increment(thd->status_var.ha_commit_count,&LOCK_status); - thd->transaction.cleanup(); + trans->nht=sv->nht; + trans->no_2pc=0; + end_ht=ht+sv->nht; + /* + rolling back to savepoint in all storage engines that were part of the + transaction when the savepoint was set + */ + for (; ht < end_ht; ht++) + { + int err; + DBUG_ASSERT((*ht)->savepoint_set); + if ((err= 
(*(*ht)->savepoint_rollback)(thd, (byte *)(sv+1)+(*ht)->savepoint_offset))) + { // cannot happen + my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); + error=1; } - if (need_start_waiters) - start_waiting_global_read_lock(thd); + statistic_increment(thd->status_var.ha_savepoint_rollback_count,&LOCK_status); + trans->no_2pc|=(*ht)->prepare == 0; } -#endif // using transactions - DBUG_RETURN(error); -} - - -int ha_rollback_trans(THD *thd, THD_TRANS *trans) -{ - int error=0; - DBUG_ENTER("ha_rollback_trans"); -#ifdef USING_TRANSACTIONS - if (opt_using_transactions) + /* + rolling back the transaction in all storage engines that were not part of + the transaction when the savepoint was set + */ + for (; *ht ; ht++) { - bool operation_done=0; - /* - As rollback can be 30 times slower than insert in InnoDB, and user may - not know there's rollback (if it's because of a dupl row), better warn. - */ - const char *save_proc_info= thd->proc_info; - thd->proc_info= "Rolling back"; -#ifdef HAVE_NDBCLUSTER_DB - if (trans->ndb_tid) - { - if ((error=ndbcluster_rollback(thd, trans->ndb_tid))) - { - if (error == -1) - my_message(ER_ERROR_DURING_ROLLBACK, ER(ER_ERROR_DURING_ROLLBACK), - MYF(0)); - error=1; - } - trans->ndb_tid = 0; - operation_done=1; - } -#endif -#ifdef HAVE_BERKELEY_DB - if (trans->bdb_tid) - { - if ((error=berkeley_rollback(thd, trans->bdb_tid))) - { - my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), error); - error=1; - } - trans->bdb_tid=0; - operation_done=1; - } -#endif -#ifdef HAVE_INNOBASE_DB - if (trans->innobase_tid) - { - if ((error=innobase_rollback(thd, trans->innobase_tid))) - { - my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), error); - error=1; - } - trans->innodb_active_trans=0; - operation_done=1; - } -#endif - if ((trans == &thd->transaction.all) && mysql_bin_log.is_open()) - { - /* - Update the binary log with a BEGIN/ROLLBACK block if we have - cached some queries and we updated some non-transactional - table. 
Such cases should be rare (updating a - non-transactional table inside a transaction...). Count disk - writes to trans_log in any case. - */ - if (my_b_tell(&thd->transaction.trans_log)) - { - if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE)) - mysql_bin_log.write(thd, &thd->transaction.trans_log, 0); - statistic_increment(binlog_cache_use, &LOCK_status); - if (thd->transaction.trans_log.disk_writes != 0) - { - /* - We have to do this after addition of trans_log to main binlog since - this operation can cause flushing of end of trans_log to disk. - */ - statistic_increment(binlog_cache_disk_use, &LOCK_status); - thd->transaction.trans_log.disk_writes= 0; - } - } - /* Flushed or not, empty the binlog cache */ - reinit_io_cache(&thd->transaction.trans_log, - WRITE_CACHE, (my_off_t) 0, 0, 1); - thd->transaction.trans_log.end_of_file= max_binlog_cache_size; - if (operation_done) - thd->transaction.cleanup(); + int err; + if ((err= (*(*ht)->rollback)(thd, 1))) + { // cannot happen + my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); + error=1; } - thd->variables.tx_isolation=thd->session_tx_isolation; - if (operation_done) - statistic_increment(thd->status_var.ha_rollback_count,&LOCK_status); - thd->proc_info= save_proc_info; + statistic_increment(thd->status_var.ha_rollback_count,&LOCK_status); + *ht=0; // keep it conveniently zero-filled } -#endif /* USING_TRANSACTIONS */ DBUG_RETURN(error); } - /* - Rolls the current transaction back to a savepoint. - Return value: 0 if success, 1 if there was not a savepoint of the given - name. - NOTE: how do we handle this (unlikely but legal) case: - [transaction] + [update to non-trans table] + [rollback to savepoint] ? - The problem occurs when a savepoint is before the update to the - non-transactional table. Then when there's a rollback to the savepoint, if we - simply truncate the binlog cache, we lose the part of the binlog cache where - the update is. 
If we want to not lose it, we need to write the SAVEPOINT - command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter - is easy: it's just write at the end of the binlog cache, but the former - should be *inserted* to the place where the user called SAVEPOINT. The - solution is that when the user calls SAVEPOINT, we write it to the binlog - cache (so no need to later insert it). As transactions are never intermixed - in the binary log (i.e. they are serialized), we won't have conflicts with - savepoint names when using mysqlbinlog or in the slave SQL thread. - Then when ROLLBACK TO SAVEPOINT is called, if we updated some - non-transactional table, we don't truncate the binlog cache but instead write - ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which - will chop the SAVEPOINT command from the binlog cache, which is good as in - that case there is no need to have it in the binlog). + note, that according to the sql standard (ISO/IEC 9075-2:2003) + section "4.33.4 SQL-statements and transaction states", + SAVEPOINT is *not* transaction-initiating SQL-statement */ -int ha_rollback_to_savepoint(THD *thd, char *savepoint_name) +int ha_savepoint(THD *thd, SAVEPOINT *sv) { - my_off_t binlog_cache_pos=0; - bool operation_done=0; int error=0; - DBUG_ENTER("ha_rollback_to_savepoint"); + THD_TRANS *trans=&thd->transaction.all; + handlerton **ht=trans->ht; + DBUG_ENTER("ha_savepoint"); + DBUG_ASSERT(thd->transaction.stmt.ht[0] == 0); #ifdef USING_TRANSACTIONS - if (opt_using_transactions) + for (; *ht; ht++) { -#ifdef HAVE_INNOBASE_DB - /* - Retrieve the trans_log binlog cache position corresponding to the - savepoint, and if the rollback is successful inside InnoDB reset the write - position in the binlog cache to what it was at the savepoint. - */ - if ((error=innobase_rollback_to_savepoint(thd, savepoint_name, - &binlog_cache_pos))) + int err; + if (! 
(*ht)->savepoint_set) { - my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), error); + my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT"); error=1; + break; } - else if (mysql_bin_log.is_open()) - { - /* - Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some - non-transactional table. Otherwise, truncate the binlog cache starting - from the SAVEPOINT command. - */ - if (unlikely((thd->options & OPTION_STATUS_NO_TRANS_UPDATE) && - my_b_tell(&thd->transaction.trans_log))) - { - Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); - if (mysql_bin_log.write(&qinfo)) - error= 1; - } - else - reinit_io_cache(&thd->transaction.trans_log, WRITE_CACHE, - binlog_cache_pos, 0, 0); - } - operation_done=1; -#endif - if (operation_done) - statistic_increment(thd->status_var.ha_rollback_count,&LOCK_status); - } -#endif /* USING_TRANSACTIONS */ - - DBUG_RETURN(error); -} - -int ha_release_savepoint_name(THD *thd, char *savepoint_name) -{ - my_off_t binlog_cache_pos=0; - bool operation_done=0; - int error=0; - DBUG_ENTER("ha_release_savepoint_name"); -#ifdef USING_TRANSACTIONS - if (opt_using_transactions) - { -#ifdef HAVE_INNOBASE_DB - if ((error=innobase_release_savepoint_name(thd, savepoint_name))) - { - my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), error); + if ((err= (*(*ht)->savepoint_set)(thd, (byte *)(sv+1)+(*ht)->savepoint_offset))) + { // cannot happen + my_error(ER_GET_ERRNO, MYF(0), err); error=1; } - else if (mysql_bin_log.is_open()) - { - Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); - if (mysql_bin_log.write(&qinfo)) - error= 1; - } - operation_done=1; -#endif + statistic_increment(thd->status_var.ha_savepoint_count,&LOCK_status); } + sv->nht=trans->nht; #endif /* USING_TRANSACTIONS */ - DBUG_RETURN(error); } - -/* -Sets a transaction savepoint. 
-Return value: always 0, that is, succeeds always -*/ - -int ha_savepoint(THD *thd, char *savepoint_name) +int ha_release_savepoint(THD *thd, SAVEPOINT *sv) { int error=0; - DBUG_ENTER("ha_savepoint"); -#ifdef USING_TRANSACTIONS - if (opt_using_transactions) + handlerton **ht=thd->transaction.all.ht, **end_ht; + DBUG_ENTER("ha_release_savepoint"); + DBUG_ASSERT(thd->transaction.stmt.ht[0] == 0); + + end_ht=ht+sv->nht; + for (; ht < end_ht; ht++) { - /* Write it to the binary log (see comments of ha_rollback_to_savepoint) */ - if (mysql_bin_log.is_open()) - { -#ifdef HAVE_INNOBASE_DB - innobase_savepoint(thd,savepoint_name, - my_b_tell(&thd->transaction.trans_log)); -#endif - Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); - if (mysql_bin_log.write(&qinfo)) - error= 1; + int err; + if (!(*ht)->savepoint_release) + continue; + if ((err= (*(*ht)->savepoint_release)(thd, (byte *)(sv+1)+(*ht)->savepoint_offset))) + { // cannot happen + my_error(ER_GET_ERRNO, MYF(0), err); + error=1; } -#ifdef HAVE_INNOBASE_DB - else - innobase_savepoint(thd,savepoint_name,0); -#endif } -#endif /* USING_TRANSACTIONS */ DBUG_RETURN(error); } @@ -984,61 +1044,6 @@ int ha_delete_table(enum db_type table_type, const char *path) return error; } - -void ha_store_ptr(byte *buff, uint pack_length, my_off_t pos) -{ - switch (pack_length) { -#if SIZEOF_OFF_T > 4 - case 8: mi_int8store(buff,pos); break; - case 7: mi_int7store(buff,pos); break; - case 6: mi_int6store(buff,pos); break; - case 5: mi_int5store(buff,pos); break; -#endif - case 4: mi_int4store(buff,pos); break; - case 3: mi_int3store(buff,pos); break; - case 2: mi_int2store(buff,(uint) pos); break; - case 1: buff[0]= (uchar) pos; break; - } - return; -} - -my_off_t ha_get_ptr(byte *ptr, uint pack_length) -{ - my_off_t pos; - switch (pack_length) { -#if SIZEOF_OFF_T > 4 - case 8: - pos= (my_off_t) mi_uint8korr(ptr); - break; - case 7: - pos= (my_off_t) mi_uint7korr(ptr); - break; - case 6: - pos= (my_off_t) 
mi_uint6korr(ptr); - break; - case 5: - pos= (my_off_t) mi_uint5korr(ptr); - break; -#endif - case 4: - pos= (my_off_t) mi_uint4korr(ptr); - break; - case 3: - pos= (my_off_t) mi_uint3korr(ptr); - break; - case 2: - pos= (my_off_t) mi_uint2korr(ptr); - break; - case 1: - pos= (my_off_t) mi_uint2korr(ptr); - break; - default: - pos=0; // Impossible - break; - } - return pos; -} - /**************************************************************************** ** General handler functions ****************************************************************************/ @@ -1051,8 +1056,8 @@ int handler::ha_open(const char *name, int mode, int test_if_locked) int error; DBUG_ENTER("handler::ha_open"); DBUG_PRINT("enter",("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d", - name, table->s->db_type, table->db_stat, mode, - test_if_locked)); + name, table->s->db_type, table->db_stat, mode, + test_if_locked)); if ((error=open(name,mode,test_if_locked))) { @@ -1124,10 +1129,9 @@ int handler::read_first_row(byte * buf, uint primary_key) DBUG_RETURN(error); } - /* Generate the next auto-increment number based on increment and offset - + In most cases increment= offset= 1, in which case we get: 1,2,3,4,5,... 
If increment=10 and offset=5 and previous number is 1, we get: @@ -1451,7 +1455,7 @@ void handler::print_error(int error, myf errflag) else my_error(ER_GET_ERRMSG, MYF(0), error, str.ptr(), engine); } - else + else my_error(ER_GET_ERRNO,errflag,error); DBUG_VOID_RETURN; } @@ -1461,9 +1465,9 @@ void handler::print_error(int error, myf errflag) } -/* +/* Return an error message specific to this handler - + SYNOPSIS error error code previously returned by handler buf Pointer to String where to add error message @@ -1770,7 +1774,7 @@ int ha_discover(THD *thd, const char *db, const char *name, error= ndbcluster_discover(thd, db, name, frmblob, frmlen); #endif if (!error) - statistic_increment(ha_discover_count,&LOCK_status); + statistic_increment(thd->status_var.ha_discover_count,&LOCK_status); DBUG_RETURN(error); } diff --git a/sql/handler.h b/sql/handler.h index 64deab48b7d..5968b9a5e1d 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -93,6 +93,11 @@ #define HA_KEY_SWITCH_NONUNIQ_SAVE 2 #define HA_KEY_SWITCH_ALL_SAVE 3 +/* + Note: the following includes binlog and closing 0. + so: innodb+bdb+ndb+binlog+0 +*/ +#define MAX_HA 5 /* Bits in index_ddl_flags(KEY *wanted_index) @@ -192,16 +197,12 @@ enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED, #define HA_CREATE_USED_COMMENT (1L << 16) #define HA_CREATE_USED_PASSWORD (1L << 17) -typedef struct st_thd_trans { - void *bdb_tid; - void *innobase_tid; - bool innodb_active_trans; - void *ndb_tid; -} THD_TRANS; - -#ifndef XIDDATASIZE /* no xa.h included */ +typedef ulonglong my_xid; +#define MYSQL_XID_PREFIX "MySQLXid" +#define MYSQL_XID_PREFIX_LEN 8 // must be a multiple of 8 +#define MYSQL_XID_OFFSET (MYSQL_XID_PREFIX_LEN+sizeof(server_id)) +#define MYSQL_XID_GTRID_LEN (MYSQL_XID_OFFSET+sizeof(my_xid)) -/* XXX - may be we should disable xa completely in this case ? 
*/ #define XIDDATASIZE 128 #define MAXGTRIDSIZE 64 #define MAXBQUALSIZE 64 @@ -210,22 +211,106 @@ struct xid_t { long formatID; long gtrid_length; long bqual_length; - char data[XIDDATASIZE]; -}; + char data[XIDDATASIZE]; // not \0-terminated ! + bool eq(LEX_STRING *l) { return eq(l->length, 0, l->str); } + bool eq(long g, long b, const char *d) + { return g == gtrid_length && b == bqual_length && !memcmp(d, data, g+b); } + void set(LEX_STRING *l) { set(l->length, 0, l->str); } + void set(ulonglong l) + { + set(MYSQL_XID_PREFIX_LEN, 0, MYSQL_XID_PREFIX); + *(ulong*)(data+MYSQL_XID_PREFIX_LEN)=server_id; + *(my_xid*)(data+MYSQL_XID_OFFSET)=l; + gtrid_length=MYSQL_XID_GTRID_LEN; + } + void set(long g, long b, const char *d) + { + formatID=1; + gtrid_length= g; + bqual_length= b; + memcpy(data, d, g+b); + } + bool is_null() { return formatID == -1; } + void null() { formatID= -1; } + my_xid quick_get_my_xid() + { + return *(my_xid*)(data+MYSQL_XID_OFFSET); + } + my_xid get_my_xid() + { + return gtrid_length == MYSQL_XID_GTRID_LEN && bqual_length == 0 && + *(ulong*)(data+MYSQL_XID_PREFIX_LEN) == server_id && + !memcmp(data, MYSQL_XID_PREFIX, MYSQL_XID_PREFIX_LEN) ? + quick_get_my_xid() : 0; + } +}; typedef struct xid_t XID; +/* for recover() handlerton call */ +#define MIN_XID_LIST_SIZE 128 +#define MAX_XID_LIST_SIZE (1024*128) -#endif +/* + handlerton is a singleton structure - one instance per storage engine - + to provide access to storage engine functionality that works on + "global" level (unlike handler class that works on per-table basis) + + usually handlerton instance is defined statically in ha_xxx.cc as + static handlerton { ... } xxx_hton; + + savepoint_*, prepare, recover, and *_by_xid pointers can be 0. +*/ typedef struct { - byte slot; + /* + each storage engine has it's own memory area (actually a pointer) + in the thd, for storing per-connection information. 
+ It is accessed as + + thd->ha_data[xxx_hton.slot] + + slot number is initialized by MySQL after xxx_init() is called. + */ + uint slot; + /* + to store per-savepoint data storage engine is provided with an area + of a requested size (0 is ok here). + savepoint_offset must be initialized statically to the size of + the needed memory to store per-savepoint information. + After xxx_init it is changed to be an offset to savepoint storage + area and need not be used by storage engine. + see binlog_hton and binlog_savepoint_set/rollback for an example. + */ uint savepoint_offset; + /* + handlerton methods: + + close_connection is only called if + thd->ha_data[xxx_hton.slot] is non-zero, so even if you don't need + this storage area - set it to something, so that MySQL would know + this storage engine was accessed in this connection + */ int (*close_connection)(THD *thd); + /* + sv points to an uninitialized storage area of requested size + (see savepoint_offset description) + */ int (*savepoint_set)(THD *thd, void *sv); + /* + sv points to a storage area, that was earlier passed + to the savepoint_set call + */ int (*savepoint_rollback)(THD *thd, void *sv); int (*savepoint_release)(THD *thd, void *sv); + /* + 'all' is true if it's a real commit, that makes persistent changes + 'all' is false if it's not in fact a commit but an end of the + statement that is part of the transaction. + NOTE 'all' is also false in auto-commit mode where 'end of statement' + and 'real commit' mean the same event. 
+ */ int (*commit)(THD *thd, bool all); int (*rollback)(THD *thd, bool all); int (*prepare)(THD *thd, bool all); @@ -234,6 +319,16 @@ typedef struct int (*rollback_by_xid)(XID *xid); } handlerton; +typedef struct st_thd_trans +{ + /* number of entries in the ht[] */ + uint nht; + /* true is not all entries in the ht[] support 2pc */ + bool no_2pc; + /* storage engines that registered themselves for this transaction */ + handlerton *ht[MAX_HA]; +} THD_TRANS; + enum enum_tx_isolation { ISO_READ_UNCOMMITTED, ISO_READ_COMMITTED, ISO_REPEATABLE_READ, ISO_SERIALIZABLE}; @@ -268,6 +363,9 @@ typedef struct st_table TABLE; struct st_foreign_key_info; typedef struct st_foreign_key_info FOREIGN_KEY_INFO; +typedef struct st_savepoint SAVEPOINT; +extern ulong savepoint_alloc_size; + typedef struct st_ha_check_opt { ulong sort_buffer_size; @@ -626,57 +724,72 @@ public: extern struct show_table_type_st sys_table_types[]; extern const char *ha_row_type[]; extern TYPELIB tx_isolation_typelib; +extern handlerton *handlertons[MAX_HA]; +extern ulong total_ha, total_ha_2pc; /* Wrapper functions */ -#define ha_commit_stmt(thd) (ha_commit_trans((thd), &((thd)->transaction.stmt))) -#define ha_rollback_stmt(thd) (ha_rollback_trans((thd), &((thd)->transaction.stmt))) -#define ha_commit(thd) (ha_commit_trans((thd), &((thd)->transaction.all))) -#define ha_rollback(thd) (ha_rollback_trans((thd), &((thd)->transaction.all))) +#define ha_commit_stmt(thd) (ha_commit_trans((thd), FALSE)) +#define ha_rollback_stmt(thd) (ha_rollback_trans((thd), FALSE)) +#define ha_commit(thd) (ha_commit_trans((thd), TRUE)) +#define ha_rollback(thd) (ha_rollback_trans((thd), TRUE)) #define ha_supports_generate(T) (T != DB_TYPE_INNODB && \ T != DB_TYPE_BERKELEY_DB && \ T != DB_TYPE_NDBCLUSTER) +/* lookups */ enum db_type ha_resolve_by_name(const char *name, uint namelen); const char *ha_get_storage_engine(enum db_type db_type); handler *get_new_handler(TABLE *table, enum db_type db_type); -my_off_t ha_get_ptr(byte 
*ptr, uint pack_length); -void ha_store_ptr(byte *buff, uint pack_length, my_off_t pos); +enum db_type ha_checktype(enum db_type database_type); + +/* basic stuff */ int ha_init(void); +TYPELIB *ha_known_exts(void); int ha_panic(enum ha_panic_function flag); +int ha_update_statistics(); void ha_close_connection(THD* thd); -enum db_type ha_checktype(enum db_type database_type); +bool ha_flush_logs(void); +void ha_drop_database(char* path); int ha_create_table(const char *name, HA_CREATE_INFO *create_info, bool update_create_info); +int ha_delete_table(enum db_type db_type, const char *path); + +/* discovery */ int ha_create_table_from_engine(THD* thd, const char *db, const char *name, bool create_if_found); -int ha_delete_table(enum db_type db_type, const char *path); -void ha_drop_database(char* path); +int ha_discover(THD* thd, const char* dbname, const char* name, + const void** frmblob, uint* frmlen); +int ha_find_files(THD *thd,const char *db,const char *path, + const char *wild, bool dir,List<char>* files); +int ha_table_exists(THD* thd, const char* db, const char* name); + +/* key cache */ int ha_init_key_cache(const char *name, KEY_CACHE *key_cache); int ha_resize_key_cache(KEY_CACHE *key_cache); int ha_change_key_cache_param(KEY_CACHE *key_cache); +int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache); int ha_end_key_cache(KEY_CACHE *key_cache); -int ha_start_stmt(THD *thd); -int ha_report_binlog_offset_and_commit(THD *thd, char *log_file_name, - my_off_t end_offset); -int ha_commit_complete(THD *thd); + +/* weird stuff */ int ha_release_temporary_latches(THD *thd); -int ha_update_statistics(); -int ha_commit_trans(THD *thd, THD_TRANS *trans); -int ha_rollback_trans(THD *thd, THD_TRANS *trans); -int ha_rollback_to_savepoint(THD *thd, char *savepoint_name); -int ha_savepoint(THD *thd, char *savepoint_name); -int ha_release_savepoint_name(THD *thd, char *savepoint_name); + +/* transactions: interface to handlerton functions */ +int 
ha_start_consistent_snapshot(THD *thd); +int ha_commit_or_rollback_by_xid(LEX_STRING *ident, bool commit); +int ha_commit_one_phase(THD *thd, bool all); +int ha_rollback_trans(THD *thd, bool all); +int ha_prepare(THD *thd); +int ha_recover(HASH *commit_list); + +/* transactions: these functions never call handlerton functions directly */ +int ha_commit_trans(THD *thd, bool all); int ha_autocommit_or_rollback(THD *thd, int error); -void ha_set_spin_retries(uint retries); -bool ha_flush_logs(void); int ha_enable_transaction(THD *thd, bool on); -int ha_change_key_cache(KEY_CACHE *old_key_cache, - KEY_CACHE *new_key_cache); -int ha_discover(THD* thd, const char* dbname, const char* name, - const void** frmblob, uint* frmlen); -int ha_find_files(THD *thd,const char *db,const char *path, - const char *wild, bool dir,List<char>* files); -int ha_table_exists(THD* thd, const char* db, const char* name); -TYPELIB *ha_known_exts(void); -int ha_start_consistent_snapshot(THD *thd); +void trans_register_ha(THD *thd, bool all, handlerton *ht); + +/* savepoints */ +int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv); +int ha_savepoint(THD *thd, SAVEPOINT *sv); +int ha_release_savepoint(THD *thd, SAVEPOINT *sv); + diff --git a/sql/item_func.cc b/sql/item_func.cc index 1e61474f412..1a3e75821ca 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -2823,7 +2823,7 @@ void item_user_lock_release(User_level_lock *ull) tmp.copy(command, strlen(command), tmp.charset()); tmp.append(ull->key,ull->key_length); tmp.append("\")", 2); - Query_log_event qev(current_thd, tmp.ptr(), tmp.length(),1, FALSE); + Query_log_event qev(current_thd, tmp.ptr(), tmp.length(), 0, FALSE); qev.error_code=0; // this query is always safe to run on slave mysql_bin_log.write(&qev); } diff --git a/sql/lex.h b/sql/lex.h index 7b03950b35a..3894e8f6274 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -19,11 +19,11 @@ #include "lex_symbol.h" -/* We don't want to include sql_yacc.h into gen_lex_hash */ SYM_GROUP 
sym_group_common= {"", ""}; SYM_GROUP sym_group_geom= {"Spatial extentions", "HAVE_SPATIAL"}; SYM_GROUP sym_group_rtree= {"RTree keys", "HAVE_RTREE_KEYS"}; +/* We don't want to include sql_yacc.h into gen_lex_hash */ #ifdef NO_YACC_SYMBOLS #define SYM_OR_NULL(A) 0 #else @@ -314,6 +314,7 @@ static SYMBOL symbols[] = { { "MERGE", SYM(MERGE_SYM)}, { "MICROSECOND", SYM(MICROSECOND_SYM)}, { "MIDDLEINT", SYM(MEDIUMINT)}, /* For powerbuilder */ + { "MIGRATE", SYM(MIGRATE_SYM)}, { "MINUTE", SYM(MINUTE_SYM)}, { "MINUTE_MICROSECOND", SYM(MINUTE_MICROSECOND_SYM)}, { "MINUTE_SECOND", SYM(MINUTE_SECOND_SYM)}, @@ -346,6 +347,7 @@ static SYMBOL symbols[] = { { "OFFSET", SYM(OFFSET_SYM)}, { "OLD_PASSWORD", SYM(OLD_PASSWORD)}, { "ON", SYM(ON)}, + { "ONE", SYM(ONE_SYM)}, { "ONE_SHOT", SYM(ONE_SHOT_SYM)}, { "OPEN", SYM(OPEN_SYM)}, { "OPTIMIZE", SYM(OPTIMIZE)}, @@ -359,6 +361,7 @@ static SYMBOL symbols[] = { { "PACK_KEYS", SYM(PACK_KEYS_SYM)}, { "PARTIAL", SYM(PARTIAL)}, { "PASSWORD", SYM(PASSWORD)}, + { "PHASE", SYM(PHASE_SYM)}, { "POINT", SYM(POINT_SYM)}, { "POLYGON", SYM(POLYGON)}, { "PRECISION", SYM(PRECISION)}, @@ -380,6 +383,7 @@ static SYMBOL symbols[] = { { "READ", SYM(READ_SYM)}, { "READS", SYM(READS_SYM)}, { "REAL", SYM(REAL)}, + { "RECOVER", SYM(RECOVER_SYM)}, { "REDUNDANT", SYM(REDUNDANT_SYM)}, { "REFERENCES", SYM(REFERENCES)}, { "REGEXP", SYM(REGEXP)}, @@ -398,6 +402,7 @@ static SYMBOL symbols[] = { { "RESET", SYM(RESET_SYM)}, { "RESTORE", SYM(RESTORE_SYM)}, { "RESTRICT", SYM(RESTRICT)}, + { "RESUME", SYM(RESUME_SYM)}, { "RETURN", SYM(RETURN_SYM)}, { "RETURNS", SYM(RETURNS_SYM)}, { "REVOKE", SYM(REVOKE)}, @@ -467,6 +472,7 @@ static SYMBOL symbols[] = { { "STRIPED", SYM(RAID_STRIPED_SYM)}, { "SUBJECT", SYM(SUBJECT_SYM)}, { "SUPER", SYM(SUPER_SYM)}, + { "SUSPEND", SYM(SUSPEND_SYM)}, { "TABLE", SYM(TABLE_SYM)}, { "TABLES", SYM(TABLES)}, { "TABLESPACE", SYM(TABLESPACE)}, @@ -528,6 +534,7 @@ static SYMBOL symbols[] = { { "WRITE", SYM(WRITE_SYM)}, { "X509", SYM(X509_SYM)}, { 
"XOR", SYM(XOR)}, + { "XA", SYM(XA_SYM)}, { "YEAR", SYM(YEAR_SYM)}, { "YEAR_MONTH", SYM(YEAR_MONTH_SYM)}, { "ZEROFILL", SYM(ZEROFILL)}, diff --git a/sql/log.cc b/sql/log.cc index d21979a707c..ca9cb6e3238 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -24,7 +24,6 @@ #include "mysql_priv.h" #include "sql_repl.h" -#include "ha_innodb.h" // necessary to cut the binlog when crash recovery #include <my_dir.h> #include <stdarg.h> @@ -39,23 +38,243 @@ ulong sync_binlog_counter= 0; static bool test_if_number(const char *str, long *res, bool allow_wildcards); +static int binlog_close_connection(THD *thd); +static int binlog_savepoint_set(THD *thd, void *sv); +static int binlog_savepoint_rollback(THD *thd, void *sv); +static int binlog_commit(THD *thd, bool all); +static int binlog_rollback(THD *thd, bool all); +static int binlog_prepare(THD *thd, bool all); + +static handlerton binlog_hton = { + 0, + sizeof(my_off_t), /* savepoint size = binlog offset */ + binlog_close_connection, + binlog_savepoint_set, + binlog_savepoint_rollback, + NULL, /* savepoint_release */ + binlog_commit, + binlog_rollback, + binlog_prepare, + NULL, /* recover */ + NULL, /* commit_by_xid */ + NULL /* rollback_by_xid */ +}; + +/* + this function is mostly a placeholder. + conceptually, binlog initialization (now mostly done in MYSQL_LOG::open) + should be moved here. + + for now, we fail if binlog is closed (mysql_bin_log.open() failed for some + reason) - it'll make mysqld to shutdown. 
+*/ + +handlerton *binlog_init() +{ + return mysql_bin_log.is_open() ? &binlog_hton : 0; +} + +static int binlog_close_connection(THD *thd) +{ + IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; + DBUG_ASSERT(mysql_bin_log.is_open() && !my_b_tell(trans_log)); + close_cached_file(trans_log); + my_free((gptr)trans_log, MYF(0)); + return 0; +} + +static inline void binlog_cleanup_trans(IO_CACHE *trans_log) +{ + statistic_increment(binlog_cache_use, &LOCK_status); + if (trans_log->disk_writes != 0) + { + statistic_increment(binlog_cache_disk_use, &LOCK_status); + trans_log->disk_writes= 0; + } + reinit_io_cache(trans_log, WRITE_CACHE, (my_off_t) 0, 0, 1); // cannot fail + trans_log->end_of_file= max_binlog_cache_size; +} + +static int binlog_prepare(THD *thd, bool all) +{ + /* + do nothing. + just pretend we can do 2pc, so that MySQL won't + switch to 1pc. + real work will be done in MYSQL_LOG::log() + */ + return 0; +} + +static int binlog_commit(THD *thd, bool all) +{ + int error; + IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; + DBUG_ENTER("binlog_commit"); + DBUG_ASSERT(mysql_bin_log.is_open() && + (all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))); + + if (!my_b_tell(trans_log)) + { + // we're here because trans_log was flushed in MYSQL_LOG::log() + DBUG_RETURN(0); + } + + /* Update the binary log as we have cached some queries */ + error= mysql_bin_log.write(thd, trans_log); + binlog_cleanup_trans(trans_log); + DBUG_RETURN(error); +} + +static int binlog_rollback(THD *thd, bool all) +{ + int error=0; + IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; + DBUG_ENTER("binlog_rollback"); + /* + first two conditions here are guaranteed - see trans_register_ha() + call below. The third one must be true. If it is not, we're registering + unnecessary, doing extra work.
The cause should be found and eliminated + */ + DBUG_ASSERT(all && mysql_bin_log.is_open() && my_b_tell(trans_log)); + /* + Update the binary log with a BEGIN/ROLLBACK block if we have + cached some queries and we updated some non-transactional + table. Such cases should be rare (updating a + non-transactional table inside a transaction...) + */ + if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE)) + { + Query_log_event qev(thd, "ROLLBACK", 8, TRUE, FALSE); + qev.write(trans_log); + error= mysql_bin_log.write(thd, trans_log); + } + binlog_cleanup_trans(trans_log); + DBUG_RETURN(error); +} + +/* + NOTE: how do we handle this (unlikely but legal) case: + [transaction] + [update to non-trans table] + [rollback to savepoint] ? + The problem occurs when a savepoint is before the update to the + non-transactional table. Then when there's a rollback to the savepoint, if we + simply truncate the binlog cache, we lose the part of the binlog cache where + the update is. If we want to not lose it, we need to write the SAVEPOINT + command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter + is easy: it's just write at the end of the binlog cache, but the former + should be *inserted* to the place where the user called SAVEPOINT. The + solution is that when the user calls SAVEPOINT, we write it to the binlog + cache (so no need to later insert it). As transactions are never intermixed + in the binary log (i.e. they are serialized), we won't have conflicts with + savepoint names when using mysqlbinlog or in the slave SQL thread. + Then when ROLLBACK TO SAVEPOINT is called, if we updated some + non-transactional table, we don't truncate the binlog cache but instead write + ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which + will chop the SAVEPOINT command from the binlog cache, which is good as in + that case there is no need to have it in the binlog). 
+*/ + +static int binlog_savepoint_set(THD *thd, void *sv) +{ + IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; + DBUG_ENTER("binlog_savepoint_set"); + DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log)); + + *(my_off_t *)sv= my_b_tell(trans_log); + /* Write it to the binary log */ + Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); + DBUG_RETURN(mysql_bin_log.write(&qinfo)); +} + +static int binlog_savepoint_rollback(THD *thd, void *sv) +{ + IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; + DBUG_ENTER("binlog_savepoint_rollback"); + DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log)); + + /* + Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some + non-transactional table. Otherwise, truncate the binlog cache starting + from the SAVEPOINT command. + */ + if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE)) + { + Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE); + DBUG_RETURN(mysql_bin_log.write(&qinfo)); + } + reinit_io_cache(trans_log, WRITE_CACHE, *(my_off_t *)sv, 0, 0); + DBUG_RETURN(0); +} + +int check_binlog_magic(IO_CACHE* log, const char** errmsg) +{ + char magic[4]; + DBUG_ASSERT(my_b_tell(log) == 0); + + if (my_b_read(log, (byte*) magic, sizeof(magic))) + { + *errmsg = "I/O error reading the header from the binary log"; + sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno, + log->error); + return 1; + } + if (memcmp(magic, BINLOG_MAGIC, sizeof(magic))) + { + *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL"; + return 1; + } + return 0; +} + +File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg) +{ + File file; + DBUG_ENTER("open_binlog"); + + if ((file = my_open(log_file_name, O_RDONLY | O_BINARY, MYF(MY_WME))) < 0) + { + sql_print_error("Failed to open log (file '%s', errno %d)", + log_file_name, my_errno); + *errmsg = "Could not open 
log file"; + goto err; + } + if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0, + MYF(MY_WME|MY_DONT_CHECK_FILESIZE))) + { + sql_print_error("Failed to create a cache on log (file '%s')", + log_file_name); + *errmsg = "Could not open log file"; + goto err; + } + if (check_binlog_magic(log,errmsg)) + goto err; + DBUG_RETURN(file); + +err: + if (file >= 0) + { + my_close(file,MYF(0)); + end_io_cache(log); + } + DBUG_RETURN(-1); +} #ifdef __NT__ static int eventSource = 0; -void setup_windows_event_source() +void setup_windows_event_source() { - HKEY hRegKey= NULL; + HKEY hRegKey= NULL; DWORD dwError= 0; TCHAR szPath[MAX_PATH]; DWORD dwTypes; - + if (eventSource) // Ensure that we are only called once return; eventSource= 1; // Create the event source registry key - dwError= RegCreateKey(HKEY_LOCAL_MACHINE, + dwError= RegCreateKey(HKEY_LOCAL_MACHINE, "SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\MySQL", &hRegKey); @@ -63,9 +282,8 @@ void setup_windows_event_source() GetModuleFileName(NULL, szPath, MAX_PATH); /* Register EventMessageFile */ - dwError = RegSetValueEx(hRegKey, "EventMessageFile", 0, REG_EXPAND_SZ, + dwError = RegSetValueEx(hRegKey, "EventMessageFile", 0, REG_EXPAND_SZ, (PBYTE) szPath, strlen(szPath)+1); - /* Register supported event types */ dwTypes= (EVENTLOG_ERROR_TYPE | EVENTLOG_WARNING_TYPE | @@ -128,14 +346,14 @@ static int find_uniq_filename(char *name) MYSQL_LOG::MYSQL_LOG() :bytes_written(0), last_time(0), query_start(0), name(0), file_id(1), open_count(1), log_type(LOG_CLOSED), write_error(0), inited(0), - need_start_event(1), description_event_for_exec(0), + need_start_event(1), prepared_xids(0), description_event_for_exec(0), description_event_for_queue(0) { /* We don't want to initialize LOCK_Log here as such initialization depends on safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is called only in main(). Doing initialization here would make it happen - before main(). + before main(). 
*/ index_file_name[0] = 0; bzero((char*) &log_file,sizeof(log_file)); @@ -156,7 +374,7 @@ void MYSQL_LOG::cleanup() if (inited) { inited= 0; - close(LOG_CLOSE_INDEX); + close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT); delete description_event_for_queue; delete description_event_for_exec; (void) pthread_mutex_destroy(&LOCK_log); @@ -168,7 +386,7 @@ void MYSQL_LOG::cleanup() int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name) -{ +{ fn_format(new_name,log_name,mysql_data_home,"",4); if (log_type != LOG_NORMAL) { @@ -209,6 +427,66 @@ void MYSQL_LOG::init_pthread_objects() (void) pthread_cond_init(&update_cond, 0); } +const char *MYSQL_LOG::generate_name(const char *log_name, + const char *suffix, + bool strip_ext, char *buff) +{ + if (!log_name || !log_name[0]) + { + /* + TODO: The following should be using fn_format(); We just need to + first change fn_format() to cut the file name if it's too long. + */ + strmake(buff,glob_hostname,FN_REFLEN-5); + strmov(fn_ext(buff),suffix); + return (const char *)buff; + } + // get rid of extension if the log is binary to avoid problems + if (strip_ext) + { + char *p = fn_ext(log_name); + uint length=(uint) (p-log_name); + strmake(buff,log_name,min(length,FN_REFLEN-1)); + return (const char*)buff; + } + return log_name; +} + +bool MYSQL_LOG::open_index_file(const char *index_file_name_arg, + const char *log_name) +{ + File index_file_nr= -1; + DBUG_ASSERT(!my_b_inited(&index_file)); + + /* + First open of this class instance + Create an index file that will hold all file names uses for logging. + Add new entries to the end of it.
+ */ + myf opt= MY_UNPACK_FILENAME; + if (!index_file_name_arg) + { + index_file_name_arg= log_name; // Use same basename for index file + opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT; + } + fn_format(index_file_name, index_file_name_arg, mysql_data_home, + ".index", opt); + if ((index_file_nr= my_open(index_file_name, + O_RDWR | O_CREAT | O_BINARY , + MYF(MY_WME))) < 0 || + my_sync(index_file_nr, MYF(MY_WME)) || + init_io_cache(&index_file, index_file_nr, + IO_SIZE, WRITE_CACHE, + my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)), + 0, MYF(MY_WME | MY_WAIT_IF_FULL))) + { + if (index_file_nr >= 0) + my_close(index_file_nr,MYF(0)); + return TRUE; + } + return FALSE; +} + /* Open a (new) log file. @@ -224,35 +502,39 @@ void MYSQL_LOG::init_pthread_objects() 1 error */ -bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, - const char *new_name, const char *index_file_name_arg, - enum cache_type io_cache_type_arg, - bool no_auto_events_arg, +bool MYSQL_LOG::open(const char *log_name, + enum_log_type log_type_arg, + const char *new_name, + enum cache_type io_cache_type_arg, + bool no_auto_events_arg, ulong max_size_arg, bool null_created_arg) { - char buff[512]; - File file= -1, index_file_nr= -1; - int open_flags = O_CREAT | O_APPEND | O_BINARY; + char buff[FN_REFLEN]; + File file= -1; + int open_flags = O_CREAT | O_BINARY; DBUG_ENTER("MYSQL_LOG::open"); - DBUG_PRINT("enter",("log_type: %d",(int) log_type)); + DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg)); last_time=query_start=0; write_error=0; init(log_type_arg,io_cache_type_arg,no_auto_events_arg,max_size_arg); - + if (!(name=my_strdup(log_name,MYF(MY_WME)))) + { + name= (char *)log_name; // for the error message goto err; + } if (new_name) strmov(log_file_name,new_name); else if (generate_new_name(log_file_name, name)) goto err; - + if (io_cache_type == SEQ_READ_APPEND) - open_flags |= O_RDWR; + open_flags |= O_RDWR | O_APPEND; else - open_flags |= O_WRONLY; + open_flags |= O_WRONLY | (log_type 
== LOG_BIN ? 0 : O_APPEND); db[0]=0; open_count++; @@ -311,13 +593,6 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, { bool write_file_name_to_index_file=0; - myf opt= MY_UNPACK_FILENAME; - if (!index_file_name_arg) - { - index_file_name_arg= name; // Use same basename for index file - opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT; - } - if (!my_b_filelength(&log_file)) { /* @@ -333,33 +608,9 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, write_file_name_to_index_file= 1; } - if (!my_b_inited(&index_file)) - { - /* - First open of this class instance - Create an index file that will hold all file names uses for logging. - Add new entries to the end of it. - Index file (and binlog) are so critical for recovery/replication - that we create them with MY_WAIT_IF_FULL. - */ - fn_format(index_file_name, index_file_name_arg, mysql_data_home, - ".index", opt); - if ((index_file_nr= my_open(index_file_name, - O_RDWR | O_CREAT | O_BINARY , - MYF(MY_WME))) < 0 || - my_sync(index_file_nr, MYF(MY_WME)) || - init_io_cache(&index_file, index_file_nr, - IO_SIZE, WRITE_CACHE, - my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)), - 0, MYF(MY_WME | MY_WAIT_IF_FULL))) - goto err; - } - else - { - safe_mutex_assert_owner(&LOCK_index); - reinit_io_cache(&index_file, WRITE_CACHE, my_b_filelength(&index_file), - 0, 0); - } + DBUG_ASSERT(my_b_inited(&index_file)); + reinit_io_cache(&index_file, WRITE_CACHE, + my_b_filelength(&index_file), 0, 0); if (need_start_event && !no_auto_events) { /* @@ -367,6 +618,7 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, even if this is not the very first binlog. 
*/ Format_description_log_event s(BINLOG_VERSION); + s.flags|= LOG_EVENT_BINLOG_IN_USE_F; if (!s.is_valid()) goto err; if (null_created_arg) @@ -401,7 +653,7 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, description_event_for_queue->created= 0; /* Don't set log_pos in event header */ description_event_for_queue->artificial_event=1; - + if (description_event_for_queue->write(&log_file)) goto err; bytes_written+= description_event_for_queue->data_written; @@ -436,11 +688,9 @@ err: sql_print_error("Could not use %s for logging (error %d). \ Turning logging off for the whole duration of the MySQL server process. \ To turn it on again: fix the cause, \ -shutdown the MySQL server and restart it.", log_name, errno); +shutdown the MySQL server and restart it.", name, errno); if (file >= 0) my_close(file,MYF(0)); - if (index_file_nr >= 0) - my_close(index_file_nr,MYF(0)); end_io_cache(&log_file); end_io_cache(&index_file); safeFree(name); @@ -546,8 +796,8 @@ int MYSQL_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name, DBUG_PRINT("enter",("log_name: %s", log_name ? log_name : "NULL")); /* - Mutex needed because we need to make sure the file pointer does not move - from under our feet + Mutex needed because we need to make sure the file pointer does not + move from under our feet */ if (need_lock) pthread_mutex_lock(&LOCK_index); @@ -630,7 +880,7 @@ int MYSQL_LOG::find_next_log(LOG_INFO* linfo, bool need_lock) error = !index_file.error ? 
LOG_INFO_EOF : LOG_INFO_IO; goto err; } - fname[length-1]=0; // kill /n + fname[length-1]=0; // kill \n linfo->index_file_offset = my_b_tell(&index_file); err: @@ -686,7 +936,7 @@ bool MYSQL_LOG::reset_logs(THD* thd) error=1; goto err; } - + for (;;) { my_delete(linfo.log_file_name, MYF(MY_WME)); @@ -699,11 +949,12 @@ bool MYSQL_LOG::reset_logs(THD* thd) my_delete(index_file_name, MYF(MY_WME)); // Reset (open will update) if (!thd->slave_thread) need_start_event=1; - open(save_name, save_log_type, 0, index_file_name, + open_index_file(index_file_name, 0); + open(save_name, save_log_type, 0, io_cache_type, no_auto_events, max_size, 0); my_free((gptr) save_name, MYF(0)); -err: +err: pthread_mutex_unlock(&LOCK_index); pthread_mutex_unlock(&LOCK_log); DBUG_RETURN(error); @@ -722,7 +973,7 @@ err: rli->group_relay_log_name are deleted ; if true, the latter is deleted too (i.e. all relay logs read by the SQL slave thread are deleted). - + NOTE - This is only called from the slave-execute thread when it has read all commands from a relay log and want to switch to a new relay log. @@ -1040,10 +1291,28 @@ void MYSQL_LOG::new_file(bool need_lock) { pthread_mutex_lock(&LOCK_log); pthread_mutex_lock(&LOCK_index); - } + } safe_mutex_assert_owner(&LOCK_log); safe_mutex_assert_owner(&LOCK_index); + /* + if binlog is used as tc log, be sure all xids are "unlogged", + so that on recover we only need to scan one - latest - binlog file + for prepared xids. As this is expected to be a rare event, + simple wait strategy is enough. We're locking LOCK_log to be sure no + new Xid_log_event's are added to the log (and prepared_xids is not + increased), and waiting on COND_prep_xids for late threads to + catch up. 
+ */ + if (prepared_xids) + { + tc_log_page_waits++; + pthread_mutex_lock(&LOCK_prep_xids); + while (prepared_xids) + pthread_cond_wait(&COND_prep_xids, &LOCK_prep_xids); + pthread_mutex_unlock(&LOCK_prep_xids); + } + /* Reuse old name if not binlog and not update log */ new_name_ptr= name; @@ -1055,7 +1324,7 @@ void MYSQL_LOG::new_file(bool need_lock) if (generate_new_name(new_name, name)) goto end; new_name_ptr=new_name; - + if (log_type == LOG_BIN) { if (!no_auto_events) @@ -1074,30 +1343,28 @@ void MYSQL_LOG::new_file(bool need_lock) log rotation should give the waiting thread a signal to discover EOF and move on to the next log. */ - signal_update(); + signal_update(); } old_name=name; save_log_type=log_type; name=0; // Don't free name close(LOG_CLOSE_TO_BE_OPENED); - /* + /* Note that at this point, log_type != LOG_CLOSED (important for is_open()). */ - /* + /* new_file() is only used for rotation (in FLUSH LOGS or because size > - max_binlog_size or max_relay_log_size). + max_binlog_size or max_relay_log_size). If this is a binary log, the Format_description_log_event at the beginning of the new file should have created=0 (to distinguish with the Format_description_log_event written at server startup, which should trigger temp tables deletion on slaves. 
- */ + */ - open(old_name, save_log_type, new_name_ptr, index_file_name, io_cache_type, - no_auto_events, max_size, 1); - if (this == &mysql_bin_log) - report_pos_in_innodb(); + open(old_name, save_log_type, new_name_ptr, + io_cache_type, no_auto_events, max_size, 1); my_free(old_name,MYF(0)); end: @@ -1286,8 +1553,7 @@ inline bool sync_binlog(IO_CACHE *cache) bool MYSQL_LOG::write(Log_event* event_info) { THD *thd=event_info->thd; - bool called_handler_commit=0; - bool error=0; + bool error=1; bool should_rotate = 0; DBUG_ENTER("MYSQL_LOG::write(event)"); @@ -1298,26 +1564,10 @@ bool MYSQL_LOG::write(Log_event* event_info) mostly called if is_open() *was* true a few instructions before, but it could have changed since. */ - if (is_open()) + if (likely(is_open())) { const char *local_db= event_info->get_db(); IO_CACHE *file= &log_file; -#ifdef USING_TRANSACTIONS - /* - Should we write to the binlog cache or to the binlog on disk? - Write to the binlog cache if: - - it is already not empty (meaning we're in a transaction; note that the - present event could be about a non-transactional table, but still we need - to write to the binlog cache in that case to handle updates to mixed - trans/non-trans table types the best possible in binlogging) - - or if the event asks for it (cache_stmt == true). - */ - if (opt_using_transactions && - (event_info->get_cache_stmt() || - (thd && my_b_tell(&thd->transaction.trans_log)))) - file= &thd->transaction.trans_log; -#endif - DBUG_PRINT("info",("event type=%d",event_info->get_type_code())); #ifdef HAVE_REPLICATION /* In the future we need to add to the following if tests like @@ -1333,7 +1583,50 @@ bool MYSQL_LOG::write(Log_event* event_info) } #endif /* HAVE_REPLICATION */ - error=1; +#ifdef USING_TRANSACTIONS + /* + Should we write to the binlog cache or to the binlog on disk? 
+ Write to the binlog cache if: + - it is already not empty (meaning we're in a transaction; note that the + present event could be about a non-transactional table, but still we need + to write to the binlog cache in that case to handle updates to mixed + trans/non-trans table types the best possible in binlogging) + - or if the event asks for it (cache_stmt == true). + */ + if (opt_using_transactions && thd) + { + IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot]; + + if (event_info->get_cache_stmt()) + { + if (!trans_log) + { + thd->ha_data[binlog_hton.slot]= trans_log= (IO_CACHE *) + my_malloc(sizeof(IO_CACHE), MYF(MY_ZEROFILL)); + if (!trans_log || open_cached_file(trans_log, mysql_tmpdir, LOG_PREFIX, + binlog_cache_size, MYF(MY_WME))) + { + my_free((gptr)trans_log, MYF(MY_ALLOW_ZERO_PTR)); + thd->ha_data[binlog_hton.slot]= trans_log= 0; + goto err; + } + trans_log->end_of_file= max_binlog_cache_size; + trans_register_ha(thd, + thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN), + &binlog_hton); + } + else if (!my_b_tell(trans_log)) + trans_register_ha(thd, + thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN), + &binlog_hton); + file= trans_log; + } + else if (trans_log && my_b_tell(trans_log)) + file= trans_log; + } +#endif + DBUG_PRINT("info",("event type=%d",event_info->get_type_code())); + /* No check for auto events flag here - this write method should never be called if auto-events are enabled @@ -1432,17 +1725,6 @@ COLLATION_CONNECTION=%u,COLLATION_DATABASE=%u,COLLATION_SERVER=%u", goto err; } } -#ifdef TO_BE_REMOVED - if (thd->variables.convert_set) - { - char buf[256], *p; - p= strmov(strmov(buf, "SET CHARACTER SET "), - thd->variables.convert_set->name); - Query_log_event e(thd, buf, (ulong) (p - buf), 0, FALSE); - if (e.write(file)) - goto err; - } -#endif } /* Write the SQL command */ @@ -1450,71 +1732,12 @@ COLLATION_CONNECTION=%u,COLLATION_DATABASE=%u,COLLATION_SERVER=%u", if (event_info->write(file)) goto err; - /* - Tell for 
transactional table handlers up to which position in the - binlog file we wrote. The table handler can store this info, and - after crash recovery print for the user the offset of the last - transactions which were recovered. Actually, we must also call - the table handler commit here, protected by the LOCK_log mutex, - because otherwise the transactions may end up in a different order - in the table handler log! - - Note that we will NOT call ha_report_binlog_offset_and_commit() if - there are binlog events cached in the transaction cache. That is - because then the log event which we write to the binlog here is - not a transactional event. In versions < 4.0.13 before this fix this - caused an InnoDB transaction to be committed if in the middle there - was a MyISAM event! - */ - if (file == &log_file) // we are writing to the real log (disk) { if (flush_io_cache(file) || sync_binlog(file)) goto err; - if (opt_using_transactions && - !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) - { - /* - LOAD DATA INFILE in AUTOCOMMIT=1 mode writes to the binlog - chunks also before it is successfully completed. We only report - the binlog write and do the commit inside the transactional table - handler if the log event type is appropriate. - */ - - if (event_info->get_type_code() == QUERY_EVENT || - event_info->get_type_code() == EXEC_LOAD_EVENT) - { -#ifndef DBUG_OFF - if (unlikely(opt_crash_binlog_innodb)) - { - /* - This option is for use in rpl_crash_binlog_innodb.test. - 1st we want to verify that Binlog_dump thread cannot send the - event now (because of LOCK_log): we here tell the Binlog_dump - thread to wake up, sleep for the slave to have time to possibly - receive data from the master (it should not), and then crash. - 2nd we want to verify that at crash recovery the rolled back - event is cut from the binlog. 
- */ - if (!(--opt_crash_binlog_innodb)) - { - signal_update(); - sleep(2); - fprintf(stderr,"This is a normal crash because of" - " --crash-binlog-innodb\n"); - assert(0); - } - DBUG_PRINT("info",("opt_crash_binlog_innodb: %d", - opt_crash_binlog_innodb)); - } -#endif - error = ha_report_binlog_offset_and_commit(thd, log_file_name, - file->pos_in_file); - called_handler_commit=1; - } - } - /* We wrote to the real log, check automatic rotation; */ + /* check automatic rotation; */ DBUG_PRINT("info",("max_size: %lu",max_size)); should_rotate= (my_b_tell(file) >= (my_off_t) max_size); } @@ -1533,7 +1756,7 @@ err: signal_update(); if (should_rotate) { - pthread_mutex_lock(&LOCK_index); + pthread_mutex_lock(&LOCK_index); new_file(0); // inside mutex pthread_mutex_unlock(&LOCK_index); } @@ -1541,15 +1764,6 @@ err: pthread_mutex_unlock(&LOCK_log); - /* - Flush the transactional handler log file now that we have released - LOCK_log; the flush is placed here to eliminate the bottleneck on the - group commit - */ - - if (called_handler_commit) - ha_commit_complete(thd); - #ifdef HAVE_REPLICATION if (should_rotate && expire_logs_days) { @@ -1577,16 +1791,18 @@ uint MYSQL_LOG::next_file_id() SYNOPSIS write() - thd + thd cache The cache to copy to the binlog - commit_or_rollback If true, will write "COMMIT" in the end, if false will - write "ROLLBACK". NOTE - We only come here if there is something in the cache. - The thing in the cache is always a complete transaction - 'cache' needs to be reinitialized after this functions returns. + TODO + fix it to become atomic - either the complete cache is added to binlog + or nothing (other storage engines rely on this, doing a ROLLBACK) + IMPLEMENTATION - To support transaction over replication, we wrap the transaction with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log. @@ -1595,29 +1811,21 @@ uint MYSQL_LOG::next_file_id() same updates are run on the slave. 
*/ -bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool commit_or_rollback) +bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache) { bool should_rotate= 0, error= 0; VOID(pthread_mutex_lock(&LOCK_log)); DBUG_ENTER("MYSQL_LOG::write(cache"); - if (is_open()) // Should always be true + if (likely(is_open())) // Should always be true { uint length; /* - Add the "BEGIN" and "COMMIT" in the binlog around transactions - which may contain more than 1 SQL statement. If we run with - AUTOCOMMIT=1, then MySQL immediately writes each SQL statement to - the binlog when the statement has been completed. No need to add - "BEGIN" ... "COMMIT" around such statements. Otherwise, MySQL uses - thd->transaction.trans_log to cache the SQL statements until the - explicit commit, and at the commit writes the contents in .trans_log - to the binlog. - - We write the "BEGIN" mark first in the buffer (.trans_log) where we - store the SQL statements for a transaction. At the transaction commit - we will add the "COMMIT mark and write the buffer to the binlog. + Log "BEGIN" at the beginning of the transaction. + which may contain more than 1 SQL statement. 
+ There is no need to append "COMMIT", as it's already in the 'cache' + (in fact, Xid_log_event is there which does the commit on slaves) */ { Query_log_event qinfo(thd, "BEGIN", 5, TRUE, FALSE); @@ -1643,6 +1851,7 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool commit_or_rollback) if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) goto err; length=my_b_bytes_in_cache(cache); + DBUG_EXECUTE_IF("half_binlogged_transaction", length-=100;); do { /* Write data to the binary log file */ @@ -1651,46 +1860,15 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool commit_or_rollback) cache->read_pos=cache->read_end; // Mark buffer used up } while ((length=my_b_fill(cache))); - /* - We write the command "COMMIT" as the last SQL command in the - binlog segment cached for this transaction - */ - - { - Query_log_event qinfo(thd, - commit_or_rollback ? "COMMIT" : "ROLLBACK", - commit_or_rollback ? 6 : 8, - TRUE, FALSE); - qinfo.error_code= 0; - if (qinfo.write(&log_file) || flush_io_cache(&log_file) || - sync_binlog(&log_file)) + if (flush_io_cache(&log_file) || sync_binlog(&log_file)) goto err; - } + DBUG_EXECUTE_IF("half_binlogged_transaction", abort();); if (cache->error) // Error on read { sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno); write_error=1; // Don't give more errors goto err; } -#ifndef DBUG_OFF - if (unlikely(opt_crash_binlog_innodb)) - { - /* see the previous MYSQL_LOG::write() method for a comment */ - if (!(--opt_crash_binlog_innodb)) - { - signal_update(); - sleep(2); - fprintf(stderr, "This is a normal crash because of" - " --crash-binlog-innodb\n"); - assert(0); - } - DBUG_PRINT("info",("opt_crash_binlog_innodb: %d", - opt_crash_binlog_innodb)); - } -#endif - if ((ha_report_binlog_offset_and_commit(thd, log_file_name, - log_file.pos_in_file))) - goto err; signal_update(); DBUG_PRINT("info",("max_size: %lu",max_size)); if (should_rotate= (my_b_tell(&log_file) >= (my_off_t) max_size)) @@ -1703,12 +1881,6 @@ bool MYSQL_LOG::write(THD 
*thd, IO_CACHE *cache, bool commit_or_rollback) } VOID(pthread_mutex_unlock(&LOCK_log)); - /* Flush the transactional handler log file now that we have released - LOCK_log; the flush is placed here to eliminate the bottleneck on the - group commit */ - - ha_commit_complete(thd); - #ifdef HAVE_REPLICATION if (should_rotate && expire_logs_days) { @@ -1894,11 +2066,11 @@ void MYSQL_LOG::wait_for_update(THD* thd, bool master_or_slave) SYNOPSIS close() - exiting Bitmask for one or more of the following bits: - LOG_CLOSE_INDEX if we should close the index file - LOG_CLOSE_TO_BE_OPENED if we intend to call open - at once after close. - LOG_CLOSE_STOP_EVENT write a 'stop' event to the log + exiting Bitmask for one or more of the following bits: + LOG_CLOSE_INDEX if we should close the index file + LOG_CLOSE_TO_BE_OPENED if we intend to call open + at once after close. + LOG_CLOSE_STOP_EVENT write a 'stop' event to the log NOTES One can do an open on the object at once after doing a close. @@ -1922,6 +2094,15 @@ void MYSQL_LOG::close(uint exiting) } #endif /* HAVE_REPLICATION */ end_io_cache(&log_file); + + /* don't pwrite in a file opened with O_APPEND - it doesn't work */ + if (log_file.type == WRITE_CACHE && log_type == LOG_BIN) + { + my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET; + char flags=0; // clearing LOG_EVENT_BINLOG_IN_USE_F + my_pwrite(log_file.file, &flags, 1, offset, MYF(0)); + } + if (my_close(log_file.file,MYF(0)) < 0 && ! write_error) { write_error=1; @@ -2106,145 +2287,6 @@ bool flush_error_log() return result; } - -/* - If the server has InnoDB on, and InnoDB has published the position of the - last committed transaction (which happens only if a crash recovery occured at - this startup) then truncate the previous binary log at the position given by - InnoDB. If binlog is shorter than the position, print a message to the error - log. 
- - SYNOPSIS - cut_spurious_tail() - - RETURN VALUES - 1 Error - 0 Ok -*/ - -bool MYSQL_LOG::cut_spurious_tail() -{ - int error= 0; - DBUG_ENTER("cut_spurious_tail"); - -#ifdef HAVE_INNOBASE_DB - if (have_innodb != SHOW_OPTION_YES) - DBUG_RETURN(0); - /* - This is the place where we use information from InnoDB to cut the - binlog. - */ - char *name= ha_innobase::get_mysql_bin_log_name(); - ulonglong pos= ha_innobase::get_mysql_bin_log_pos(); - ulonglong actual_size; - char llbuf1[22], llbuf2[22]; - - if (name[0] == 0 || pos == ULONGLONG_MAX) - { - DBUG_PRINT("info", ("InnoDB has not set binlog info")); - DBUG_RETURN(0); - } - /* The binlog given by InnoDB normally is never an active binlog */ - if (is_open() && is_active(name)) - { - sql_print_error("Warning: after InnoDB crash recovery, InnoDB says that " - "the binary log of the previous run has the same name " - "'%s' as the current one; this is likely to be abnormal.", - name); - DBUG_RETURN(1); - } - sql_print_error("After InnoDB crash recovery, checking if the binary log " - "'%s' contains rolled back transactions which must be " - "removed from it...", name); - /* If we have a too long binlog, cut. If too short, print error */ - int fd= my_open(name, O_EXCL | O_APPEND | O_BINARY | O_WRONLY, MYF(MY_WME)); - if (fd < 0) - { - int save_errno= my_errno; - sql_print_error("Could not open the binary log '%s' for truncation.", - name); - if (save_errno != ENOENT) - sql_print_error("The binary log '%s' should not be used for " - "replication.", name); - DBUG_RETURN(1); - } - - if (pos > (actual_size= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME)))) - { - /* - Note that when we have MyISAM rollback this error message should be - reconsidered. - */ - sql_print_error("The binary log '%s' is shorter than its expected size " - "(actual: %s, expected: %s) so it misses at least one " - "committed transaction; so it should not be used for " - "replication or point-in-time recovery. 
You would need " - "to restart slaves from a fresh master's data " - "snapshot ", - name, llstr(actual_size, llbuf1), - llstr(pos, llbuf2)); - error= 1; - goto err; - } - if (pos < actual_size) - { - sql_print_error("The binary log '%s' is bigger than its expected size " - "(actual: %s, expected: %s) so it contains a rolled back " - "transaction; now truncating that.", name, - llstr(actual_size, llbuf1), llstr(pos, llbuf2)); - /* - As on some OS, my_chsize() can only pad with 0s instead of really - truncating. Then mysqlbinlog (and Binlog_dump thread) will error on - these zeroes. This is annoying, but not more (you just need to manually - switch replication to the next binlog). Fortunately, in my_chsize.c, it - says that all modern machines support real ftruncate(). - - */ - if ((error= my_chsize(fd, pos, 0, MYF(MY_WME)))) - goto err; - } -err: - if (my_close(fd, MYF(MY_WME))) - error= 1; -#endif - DBUG_RETURN(error); -} - - -/* - If the server has InnoDB on, store the binlog name and position into - InnoDB. This function is used every time we create a new binlog. - - SYNOPSIS - report_pos_in_innodb() - - NOTES - This cannot simply be done in MYSQL_LOG::open(), because when we create - the first binlog at startup, we have not called ha_init() yet so we cannot - write into InnoDB yet. 
- - RETURN VALUES - 1 Error - 0 Ok -*/ - -void MYSQL_LOG::report_pos_in_innodb() -{ - DBUG_ENTER("report_pos_in_innodb"); -#ifdef HAVE_INNOBASE_DB - if (is_open() && have_innodb == SHOW_OPTION_YES) - { - DBUG_PRINT("info", ("Reporting binlog info into InnoDB - " - "name: '%s' position: %d", - log_file_name, my_b_tell(&log_file))); - innobase_store_binlog_offset_and_flush_log(log_file_name, - my_b_tell(&log_file)); - } -#endif - DBUG_VOID_RETURN; -} - - void MYSQL_LOG::signal_update() { DBUG_ENTER("MYSQL_LOG::signal_update"); @@ -2309,7 +2351,7 @@ void print_buffer_to_nt_eventlog(enum loglevel level, char *buff, vprint_msg_to_log() event_type Type of event to write (Error, Warning, or Info) format Printf style format of message - args va_list list of arguments for the message + args va_list list of arguments for the message NOTE @@ -2375,3 +2417,630 @@ void sql_print_information(const char *format, ...) DBUG_VOID_RETURN; } + +/********* transaction coordinator log for 2pc - mmap() based solution *******/ + +/* + the log consists of a file, mmapped to a memory. + file is divided on pages of tc_log_page_size size. + (usable size of the first page is smaller because of log header) + there's PAGE control structure for each page + each page (or rather PAGE control structure) can be in one of three + states - active, syncing, pool. + there could be only one page in active or syncing states, + but many in pool - pool is fifo queue. + usual lifecycle of a page is pool->active->syncing->pool + "active" page - is a page where new xid's are logged. + the page stays active as long as syncing slot is taken. + "syncing" page is being synced to disk. no new xid can be added to it. + when the sync is done the page is moved to a pool and an active page + becomes "syncing". + + the result of such an architecture is a natural "commit grouping" - + If commits are coming faster than the system can sync, they do not + stall. 
Instead, all commit that came since the last sync are + logged to the same page, and they all are synced with the next - + one - sync. Thus, thought individual commits are delayed, throughput + is not decreasing. + + when a xid is added to an active page, the thread of this xid waits + for a page's condition until the page is synced. when syncing slot + becomes vacant one of these waiters is awaken to take care of syncing. + it syncs the page and signals all waiters that the page is synced. + PAGE::waiters is used to count these waiters, and a page may never + become active again until waiters==0 (that is all waiters from the + previous sync have noticed the sync was completed) + + note, that the page becomes "dirty" and has to be synced only when a + new xid is added into it. Removing a xid from a page does not make it + dirty - we don't sync removals to disk. +*/ +#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1) + +static const char tc_log_magic[]={254, 0x23, 0x05, 0x74}; + +uint opt_tc_log_size=TC_LOG_MIN_SIZE; +uint tc_log_max_pages_used=0, tc_log_page_size=0, + tc_log_page_waits=0, tc_log_cur_pages_used=0; + +TC_LOG *tc_log; +TC_LOG_MMAP tc_log_mmap; +TC_LOG_DUMMY tc_log_dummy; + +int TC_LOG_MMAP::open(const char *opt_name) +{ + uint i; + bool crashed=FALSE; + PAGE *pg; + + DBUG_ASSERT(total_ha_2pc > 1); + DBUG_ASSERT(opt_name && opt_name[0]); + +#ifdef HAVE_GETPAGESIZE + tc_log_page_size=getpagesize(); + DBUG_ASSERT(TC_LOG_PAGE_SIZE % tc_log_page_size == 0); +#else + tc_log_page_size=TC_LOG_PAGE_SIZE; +#endif + + fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME); + fd= my_open(logname, O_RDWR, MYF(0)); + if (fd == -1) + { + if (my_errno != ENOENT) + goto err; + if (using_heuristic_recover()) + return 1; + fd= my_create(logname, O_RDWR, 0, MYF(MY_WME)); + if (fd == -1) + goto err; + inited=1; + file_length= opt_tc_log_size; + if (my_chsize(fd, file_length, 0, MYF(MY_WME))) + goto err; + } + else + { + inited= 1; + crashed= TRUE; + 
sql_print_information("Recovering after a crash"); + if (tc_heuristic_recover) + { + sql_print_error("Cannot perform automatic crash recovery when " + "--tc-heuristic-recover is used"); + goto err; + } + file_length= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE)); + if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size) + goto err; + } + + data= (uchar *)my_mmap(0, file_length, PROT_READ|PROT_WRITE, + MAP_NOSYNC|MAP_SHARED, fd, 0); + if (data == MAP_FAILED) + { + my_errno=errno; + goto err; + } + inited=2; + + npages=file_length/tc_log_page_size; + DBUG_ASSERT(npages >= 3); // to guarantee non-empty pool + if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL)))) + goto err; + inited=3; + for (pg=pages, i=0; i < npages; i++, pg++) + { + pg->next=pg+1; + pg->waiters=0; + pg->state=POOL; + pthread_mutex_init(&pg->lock, MY_MUTEX_INIT_FAST); + pthread_cond_init (&pg->cond, 0); + pg->start=(my_xid *)(data + i*tc_log_page_size); + pg->end=(my_xid *)(pg->start + tc_log_page_size); + pg->size=pg->free=tc_log_page_size/sizeof(my_xid); + } + pages[0].size=pages[0].free= + (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid); + pages[0].start=pages[0].end-pages[0].size; + pages[npages-1].next=0; + inited=4; + + if (crashed && recover()) + goto err; + + memcpy(data, tc_log_magic, sizeof(tc_log_magic)); + data[sizeof(tc_log_magic)]= total_ha_2pc; + my_msync(fd, data, tc_log_page_size, MS_SYNC); + inited=5; + + pthread_mutex_init(&LOCK_sync, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_active, MY_MUTEX_INIT_FAST); + pthread_mutex_init(&LOCK_pool, MY_MUTEX_INIT_FAST); + pthread_cond_init(&COND_active, 0); + pthread_cond_init(&COND_pool, 0); + + inited=6; + + syncing= 0; + active=pages; + pool=pages+1; + pool_last=pages+npages-1; + + return 0; + +err: + close(); + return 1; +} + +/* + there is no active page, let's got one from the pool + + two strategies here: + 1. take the first from the pool + 2. 
if there're waiters - take the one with the most free space + + TODO page merging. try to allocate adjacent page first, + so that they can be flushed both in one sync +*/ +void TC_LOG_MMAP::get_active_from_pool() +{ + PAGE **p, **best_p=0; + int best_free; + + if (syncing) + pthread_mutex_lock(&LOCK_pool); + + do + { + best_p= p= &pool; + if ((*p)->waiters == 0) // can the first page be used ? + break; // yes - take it. + + best_free=0; // no - trying second strategy + for (p=&(*p)->next; *p; p=&(*p)->next) + { + if ((*p)->waiters == 0 && (*p)->free > best_free) + { + best_free=(*p)->free; + best_p=p; + } + } + } + while ((*best_p == 0 || best_free == 0) && overflow()); + + active=*best_p; + if (active->free == active->size) // we've chosen an empty page + { + tc_log_cur_pages_used++; + set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used); + } + + if ((*best_p)->next) // unlink the page from the pool + *best_p=(*best_p)->next; + else + pool_last=*best_p; + + if (syncing) + pthread_mutex_unlock(&LOCK_pool); +} + +int TC_LOG_MMAP::overflow() +{ + /* + simple overflow handling - just wait + TODO perhaps, increase log size ? + let's check the behaviour of tc_log_page_waits first + */ + tc_log_page_waits++; + pthread_cond_wait(&COND_pool, &LOCK_pool); + return 1; // always return 1 +} + +/* + all access to active page is serialized but it's not a problem, as + we're assuming that fsync() will be a main bottleneck. + That is, parallelizing writes to log pages we'll decrease number of + threads waiting for a page, but then all these threads will be waiting + for a fsync() anyway + + RETURN + 0 - error + otherwise - "cookie", a number that will be passed as an argument + to unlog() call. tc_log can define it any way it wants, + and use for whatever purposes. TC_LOG_MMAP sets it + to the position in memory where xid was logged to. 
+*/ + +int TC_LOG_MMAP::log(THD *thd, my_xid xid) +{ + int err; + PAGE *p; + ulong cookie; + + pthread_mutex_lock(&LOCK_active); + + /* + if active page is full - just wait... + frankly speaking, active->free here accessed outside of mutex + protection, but it's safe, because it only means we may miss an + unlog() for the active page, and we're not waiting for it here - + unlog() does not signal COND_active. + */ + while (unlikely(active && active->free == 0)) + pthread_cond_wait(&COND_active, &LOCK_active); + + /* no active page ? take one from the pool */ + if (active == 0) + get_active_from_pool(); + + p=active; + pthread_mutex_lock(&p->lock); + + /* searching for an empty slot */ + while (*p->ptr) + { + p->ptr++; + DBUG_ASSERT(p->ptr < p->end); // because p->free > 0 + } + + /* found! store xid there and mark the page dirty */ + cookie= (ulong)((uchar *)p->ptr - data); // can never be zero + *p->ptr++= xid; + p->free--; + p->state= DIRTY; + + /* to sync or not to sync - this is the question */ + pthread_mutex_unlock(&LOCK_active); + pthread_mutex_lock(&LOCK_sync); + pthread_mutex_unlock(&p->lock); + + if (syncing) + { // somebody's syncing. let's wait + p->waiters++; + /* + note - it must be while(), not do ... while() here + as p->state may be not DIRTY when we come here + */ + while (p->state == DIRTY && syncing) + pthread_cond_wait(&p->cond, &LOCK_sync); + p->waiters--; + err= p->state == ERROR; + if (p->state != DIRTY) // page was synced + { + if (p->waiters == 0) + pthread_cond_signal(&COND_pool); // in case somebody's waiting + pthread_mutex_unlock(&LOCK_sync); + goto done; // we're done + } + } // page was not synced! 
do it now + DBUG_ASSERT(active == p && syncing == 0); + pthread_mutex_lock(&LOCK_active); + syncing=p; // place is vacant - take it + active=0; // page is not active anymore + pthread_cond_broadcast(&COND_active); // in case somebody's waiting + pthread_mutex_unlock(&LOCK_active); + pthread_mutex_unlock(&LOCK_sync); + err= sync(); + +done: + return err ? 0 : cookie; +} + +int TC_LOG_MMAP::sync() +{ + int err; + + DBUG_ASSERT(syncing != active); + + /* + sit down and relax - this can take a while... + note - no locks are held at this point + */ + err= my_msync(fd, syncing->start, 1, MS_SYNC); + + /* page is synced. let's move it to the pool */ + pthread_mutex_lock(&LOCK_pool); + pool_last->next=syncing; + pool_last=syncing; + syncing->next=0; + syncing->state= err ? ERROR : POOL; + pthread_cond_broadcast(&syncing->cond); // signal "sync done" + pthread_cond_signal(&COND_pool); // in case somebody's waiting + pthread_mutex_unlock(&LOCK_pool); + + /* marking 'syncing' slot free */ + pthread_mutex_lock(&LOCK_sync); + syncing=0; + pthread_cond_signal(&active->cond); // wake up a new syncer + pthread_mutex_unlock(&LOCK_sync); + return err; +} + +/* + erase xid from the page, update page free space counters/pointers. + cookie points directly to the memory where xid was logged +*/ +void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid) +{ + PAGE *p=pages+(cookie/tc_log_page_size); + my_xid *x=(my_xid *)(data+cookie); + + DBUG_ASSERT(*x == xid); + DBUG_ASSERT(x >= p->start && x < p->end); + *x=0; + + pthread_mutex_lock(&p->lock); + p->free++; + DBUG_ASSERT(p->free <= p->size); + set_if_smaller(p->ptr, x); + if (p->free == p->size) // the page is completely empty + statistic_decrement(tc_log_cur_pages_used, &LOCK_status); + if (p->waiters == 0) // the page is in pool and ready to rock + pthread_cond_signal(&COND_pool); // ping ... 
for overflow() + pthread_mutex_unlock(&p->lock); +} + +void TC_LOG_MMAP::close() +{ + switch (inited) { + case 6: + pthread_mutex_destroy(&LOCK_sync); + pthread_mutex_destroy(&LOCK_active); + pthread_mutex_destroy(&LOCK_pool); + pthread_cond_destroy(&COND_pool); + case 5: + data[0]='A'; // garble the first (signature) byte, in case my_delete fails + case 4: + for (uint i=0; i < npages; i++) + { + if (pages[i].ptr == 0) + break; + pthread_mutex_destroy(&pages[i].lock); + pthread_cond_destroy(&pages[i].cond); + } + case 3: + my_free((gptr)pages, MYF(0)); + case 2: + my_munmap(data, file_length); + case 1: + my_close(fd, MYF(0)); + } + if (inited>=5) // cannot do in the switch because of Windows + my_delete(logname, MYF(MY_WME)); + inited=0; +} + +int TC_LOG_MMAP::recover() +{ + HASH xids; + PAGE *p=pages, *end_p=pages+npages; + + if (memcmp(data, tc_log_magic, sizeof(tc_log_magic))) + { + sql_print_error("Bad magic header in tc log"); + goto err1; + } + + /* + the first byte after magic signature is set to current + number of storage engines on startup + */ + if (data[sizeof(tc_log_magic)] != total_ha_2pc) + { + sql_print_error("Recovery failed! You must have enabled " + "exactly %d storage engines that support " + "two-phase commit protocol", + data[sizeof(tc_log_magic)]); + goto err1; + } + + if (hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0, + sizeof(my_xid), 0, 0, MYF(0))) + goto err1; + + for ( ; p < end_p ; p++) + { + for (my_xid *x=p->start; x < p->end; x++) + if (*x && my_hash_insert(&xids, (byte *)x)) + goto err2; // OOM + } + + if (ha_recover(&xids)) + goto err2; + + hash_free(&xids); + bzero(data, file_length); + return 0; + +err2: + hash_free(&xids); +err1: + sql_print_error("Crash recovery failed. 
Either correct the problem " + "(if it's, for example, out of memory error) and restart, " + "or delete tc log and start mysqld with " + "--tc-heuristic-recover={commit|rollback}"); + return 1; +} + +/* + Perform heuristic recovery, if --tc-heuristic-recover was used + + RETURN VALUE + 0 no heuristic recovery was requested + 1 heuristic recovery was performed + + NOTE + no matter whether heuristic recovery was successful or not + mysqld must exit. So, return value is the same in both cases. +*/ + +int TC_LOG::using_heuristic_recover() +{ + if (!tc_heuristic_recover) + return 0; + + sql_print_information("Heuristic crash recovery mode"); + if (ha_recover(0)) + sql_print_error("Heuristic crash recovery failed"); + sql_print_information("Please restart mysqld without --tc-heuristic-recover"); + return 1; +} + +/****** transaction coordinator log for 2pc - binlog() based solution ******/ +#define TC_LOG_BINLOG MYSQL_LOG + +/* + TODO keep in-memory list of prepared transactions + (add to list in log(), remove on unlog()) + and copy it to the new binlog if rotated + but let's check the behaviour of tc_log_page_waits first! +*/ + +int TC_LOG_BINLOG::open(const char *opt_name) +{ + LOG_INFO log_info; + int error= 1; + + DBUG_ASSERT(total_ha_2pc > 1); + DBUG_ASSERT(opt_name && opt_name[0]); + + pthread_mutex_init(&LOCK_prep_xids, MY_MUTEX_INIT_FAST); + pthread_cond_init (&COND_prep_xids, 0); + + if (using_heuristic_recover()) + return 1; + + if ((error= find_log_pos(&log_info, NullS, 1))) + { + if (error != LOG_INFO_EOF) + sql_print_error("find_log_pos() failed (error: %d)", error); + else + error= 0; + goto err; + } + + { + const char *errmsg; + char last_event_type=UNKNOWN_EVENT; + IO_CACHE log; + File file; + Log_event *ev=0; + Format_description_log_event fdle(BINLOG_VERSION); + char log_name[FN_REFLEN]; + + if (! 
fdle.is_valid()) + goto err; + + for (error= 0; !error ;) + { + strnmov(log_name, log_info.log_file_name, sizeof(log_name)); + if ((error= find_next_log(&log_info, 1)) != LOG_INFO_EOF) + { + sql_print_error("find_log_pos() failed (error: %d)", error); + goto err; + } + } + + if ((file= open_binlog(&log, log_name, &errmsg)) < 0) + { + sql_print_error("%s", errmsg); + goto err; + } + + if ((ev= Log_event::read_log_event(&log, 0, &fdle)) && + ev->get_type_code() == FORMAT_DESCRIPTION_EVENT && + ev->flags & LOG_EVENT_BINLOG_IN_USE_F) + error= recover(&log, (Format_description_log_event *)ev); + else + error=0; + + delete ev; + end_io_cache(&log); + my_close(file, MYF(MY_WME)); + + if (error) + goto err; + } + +err: + return error; +} + +/* this is called on shutdown, after ha_panic */ +void TC_LOG_BINLOG::close() +{ + DBUG_ASSERT(prepared_xids==0); + pthread_mutex_destroy(&LOCK_prep_xids); + pthread_cond_destroy (&COND_prep_xids); +} + +/* + TODO group commit + + RETURN + 0 - error + 1 - success +*/ +int TC_LOG_BINLOG::log(THD *thd, my_xid xid) +{ + Xid_log_event xle(thd, xid); + if (xle.write((IO_CACHE*)thd->ha_data[binlog_hton.slot])) + return 0; + thread_safe_increment(prepared_xids, &LOCK_prep_xids); + return !binlog_commit(thd,1); // invert return value +} + +void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) +{ + if (thread_safe_dec_and_test(prepared_xids, &LOCK_prep_xids)) + pthread_cond_signal(&COND_prep_xids); +} + +int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle) +{ + Log_event *ev; + HASH xids; + MEM_ROOT mem_root; + + if (! 
fdle->is_valid() || + hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0, + sizeof(my_xid), 0, 0, MYF(0))) + goto err1; + + init_alloc_root(&mem_root, tc_log_page_size, tc_log_page_size); + + fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error + + while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid()) + { + if (ev->get_type_code() == XID_EVENT) + { + Xid_log_event *xev=(Xid_log_event *)ev; + byte *x=memdup_root(&mem_root, (char *)& xev->xid, sizeof(xev->xid)); + if (! x) + goto err2; + my_hash_insert(&xids, x); + } + delete ev; + } + + if (ha_recover(&xids)) + goto err2; + + free_root(&mem_root, MYF(0)); + hash_free(&xids); + return 0; + +err2: + free_root(&mem_root, MYF(0)); + hash_free(&xids); +err1: + sql_print_error("Crash recovery failed. Either correct the problem " + "(if it's, for example, out of memory error) and restart, " + "or delete (or rename) binary log and start mysqld with " + "--tc-heuristic-recover={commit|rollback}"); + return 1; +} + diff --git a/sql/log_event.cc b/sql/log_event.cc index d2a0e8642f9..142e58ec9d5 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -269,9 +269,10 @@ const char* Log_event::get_type_str() case DELETE_FILE_EVENT: return "Delete_file"; case EXEC_LOAD_EVENT: return "Exec_load"; case RAND_EVENT: return "RAND"; + case XID_EVENT: return "Xid"; case USER_VAR_EVENT: return "User var"; case FORMAT_DESCRIPTION_EVENT: return "Format_desc"; - default: return "Unknown"; /* impossible */ + default: return "Unknown"; /* impossible */ } } @@ -286,16 +287,15 @@ Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans) { server_id= thd->server_id; when= thd->start_time; - cache_stmt= (using_trans && - (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))); + cache_stmt= using_trans; } /* - This minimal constructor is for when you are not even sure that there is a - valid THD. 
For example in the server when we are shutting down or flushing - logs after receiving a SIGHUP (then we must write a Rotate to the binlog but - we have no THD, so we need this minimal constructor). + This minimal constructor is for when you are not even sure that there + is a valid THD. For example in the server when we are shutting down or + flushing logs after receiving a SIGHUP (then we must write a Rotate to + the binlog but we have no THD, so we need this minimal constructor). */ Log_event::Log_event() @@ -314,12 +314,12 @@ Log_event::Log_event() */ Log_event::Log_event(const char* buf, - const Format_description_log_event* description_event) + const Format_description_log_event* description_event) :temp_buf(0), cache_stmt(0) { #ifndef MYSQL_CLIENT thd = 0; -#endif +#endif when = uint4korr(buf); server_id = uint4korr(buf + SERVER_ID_OFFSET); if (description_event->binlog_version==1) @@ -331,14 +331,14 @@ Log_event::Log_event(const char* buf, /* 4.0 or newer */ log_pos= uint4korr(buf + LOG_POS_OFFSET); /* - If the log is 4.0 (so here it can only be a 4.0 relay log read by the SQL - thread or a 4.0 master binlog read by the I/O thread), log_pos is the - beginning of the event: we transform it into the end of the event, which is - more useful. - But how do you know that the log is 4.0: you know it if description_event - is version 3 *and* you are not reading a Format_desc (remember that - mysqlbinlog starts by assuming that 5.0 logs are in 4.0 format, until it - finds a Format_desc). + If the log is 4.0 (so here it can only be a 4.0 relay log read by + the SQL thread or a 4.0 master binlog read by the I/O thread), + log_pos is the beginning of the event: we transform it into the end + of the event, which is more useful. + But how do you know that the log is 4.0: you know it if + description_event is version 3 *and* you are not reading a + Format_desc (remember that mysqlbinlog starts by assuming that 5.0 + logs are in 4.0 format, until it finds a Format_desc). 
*/ if (description_event->binlog_version==3 && buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos) @@ -346,13 +346,13 @@ Log_event::Log_event(const char* buf, /* If log_pos=0, don't change it. log_pos==0 is a marker to mean "don't change rli->group_master_log_pos" (see - inc_group_relay_log_pos()). As it is unreal log_pos, adding the event - len's is nonsense. For example, a fake Rotate event should + inc_group_relay_log_pos()). As it is unreal log_pos, adding the + event len's is nonsense. For example, a fake Rotate event should not have its log_pos (which is 0) changed or it will modify - Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense value - of (a non-zero offset which does not exist in the master's binlog, so - which will cause problems if the user uses this value in - CHANGE MASTER). + Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense + value of (a non-zero offset which does not exist in the master's + binlog, so which will cause problems if the user uses this value + in CHANGE MASTER). */ log_pos+= uint4korr(buf + EVENT_LEN_OFFSET); } @@ -363,16 +363,17 @@ Log_event::Log_event(const char* buf, (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT)) { /* - These events always have a header which stops here (i.e. their header is - FROZEN). + These events always have a header which stops here (i.e. their + header is FROZEN). */ /* - Initialization to zero of all other Log_event members as they're not - specified. Currently there are no such members; in the future there will - be an event UID (but Format_description and Rotate don't need this UID, - as they are not propagated through --log-slave-updates (remember the UID - is used to not play a query twice when you have two masters which are - slaves of a 3rd master). Then we are done. + Initialization to zero of all other Log_event members as they're + not specified. 
Currently there are no such members; in the future + there will be an event UID (but Format_description and Rotate + don't need this UID, as they are not propagated through + --log-slave-updates (remember the UID is used to not play a query + twice when you have two masters which are slaves of a 3rd master). + Then we are done. */ return; } @@ -405,10 +406,10 @@ int Log_event::exec_event(struct st_relay_log_info* rli) if (rli) { /* - If in a transaction, and if the slave supports transactions, - just inc_event_relay_log_pos(). We only have to check for OPTION_BEGIN - (not OPTION_NOT_AUTOCOMMIT) as transactions are logged - with BEGIN/COMMIT, not with SET AUTOCOMMIT= . + If in a transaction, and if the slave supports transactions, just + inc_event_relay_log_pos(). We only have to check for OPTION_BEGIN + (not OPTION_NOT_AUTOCOMMIT) as transactions are logged with + BEGIN/COMMIT, not with SET AUTOCOMMIT= . CAUTION: opt_using_transactions means innodb || bdb ; suppose the master supports InnoDB and BDB, @@ -416,17 +417,18 @@ int Log_event::exec_event(struct st_relay_log_info* rli) will arise: - suppose an InnoDB table is created on the master, - then it will be MyISAM on the slave - - but as opt_using_transactions is true, the slave will believe he is - transactional with the MyISAM table. And problems will come when one - does START SLAVE; STOP SLAVE; START SLAVE; (the slave will resume at - BEGIN whereas there has not been any rollback). This is the problem of - using opt_using_transactions instead of a finer - "does the slave support _the_transactional_handler_used_on_the_master_". - - More generally, we'll have problems when a query mixes a transactional - handler and MyISAM and STOP SLAVE is issued in the middle of the - "transaction". START SLAVE will resume at BEGIN while the MyISAM table - has already been updated. + - but as opt_using_transactions is true, the slave will believe he + is transactional with the MyISAM table. 
And problems will come + when one does START SLAVE; STOP SLAVE; START SLAVE; (the slave + will resume at BEGIN whereas there has not been any rollback). + This is the problem of using opt_using_transactions instead of a + finer "does the slave support + _the_transactional_handler_used_on_the_master_". + + More generally, we'll have problems when a query mixes a + transactional handler and MyISAM and STOP SLAVE is issued in the + middle of the "transaction". START SLAVE will resume at BEGIN + while the MyISAM table has already been updated. */ if ((thd->options & OPTION_BEGIN) && opt_using_transactions) rli->inc_event_relay_log_pos(); @@ -435,8 +437,8 @@ int Log_event::exec_event(struct st_relay_log_info* rli) rli->inc_group_relay_log_pos(log_pos); flush_relay_log_info(rli); /* - Note that Rotate_log_event::exec_event() does not call this function, - so there is no chance that a fake rotate event resets + Note that Rotate_log_event::exec_event() does not call this + function, so there is no chance that a fake rotate event resets last_master_timestamp. 
Note that we update without mutex (probably ok - except in some very rare cases, only consequence is that value may take some time to @@ -649,11 +651,9 @@ end: #ifndef MYSQL_CLIENT #define UNLOCK_MUTEX if (log_lock) pthread_mutex_unlock(log_lock); #define LOCK_MUTEX if (log_lock) pthread_mutex_lock(log_lock); -#define max_allowed_packet current_thd->variables.max_allowed_packet #else #define UNLOCK_MUTEX #define LOCK_MUTEX -#define max_allowed_packet (*mysql_get_parameters()->p_max_allowed_packet) #endif /* @@ -670,16 +670,17 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, #else Log_event* Log_event::read_log_event(IO_CACHE* file, const Format_description_log_event *description_event) -#endif +#endif { + DBUG_ENTER("Log_event::read_log_event(IO_CACHE *, Format_description_log_event *"); DBUG_ASSERT(description_event); char head[LOG_EVENT_MINIMAL_HEADER_LEN]; /* First we only want to read at most LOG_EVENT_MINIMAL_HEADER_LEN, just to check the event for sanity and to know its length; no need to really parse it. We say "at most" because this could be a 3.23 master, which has header - of 13 bytes, whereas LOG_EVENT_MINIMAL_HEADER_LEN is 19 bytes (it's "minimal" - over the set {MySQL >=4.0}). + of 13 bytes, whereas LOG_EVENT_MINIMAL_HEADER_LEN is 19 bytes (it's + "minimal" over the set {MySQL >=4.0}). */ uint header_size= min(description_event->common_header_len, LOG_EVENT_MINIMAL_HEADER_LEN); @@ -692,17 +693,21 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, failed my_b_read")); UNLOCK_MUTEX; /* - No error here; it could be that we are at the file's end. However if the - next my_b_read() fails (below), it will be an error as we were able to - read the first bytes. + No error here; it could be that we are at the file's end. However + if the next my_b_read() fails (below), it will be an error as we + were able to read the first bytes. 
*/ - return 0; + DBUG_RETURN(0); } uint data_len = uint4korr(head + EVENT_LEN_OFFSET); char *buf= 0; const char *error= 0; Log_event *res= 0; +#ifndef max_allowed_packet + THD *thd=current_thd; + uint max_allowed_packet= thd ? thd->variables.max_allowed_packet : ~0; +#endif if (data_len > max_allowed_packet) { @@ -729,16 +734,16 @@ failed my_b_read")); error = "read error"; goto err; } - if ((res= read_log_event(buf, data_len, &error, - description_event))) + if ((res= read_log_event(buf, data_len, &error, description_event))) res->register_temp_buf(buf); err: UNLOCK_MUTEX; - if (error) + if (!res) { - sql_print_error("\ -Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d", + DBUG_ASSERT(error); + sql_print_error("Error in Log_event::read_log_event(): " + "'%s', data_len: %d, event_type: %d", error,data_len,head[EVENT_TYPE_OFFSET]); my_free(buf, MYF(MY_ALLOW_ZERO_PTR)); /* @@ -751,7 +756,7 @@ Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d", */ file->error= -1; } - return res; + DBUG_RETURN(res); } @@ -775,7 +780,7 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len, *error="Sanity check failed"; // Needed to free buffer DBUG_RETURN(NULL); // general sanity check - will fail on a partial read } - + switch(buf[EVENT_TYPE_OFFSET]) { case QUERY_EVENT: ev = new Query_log_event(buf, event_len, description_event); @@ -809,14 +814,15 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len, case START_EVENT_V3: /* this is sent only by MySQL <=4.x */ ev = new Start_log_event_v3(buf, description_event); break; -#ifdef HAVE_REPLICATION case STOP_EVENT: ev = new Stop_log_event(buf, description_event); break; -#endif /* HAVE_REPLICATION */ case INTVAR_EVENT: ev = new Intvar_log_event(buf, description_event); break; + case XID_EVENT: + ev = new Xid_log_event(buf, description_event); + break; case RAND_EVENT: ev = new Rand_log_event(buf, description_event); break; @@ -831,14 +837,15 @@ Log_event* 
Log_event::read_log_event(const char* buf, uint event_len, ev= NULL; break; } + /* - is_valid() are small event-specific sanity tests which are important; for - example there are some my_malloc() in constructors - (e.g. Query_log_event::Query_log_event(char*...)); when these my_malloc() - fail we can't return an error out of the constructor (because constructor - is "void") ; so instead we leave the pointer we wanted to allocate - (e.g. 'query') to 0 and we test it in is_valid(). Same for - Format_description_log_event, member 'post_header_len'. + is_valid() are small event-specific sanity tests which are + important; for example there are some my_malloc() in constructors + (e.g. Query_log_event::Query_log_event(char*...)); when these + my_malloc() fail we can't return an error out of the constructor + (because constructor is "void") ; so instead we leave the pointer we + wanted to allocate (e.g. 'query') to 0 and we test it in is_valid(). + Same for Format_description_log_event, member 'post_header_len'. */ if (!ev || !ev->is_valid()) { @@ -1082,8 +1089,8 @@ bool Query_log_event::write(IO_CACHE* file) return (write_header(file, event_length) || my_b_safe_write(file, (byte*) buf, (uint) (start-buf)) || - my_b_safe_write(file, (db) ? (byte*) db : (byte*)"", db_len + 1) || - my_b_safe_write(file, (byte*) query, q_len)) ? 1 : 0; + my_b_safe_write(file, (db) ? (byte*) db : (byte*)"", db_len + 1) || + my_b_safe_write(file, (byte*) query, q_len)) ? 1 : 0; } @@ -1095,7 +1102,7 @@ bool Query_log_event::write(IO_CACHE* file) Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length, bool using_trans, bool suppress_use) - :Log_event(thd_arg, + :Log_event(thd_arg, ((thd_arg->tmp_table_used ? LOG_EVENT_THREAD_SPECIFIC_F : 0) | (suppress_use ? 
LOG_EVENT_SUPPRESS_USE_F : 0)), using_trans), @@ -1303,18 +1310,12 @@ void Query_log_event::print(FILE* file, bool short_form, my_fwrite(file, (byte*) buff, (uint) (end-buff),MYF(MY_NABP | MY_WME)); if (flags & LOG_EVENT_THREAD_SPECIFIC_F) fprintf(file,"SET @@session.pseudo_thread_id=%lu;\n",(ulong)thread_id); + /* - Now the session variables; - it's more efficient to pass SQL_MODE as a number instead of a - comma-separated list. - FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only - variables (they have no global version; they're not listed in sql_class.h), - The tests below work for pure binlogs or pure relay logs. Won't work for - mixed relay logs but we don't create mixed relay logs (that is, there is no - relay log with a format change except within the 3 first events, which - mysqlbinlog handles gracefully). So this code should always be good. + If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to + print (remember we don't produce mixed relay logs so there cannot be + 5.0 events before that one so there is nothing to reset). */ - if (likely(flags2_inited)) /* likely as this will mainly read 5.0 logs */ { /* tmp is a bitmask of bits which have changed. */ @@ -1343,9 +1344,16 @@ void Query_log_event::print(FILE* file, bool short_form, } /* - If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to print - (remember we don't produce mixed relay logs so there cannot be 5.0 events - before that one so there is nothing to reset). + Now the session variables; + it's more efficient to pass SQL_MODE as a number instead of a + comma-separated list. + FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only + variables (they have no global version; they're not listed in + sql_class.h), The tests below work for pure binlogs or pure relay + logs. 
Won't work for mixed relay logs but we don't create mixed + relay logs (that is, there is no relay log with a format change + except within the 3 first events, which mysqlbinlog handles + gracefully). So this code should always be good. */ if (likely(sql_mode_inited)) @@ -1439,7 +1447,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli) thd->query_length= q_len; thd->query = (char*)query; VOID(pthread_mutex_lock(&LOCK_thread_count)); - thd->query_id = query_id++; + thd->query_id = next_query_id(); VOID(pthread_mutex_unlock(&LOCK_thread_count)); thd->variables.pseudo_thread_id= thread_id; // for temp tables mysql_log.write(thd,COM_QUERY,"%s",thd->query); @@ -1759,15 +1767,19 @@ int Start_log_event_v3::exec_event(struct st_relay_log_info* rli) } /* As a transaction NEVER spans on 2 or more binlogs: - if we have an active transaction at this point, the master died while - writing the transaction to the binary log, i.e. while flushing the binlog - cache to the binlog. As the write was started, the transaction had been - committed on the master, so we lack of information to replay this - transaction on the slave; all we can do is stop with error. - Note: this event could be sent by the master to inform us of the format - of its binlog; in other words maybe it is not at its original place when - it comes to us; we'll know this by checking log_pos ("artificial" events - have log_pos == 0). + if we have an active transaction at this point, the master died + while writing the transaction to the binary log, i.e. while + flushing the binlog cache to the binlog. As the write was started, + the transaction had been committed on the master, so we lack of + information to replay this transaction on the slave; all we can do + is stop with error. 
+ Note: this event could be sent by the master to inform us of the + format of its binlog; in other words maybe it is not at its + original place when it comes to us; we'll know this by checking + log_pos ("artificial" events have log_pos == 0). + + TODO test whether it's really necessary, as slave.cc does ROLLBACK + itself */ if (!artificial_event && (thd->options & OPTION_BEGIN)) { @@ -1836,8 +1848,7 @@ binary log."); */ Format_description_log_event:: -Format_description_log_event(uint8 binlog_ver, - const char* server_ver) +Format_description_log_event(uint8 binlog_ver, const char* server_ver) :Start_log_event_v3() { created= when; @@ -1849,7 +1860,7 @@ Format_description_log_event(uint8 binlog_ver, number_of_event_types= LOG_EVENT_TYPES; /* we'll catch my_malloc() error in is_valid() */ post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8), - MYF(0)); + MYF(MY_ZEROFILL)); /* This long list of assignments is not beautiful, but I see no way to make it nicer, as the right members are #defines, not array members, so @@ -1859,18 +1870,13 @@ Format_description_log_event(uint8 binlog_ver, { post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN; post_header_len[QUERY_EVENT-1]= QUERY_HEADER_LEN; - post_header_len[STOP_EVENT-1]= 0; post_header_len[ROTATE_EVENT-1]= ROTATE_HEADER_LEN; - post_header_len[INTVAR_EVENT-1]= 0; post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN; - post_header_len[SLAVE_EVENT-1]= 0; post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN; post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN; post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN; post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN; post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1]; - post_header_len[RAND_EVENT-1]= 0; - post_header_len[USER_VAR_EVENT-1]= 0; post_header_len[FORMAT_DESCRIPTION_EVENT-1]= FORMAT_DESCRIPTION_HEADER_LEN; } break; @@ -1957,8 +1963,7 @@ Format_description_log_event(const char* buf, /* If alloc fails, 
we'll detect it in is_valid() */ post_header_len= (uint8*) my_memdup((byte*)buf+ST_COMMON_HEADER_LEN_OFFSET+1, number_of_event_types* - sizeof(*post_header_len), - MYF(0)); + sizeof(*post_header_len), MYF(0)); DBUG_VOID_RETURN; } @@ -2038,7 +2043,7 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli) /************************************************************************** Load_log_event methods General note about Load_log_event: the binlogging of LOAD DATA INFILE is - going to be changed in 5.0 (or maybe in 4.1; not decided yet). + going to be changed in 5.0 (or maybe in 5.1; not decided yet). However, the 5.0 slave could still have to read such events (from a 4.x master), convert them (which just means maybe expand the header, when 5.0 servers have a UID in events) (remember that whatever is after the header @@ -2553,7 +2558,7 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, { thd->set_time((time_t)when); VOID(pthread_mutex_lock(&LOCK_thread_count)); - thd->query_id = query_id++; + thd->query_id = next_query_id(); VOID(pthread_mutex_unlock(&LOCK_thread_count)); /* Initing thd->row_count is not necessary in theory as this variable has no @@ -3069,6 +3074,74 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli) /************************************************************************** + Xid_log_event methods +**************************************************************************/ + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) +void Xid_log_event::pack_info(Protocol *protocol) +{ + char buf[128], *pos; + pos= strmov(buf, "COMMIT /* xid="); + pos= longlong10_to_str(xid, pos, 10); + pos= strmov(pos, " */"); + protocol->store(buf, (uint) (pos-buf), &my_charset_bin); +} +#endif + +/* + NOTE it's ok not to use int8store here, + as long as xid_t::set(ulonglong) and + xid_t::get_my_xid doesn't do it either + + we don't care about actual values of xids as long as + identical numbers compare 
identically +*/ +Xid_log_event::Xid_log_event(const char* buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) +{ + buf+= description_event->common_header_len; + xid=*((my_xid *)buf); +} + + +bool Xid_log_event::write(IO_CACHE* file) +{ + return write_header(file, sizeof(xid)) || + my_b_safe_write(file, (byte*) &xid, sizeof(xid)); +} + + +#ifdef MYSQL_CLIENT +void Xid_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) +{ + if (!short_form) + { + char buf[64]; + longlong10_to_str(xid, buf, 10); + + print_header(file); + fprintf(file, "\tXid = %s\n", buf); + fflush(file); + } + fprintf(file, "COMMIT;\n"); +} +#endif /* MYSQL_CLIENT */ + + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) +int Xid_log_event::exec_event(struct st_relay_log_info* rli) +{ + rli->inc_event_relay_log_pos(); + /* For a slave Xid_log_event is COMMIT */ + thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE); + thd->server_status&= ~SERVER_STATUS_IN_TRANS; + return ha_commit(thd); +} +#endif /* !MYSQL_CLIENT */ + + +/************************************************************************** User_var_log_event methods **************************************************************************/ diff --git a/sql/log_event.h b/sql/log_event.h index 7f04582a32d..f108b890829 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -302,6 +302,41 @@ struct sql_ex_info #define LOG_EVENT_TIME_F 0x1 #define LOG_EVENT_FORCED_ROTATE_F 0x2 #endif + +/* + This flag only makes sense for Format_description_log_event. It is set + when the event is written, and *reset* when a binlog file is + closed (yes, it's the only case when MySQL modifies already written + part of binlog). Thus it is a reliable indicator that binlog was + closed correctly. (Stop_log_event is not enough, there's always a + small chance that mysqld crashes in the middle of insert and end of + the binlog would look like a Stop_log_event). 
+ + This flag is used to detect a restart after a crash, + and to provide "unbreakable" binlog. The problem is that on a crash + storage engines rollback automatically, while binlog does not. + To solve this we use this flag and automatically append ROLLBACK + to every non-closed binlog (append virtually, on reading, file itself + is not changed). If this flag is found, mysqlbinlog simply prints "ROLLBACK". + Replication master does not abort on binlog corruption, but takes it as EOF, + and replication slave forces a rollback in this case (see below). + + Note that old binlogs do not have this flag set, so we get + backward-compatible behaviour. +*/ + +#define LOG_EVENT_BINLOG_IN_USE_F 0x1 + +/* + This flag is only used for fake Rotate_log_event. When a master, doing + binlog dump, reaches the end of the binlog and fakes a rotate to make + the slave go to a new file, this flag is used if there was no + "natural" Rotate_log_event. + If this flag is set, slave will execute ROLLBACK before going further +*/ + +#define LOG_EVENT_FORCE_ROLLBACK_F 0x1 + /* If the query depends on the thread (for example: TEMPORARY TABLE). Currently this is used by mysqlbinlog to know it must print @@ -311,26 +346,6 @@ struct sql_ex_info #define LOG_EVENT_THREAD_SPECIFIC_F 0x4 /* - OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be written - to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written into the - Format_description_log_event, so that if later we don't want to replicate a - variable we did replicate, or the contrary, it's doable. But it should not be - too hard to decide once for all of what we replicate and what we don't, among - the fixed 32 bits of thd->options. - I (Guilhem) have read through every option's usage, and it looks like - OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones which alter - how the query modifies the table. 
It's good to replicate - OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may insert data - slower than the master, in InnoDB. - OPTION_BIG_SELECTS is not needed (the slave thread runs with - max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed either, as - the manual says (because a too big in-memory temp table is automatically - written to disk). -*/ -#define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_AUTO_IS_NULL | \ -OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS) - -/* Suppress the generation of 'USE' statements before the actual statement. This flag should be set for any events that does not need the current database set to function correctly. Most notable cases @@ -343,24 +358,51 @@ OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS) */ #define LOG_EVENT_SUPPRESS_USE_F 0x8 +/* + OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be + written to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written + into the Format_description_log_event, so that if later we don't want + to replicate a variable we did replicate, or the contrary, it's + doable. But it should not be too hard to decide once for all of what + we replicate and what we don't, among the fixed 32 bits of + thd->options. + I (Guilhem) have read through every option's usage, and it looks like + OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones + which alter how the query modifies the table. It's good to replicate + OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may + insert data slower than the master, in InnoDB. + OPTION_BIG_SELECTS is not needed (the slave thread runs with + max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed + either, as the manual says (because a too big in-memory temp table is + automatically written to disk). 
+*/ +#define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_AUTO_IS_NULL | \ +OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS) + enum Log_event_type { /* Every time you update this enum (when you add a type), you have to - update the code of Format_description_log_event::Format_description_log_event(). - Make sure you always insert new types ***BEFORE*** ENUM_END_EVENT. + fix Format_description_log_event::Format_description_log_event(). */ UNKNOWN_EVENT= 0, START_EVENT_V3, QUERY_EVENT, STOP_EVENT, ROTATE_EVENT, INTVAR_EVENT, LOAD_EVENT, SLAVE_EVENT, CREATE_FILE_EVENT, APPEND_BLOCK_EVENT, EXEC_LOAD_EVENT, DELETE_FILE_EVENT, /* - NEW_LOAD_EVENT is like LOAD_EVENT except that it has a longer sql_ex, - allowing multibyte TERMINATED BY etc; both types share the same class - (Load_log_event) + NEW_LOAD_EVENT is like LOAD_EVENT except that it has a longer + sql_ex, allowing multibyte TERMINATED BY etc; both types share the + same class (Load_log_event) */ NEW_LOAD_EVENT, RAND_EVENT, USER_VAR_EVENT, FORMAT_DESCRIPTION_EVENT, + XID_EVENT, + + /* + add new events here - right above this comment! + existing events should never change their numbers + */ + ENUM_END_EVENT /* end marker */ }; @@ -461,16 +503,15 @@ public: ulong data_written; /* - The master's server id (is preserved in the relay log; used to prevent from - infinite loops in circular replication). + The master's server id (is preserved in the relay log; used to + prevent from infinite loops in circular replication). */ uint32 server_id; /* - Some 16 flags. Only one is really used now; look above for - LOG_EVENT_TIME_F, LOG_EVENT_FORCED_ROTATE_F, - LOG_EVENT_THREAD_SPECIFIC_F, and LOG_EVENT_SUPPRESS_USE_F for - notes. + Some 16 flags. Look above for LOG_EVENT_TIME_F, + LOG_EVENT_FORCED_ROTATE_F, LOG_EVENT_THREAD_SPECIFIC_F, and + LOG_EVENT_SUPPRESS_USE_F for notes. */ uint16 flags; @@ -566,11 +607,11 @@ public: temp_buf = 0; } } - virtual int get_data_size() { return 0;} /* Get event length for simple events. 
For complicated events the length is calculated during write() */ + virtual int get_data_size() { return 0;} static Log_event* read_log_event(const char* buf, uint event_len, const char **error, const Format_description_log_event @@ -1037,6 +1078,40 @@ class Rand_log_event: public Log_event bool is_valid() const { return 1; } };
+/*****************************************************************************
+
+  Xid Log Event class
+
+  Logs xid of the transaction-to-be-committed in the 2pc protocol.
+  On a slave the event is executed as COMMIT (see exec_event());
+  mysqlbinlog prints it as "COMMIT;".
+
+ ****************************************************************************/
+#ifdef MYSQL_CLIENT
+/*
+  NOTE(review): write() emits sizeof(xid) raw bytes. If the server-side
+  my_xid is wider than ulong on some platform, the client would read a
+  truncated xid - confirm that both definitions have the same size.
+*/
+typedef ulong my_xid;
+#endif
+
+class Xid_log_event: public Log_event
+{
+ public:
+   my_xid xid;                          /* the only payload of the event */
+
+#ifndef MYSQL_CLIENT
+  /* server side: build an event for xid x with no flags set */
+  Xid_log_event(THD* thd_arg, my_xid x): Log_event(thd_arg,0,0), xid(x) {}
+#ifdef HAVE_REPLICATION
+  void pack_info(Protocol* protocol);
+  int exec_event(struct st_relay_log_info* rli);
+#endif /* HAVE_REPLICATION */
+#else
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
+#endif
+
+  /* build the event from its on-disk image; xid follows the common header */
+  Xid_log_event(const char* buf, const Format_description_log_event* description_event);
+  ~Xid_log_event() {}
+  Log_event_type get_type_code() { return XID_EVENT;}
+  int get_data_size() { return sizeof(xid); }
+  bool write(IO_CACHE* file);
+  bool is_valid() const { return 1; }
+};
/***************************************************************************** @@ -1086,8 +1161,6 @@ public: Stop Log Event class ****************************************************************************/ -#ifdef HAVE_REPLICATION - class Stop_log_event: public Log_event { public: @@ -1107,14 +1180,11 @@ public: bool is_valid() const { return 1; } }; -#endif /* HAVE_REPLICATION */ - - /***************************************************************************** Rotate Log Event class - This will be depricated when we move to using sequence ids. + This will be deprecated when we move to using sequence ids. 
****************************************************************************/ diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index fdcf061ab7a..2442f68503f 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -409,9 +409,16 @@ inline THD *_current_thd(void) } #define current_thd _current_thd() +/* + External variables +*/ +extern ulong server_id, concurrency; + + typedef my_bool (*qc_engine_callback)(THD *thd, char *table_key, uint key_length, ulonglong *engine_data); + #include "sql_string.h" #include "sql_list.h" #include "sql_map.h" @@ -572,6 +579,8 @@ bool mysql_preload_keys(THD* thd, TABLE_LIST* table_list); int reassign_keycache_tables(THD* thd, KEY_CACHE *src_cache, KEY_CACHE *dst_cache); +bool mysql_xa_recover(THD *thd); + bool check_simple_select(); SORT_FIELD * make_unireg_sortorder(ORDER *order, uint *length); @@ -944,11 +953,9 @@ void sql_print_information(const char *format, ...); bool fn_format_relative_to_data_home(my_string to, const char *name, const char *dir, const char *extension); -bool open_log(MYSQL_LOG *log, const char *hostname, - const char *opt_name, const char *extension, - const char *index_file_name, - enum_log_type type, bool read_append, - bool no_auto_events, ulong max_size); +File open_binlog(IO_CACHE *log, const char *log_file_name, + const char **errmsg); +handlerton *binlog_init(); /* mysqld.cc */ extern void yyerror(const char*); @@ -1003,7 +1010,7 @@ extern double last_query_cost; extern double log_10[32]; extern ulonglong log_10_int[20]; extern ulonglong keybuff_size; -extern ulong refresh_version,flush_version, thread_id,query_id; +extern ulong refresh_version,flush_version, thread_id; extern ulong binlog_cache_use, binlog_cache_disk_use; extern ulong aborted_threads,aborted_connects; extern ulong delayed_insert_timeout; @@ -1013,8 +1020,6 @@ extern ulong delayed_rows_in_use,delayed_insert_errors; extern ulong slave_open_temp_tables; extern ulong query_cache_size, query_cache_min_res_unit; extern ulong 
thd_startup_options, slow_launch_threads, slow_launch_time; -extern ulong server_id, concurrency; -extern ulong ha_read_count, ha_discover_count; extern ulong table_cache_size; extern ulong max_connections,max_connect_errors, connect_timeout; extern ulong slave_net_timeout; @@ -1056,6 +1061,7 @@ extern uint opt_large_page_size; extern MYSQL_LOG mysql_log,mysql_slow_log,mysql_bin_log; extern FILE *bootstrap_file; +extern int bootstrap_error; extern pthread_key(MEM_ROOT**,THR_MALLOC); extern pthread_mutex_t LOCK_mysql_create_db,LOCK_Acl,LOCK_open, LOCK_thread_count,LOCK_mapped_file,LOCK_user_locks, LOCK_status, @@ -1259,6 +1265,14 @@ SQL_CRYPT *get_crypt_for_frm(void); #include "sql_view.h" +/* query_id */ + +typedef ulonglong query_id_t; +extern query_id_t query_id; + +/* increment query_id and return it. */ +inline query_id_t next_query_id() { return query_id++; } + /* Some inline functions for more speed */ inline bool add_item_to_list(THD *thd, Item *item) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 2998d367a36..8b505ccb2fa 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -54,7 +54,8 @@ #endif #ifdef HAVE_NDBCLUSTER_DB #define OPT_NDBCLUSTER_DEFAULT 0 -#if defined(NOT_ENOUGH_TESTED) && defined(NDB_SHM_TRANSPORTER) && MYSQL_VERSION_ID >= 50000 +#if defined(NOT_ENOUGH_TESTED) \ + && defined(NDB_SHM_TRANSPORTER) && MYSQL_VERSION_ID >= 50000 #define OPT_NDB_SHM_DEFAULT 1 #else #define OPT_NDB_SHM_DEFAULT 0 @@ -147,11 +148,11 @@ int deny_severity = LOG_WARNING; #include <sys/mman.h> #endif +#ifdef __NETWARE__ #define zVOLSTATE_ACTIVE 6 #define zVOLSTATE_DEACTIVE 2 #define zVOLSTATE_MAINTENANCE 3 -#ifdef __NETWARE__ #include <nks/vm.h> #include <library.h> #include <monitor.h> @@ -245,6 +246,10 @@ const char *sql_mode_names[] = }; TYPELIB sql_mode_typelib= { array_elements(sql_mode_names)-1,"", sql_mode_names, NULL }; +const char *tc_heuristic_recover_names[]= { "COMMIT", "ROLLBACK", NullS }; +TYPELIB tc_heuristic_recover_typelib= + { 
array_elements(tc_heuristic_recover_names)-1,"", + tc_heuristic_recover_names, NULL }; const char *first_keyword= "first", *binary_keyword= "BINARY"; const char *my_localhost= "localhost", *delayed_user= "DELAYED"; #if SIZEOF_OFF_T > 4 && defined(BIG_TABLES) @@ -300,14 +305,13 @@ my_bool opt_secure_auth= 0; my_bool opt_short_log_format= 0; my_bool opt_log_queries_not_using_indexes= 0; my_bool lower_case_file_system= 0; -my_bool opt_innodb_safe_binlog= 0; my_bool opt_large_pages= 0; -uint opt_large_page_size= 0; +uint opt_large_page_size= 0; my_bool opt_old_style_user_limits= 0; /* - True if there is at least one per-hour limit for some user, so we should check - them before each query (and possibly reset counters when hour is changed). - False otherwise. + True if there is at least one per-hour limit for some user, so we should + check them before each query (and possibly reset counters when hour is + changed). False otherwise. */ volatile bool mqh_used = 0; my_bool sp_automatic_privileges= 1; @@ -315,7 +319,7 @@ my_bool sp_automatic_privileges= 1; uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options; uint delay_key_write_options, protocol_version; uint lower_case_table_names; -uint opt_crash_binlog_innodb; +uint tc_heuristic_recover= 0; uint volatile thread_count, thread_running, kill_cached_threads, wake_thread; ulong back_log, connect_timeout, concurrency; @@ -327,7 +331,7 @@ ulong slave_net_timeout; ulong thread_cache_size=0, binlog_cache_size=0, max_binlog_cache_size=0; ulong query_cache_size=0; ulong refresh_version, flush_version; /* Increments on each reload */ -ulong query_id; +query_id_t query_id; ulong aborted_threads, killed_threads, aborted_connects; ulong delayed_insert_timeout, delayed_insert_limit, delayed_queue_size; ulong delayed_insert_threads, delayed_insert_writes, delayed_rows_in_use; @@ -384,6 +388,7 @@ Le_creator le_creator; FILE *bootstrap_file; +int bootstrap_error; I_List<i_string_pair> replicate_rewrite_db; 
I_List<i_string> replicate_do_db, replicate_ignore_db; @@ -445,7 +450,7 @@ static my_bool opt_do_pstack, opt_noacl, opt_bootstrap, opt_myisam_log; static int cleanup_done; static ulong opt_specialflag, opt_myisam_block_size; static char *opt_logname, *opt_update_logname, *opt_binlog_index_name; -static char *opt_slow_logname; +static char *opt_slow_logname, *opt_tc_log_file, *opt_tc_heuristic_recover; static char *mysql_home_ptr, *pidfile_name_ptr; static char **defaults_argv; static char *opt_bin_logname; @@ -527,7 +532,7 @@ static char *get_relative_path(const char *path); static void fix_paths(void); extern "C" pthread_handler_decl(handle_connections_sockets,arg); extern "C" pthread_handler_decl(kill_server_thread,arg); -static int bootstrap(FILE *file); +static void bootstrap(FILE *file); static void close_server_sock(); static bool read_init_file(char *file_name); #ifdef __NT__ @@ -969,6 +974,8 @@ void clean_up(bool print_message) udf_free(); #endif (void) ha_panic(HA_PANIC_CLOSE); /* close all tables and logs */ + if (tc_log) + tc_log->close(); delete_elements(&key_caches, (void (*)(const char*, gptr)) free_key_cache); multi_keycache_free(); end_thr_alarm(1); /* Free allocated memory */ @@ -1576,11 +1583,11 @@ void mysql_down_server_cb(void *, void *) // destroy callback resources void mysql_cb_destroy(void *) -{ - UnRegisterEventNotification(eh); // cleanup down event notification +{ + UnRegisterEventNotification(eh); // cleanup down event notification NX_UNWRAP_INTERFACE(ref); - /* Deregister NSS volume deactivation event */ - NX_UNWRAP_INTERFACE(refneb); + /* Deregister NSS volume deactivation event */ + NX_UNWRAP_INTERFACE(refneb); if (neb_consumer_id) UnRegisterConsumer(neb_consumer_id, NULL); } @@ -1703,7 +1710,6 @@ ulong neb_event_callback(struct EventBlock *eblock) nw_panic = TRUE; event_flag= TRUE; kill_server(0); - } } return 0; @@ -1761,7 +1767,7 @@ static void getvolumeID(BYTE *volumeName) datavolid.clockSeqLow= info.vol.volumeID.clockSeqLow; /* 
This is guranteed to be 6-byte length (but sizeof() would be better) */ memcpy(datavolid.node, info.vol.volumeID.node, (unsigned int) 6); - + exit: if (rootKey) zClose(rootKey); @@ -2174,8 +2180,8 @@ static void check_data_home(const char *path) /* - All global error messages are sent here where the first one is stored for - the client + All global error messages are sent here where the first one is stored + for the client */ @@ -2211,7 +2217,7 @@ extern "C" int my_message_sql(uint error, const char *str, myf MyFlags) (thd->lex->current_select ? thd->lex->current_select->no_error : 0), (int) thd->is_fatal_error)); - + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, error, str); } else @@ -2301,45 +2307,14 @@ extern "C" pthread_handler_decl(handle_shutdown,arg) #endif -const char *load_default_groups[]= { +const char *load_default_groups[]= { #ifdef HAVE_NDBCLUSTER_DB "mysql_cluster", #endif -"mysqld","server",MYSQL_BASE_VERSION,0,0}; +"mysqld","server", MYSQL_BASE_VERSION, 0, 0}; static const int load_default_groups_sz= sizeof(load_default_groups)/sizeof(load_default_groups[0]); -bool open_log(MYSQL_LOG *log, const char *hostname, - const char *opt_name, const char *extension, - const char *index_file_name, - enum_log_type type, bool read_append, - bool no_auto_events, ulong max_size) -{ - char tmp[FN_REFLEN]; - if (!opt_name || !opt_name[0]) - { - /* - TODO: The following should be using fn_format(); We just need to - first change fn_format() to cut the file name if it's too long. - */ - strmake(tmp,hostname,FN_REFLEN-5); - strmov(fn_ext(tmp),extension); - opt_name=tmp; - } - // get rid of extension if the log is binary to avoid problems - if (type == LOG_BIN) - { - char *p = fn_ext(opt_name); - uint length=(uint) (p-opt_name); - strmake(tmp,opt_name,min(length,FN_REFLEN)); - opt_name=tmp; - } - return log->open(opt_name, type, 0, index_file_name, - (read_append) ? 
SEQ_READ_APPEND : WRITE_CACHE, - no_auto_events, max_size, 0); -} - - /* Initialize one of the global date/time format variables @@ -2347,7 +2322,7 @@ bool open_log(MYSQL_LOG *log, const char *hostname, init_global_datetime_format() format_type What kind of format should be supported var_ptr Pointer to variable that should be updated - + NOTES The default value is taken from either opt_date_time_formats[] or the ISO format (ANSI SQL) @@ -2630,8 +2605,7 @@ static int init_server_components() #endif /* Setup log files */ if (opt_log) - open_log(&mysql_log, glob_hostname, opt_logname, ".log", NullS, - LOG_NORMAL, 0, 0, 0); + mysql_log.open_query_log(opt_logname); if (opt_update_log) { /* @@ -2684,58 +2658,16 @@ version 5.0 and above. It is replaced by the binary log. Now starting MySQL \ with --log-bin instead."); } } - if (opt_slow_log) - open_log(&mysql_slow_log, glob_hostname, opt_slow_logname, "-slow.log", - NullS, LOG_NORMAL, 0, 0, 0); - - if (opt_bin_log) - { - /* If we fail to open binlog, it's going to hinder our recovery, so die */ - if (open_log(&mysql_bin_log, glob_hostname, opt_bin_logname, "-bin", - opt_binlog_index_name, LOG_BIN, 0, 0, max_binlog_size)) - unireg_abort(1); - using_update_log=1; -#ifdef HAVE_REPLICATION - if (expire_logs_days) - { - long purge_time= time(0) - expire_logs_days*24*60*60; - if (purge_time >= 0) - mysql_bin_log.purge_logs_before_date(purge_time); - } -#endif - if (!opt_bin_logname && !opt_binlog_index_name) - { - /* - User didn't give us info to name the binlog index file. - Picking `hostname`-bin.index like did in 4.x, causes replication to - fail if the hostname is changed later. So, we would like to instead - require a name. But as we don't want to break many existing setups, we - only give warning, not error. - */ - sql_print_warning("\ -No argument was provided to --log-bin, and --log-bin-index was not used; \ -so replication may break when this MySQL server acts as a master and \ -has his hostname changed!! 
Please use '--log-bin=%s' to avoid \ -this problem.", - mysql_bin_log.get_name()); - } - } - else + if (opt_log_slave_updates && !opt_bin_log) { - if (opt_log_slave_updates) - { - sql_print_error("\ -You need to use --log-bin=# to make --log-slave-updates work."); + sql_print_warning("You need to use --log-bin to make " + "--log-slave-updates work."); unireg_abort(1); - } - if (opt_binlog_index_name) - { - sql_print_error("\ -You need to use --log-bin=# to make --log-bin-index work."); - unireg_abort(1); - } } + if (opt_slow_log) + mysql_slow_log.open_slow_log(opt_slow_logname); + #ifdef HAVE_REPLICATION if (opt_log_slave_updates && replicate_same_server_id) { @@ -2766,39 +2698,33 @@ server."); } } - if (opt_innodb_safe_binlog) + if (opt_bin_log) { - if (have_innodb != SHOW_OPTION_YES) - sql_print_warning("--innodb-safe-binlog is meaningful only if " - "the InnoDB storage engine is enabled in the server."); -#ifdef HAVE_INNOBASE_DB - if (innobase_flush_log_at_trx_commit != 1) - { - sql_print_warning("--innodb-safe-binlog is meaningful only if " - "innodb_flush_log_at_trx_commit is 1; now setting it " - "to 1."); - innobase_flush_log_at_trx_commit= 1; - } - if (innobase_unix_file_flush_method) + char buf[FN_REFLEN]; + const char *ln; + ln= mysql_bin_log.generate_name(opt_bin_logname, "-bin", 1, buf); + if (!opt_bin_logname && !opt_binlog_index_name) { /* - This option has so many values that it's hard to know which value is - good (especially "littlesync", and on Windows... see - srv/srv0start.c). + User didn't give us info to name the binlog index file. + Picking `hostname`-bin.index like did in 4.x, causes replication to + fail if the hostname is changed later. So, we would like to instead + require a name. But as we don't want to break many existing setups, we + only give warning, not error. 
*/ - sql_print_warning("--innodb-safe-binlog requires that " - "the innodb_flush_method actually synchronizes the " - "InnoDB log to disk; it is your responsibility " - "to verify that the method you chose does it."); + sql_print_warning("No argument was provided to --log-bin, and " + "--log-bin-index was not used; so replication " + "may break when this MySQL server acts as a " + "master and has his hostname changed!! Please " + "use '--log-bin=%s' to avoid this problem.", ln); } - if (sync_binlog_period != 1) + if (ln == buf) { - sql_print_warning("--innodb-safe-binlog is meaningful only if " - "the global sync_binlog variable is 1; now setting it " - "to 1."); - sync_binlog_period= 1; + my_free(opt_bin_logname, MYF(MY_ALLOW_ZERO_PTR)); + opt_bin_logname=my_strdup(buf, MYF(0)); } -#endif + mysql_bin_log.open_index_file(opt_binlog_index_name, ln); + using_update_log=1; } if (ha_init()) @@ -2806,20 +2732,32 @@ server."); sql_print_error("Can't init databases"); unireg_abort(1); } - if (opt_myisam_log) - (void) mi_log(1); + tc_log= total_ha_2pc > 1 ? opt_bin_log ? + (TC_LOG *)&mysql_bin_log : + (TC_LOG *)&tc_log_mmap : + (TC_LOG *)&tc_log_dummy; - /* - Now that InnoDB is initialized, we can know the last good binlog position - and cut the binlog if needed. This function does nothing if there was no - crash recovery by InnoDB. 
- */ - if (opt_innodb_safe_binlog) + if (tc_log->open(opt_tc_log_file)) + { + sql_print_error("Can't init tc log"); + unireg_abort(1); + } + + if (opt_bin_log) + mysql_bin_log.open(opt_bin_logname, LOG_BIN, 0, + WRITE_CACHE, 0, max_binlog_size, 0); + +#ifdef HAVE_REPLICATION + if (opt_bin_log && expire_logs_days) { - /* not fatal if fails (but print errors) */ - mysql_bin_log.cut_spurious_tail(); + long purge_time= time(0) - expire_logs_days*24*60*60; + if (purge_time >= 0) + mysql_bin_log.purge_logs_before_date(purge_time); } - mysql_bin_log.report_pos_in_innodb(); +#endif + + if (opt_myisam_log) + (void) mi_log(1); /* call ha_init_key_cache() on all key caches to init them */ process_key_caches(&ha_init_key_cache); @@ -3137,9 +3075,9 @@ we force server id to 2, but this MySQL server will not act as a slave."); if (opt_bootstrap) { - int error=bootstrap(stdin); + bootstrap(stdin); end_thr_alarm(1); // Don't allow alarms - unireg_abort(error ? 1 : 0); + unireg_abort(bootstrap_error ? 1 : 0); } if (opt_init_file) { @@ -3327,10 +3265,10 @@ int main(int argc, char **argv) /* When several instances are running on the same machine, we need to have an unique named hEventShudown through the application PID e.g.: MySQLShutdown1890; MySQLShutdown2342 - */ + */ int10_to_str((int) GetCurrentProcessId(),strmov(shutdown_event_name, "MySQLShutdown"), 10); - + /* Must be initialized early for comparison of service name */ system_charset_info= &my_charset_utf8_general_ci; @@ -3429,7 +3367,7 @@ int main(int argc, char **argv) create MySQL privilege tables without having to start a full MySQL server. 
*/ -static int bootstrap(FILE *file) +static void bootstrap(FILE *file) { int error= 0; DBUG_ENTER("bootstrap"); @@ -3449,7 +3387,8 @@ static int bootstrap(FILE *file) (void*) thd)) { sql_print_warning("Can't create thread to handle bootstrap"); - DBUG_RETURN(-1); + bootstrap_error=-1; + DBUG_VOID_RETURN; } /* Wait for thread to die */ (void) pthread_mutex_lock(&LOCK_thread_count); @@ -3464,13 +3403,7 @@ static int bootstrap(FILE *file) handle_bootstrap((void *)thd); #endif - error= thd->is_fatal_error; -#ifndef EMBEDDED_LIBRARY - net_end(&thd->net); -#endif - thd->cleanup(); - delete thd; - DBUG_RETURN(error); + DBUG_VOID_RETURN; } @@ -3481,7 +3414,7 @@ static bool read_init_file(char *file_name) DBUG_PRINT("enter",("name: %s",file_name)); if (!(file=my_fopen(file_name,O_RDONLY,MYF(MY_WME)))) return(1); - bootstrap(file); /* Ignore errors from this */ + bootstrap(file); (void) my_fclose(file,MYF(MY_WME)); return 0; } @@ -3960,7 +3893,7 @@ pthread_handler_decl(handle_connections_shared_memory,arg) /* it can be after shutdown command */ - if (abort_loop) + if (abort_loop) goto error; HANDLE handle_client_file_map= 0; @@ -4140,7 +4073,7 @@ enum options_mysqld OPT_MASTER_HOST, OPT_MASTER_USER, OPT_MASTER_PASSWORD, OPT_MASTER_PORT, OPT_MASTER_INFO_FILE, OPT_MASTER_CONNECT_RETRY, - OPT_MASTER_RETRY_COUNT, + OPT_MASTER_RETRY_COUNT, OPT_LOG_TC, OPT_LOG_TC_SIZE, OPT_MASTER_SSL, OPT_MASTER_SSL_KEY, OPT_MASTER_SSL_CERT, OPT_MASTER_SSL_CAPATH, OPT_MASTER_SSL_CIPHER, OPT_MASTER_SSL_CA, @@ -4154,7 +4087,7 @@ enum options_mysqld OPT_SAFEMALLOC_MEM_LIMIT, OPT_REPLICATE_DO_TABLE, OPT_REPLICATE_IGNORE_TABLE, OPT_REPLICATE_WILD_DO_TABLE, OPT_REPLICATE_WILD_IGNORE_TABLE, OPT_REPLICATE_SAME_SERVER_ID, - OPT_DISCONNECT_SLAVE_EVENT_COUNT, + OPT_DISCONNECT_SLAVE_EVENT_COUNT, OPT_TC_HEURISTIC_RECOVER, OPT_ABORT_SLAVE_EVENT_COUNT, OPT_INNODB_DATA_HOME_DIR, OPT_INNODB_DATA_FILE_PATH, @@ -4577,6 +4510,14 @@ Disable with --skip-isam.", "Log slow queries to this log file. 
Defaults logging to hostname-slow.log file.", (gptr*) &opt_slow_logname, (gptr*) &opt_slow_logname, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"log-tc", OPT_LOG_TC, + "Path to transaction coordinator log (used for transactions that affect " + "more than one storage engine, when binary log is disabled)", + (gptr*) &opt_tc_log_file, (gptr*) &opt_tc_log_file, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"log-tc-size", OPT_LOG_TC_SIZE, "Size of transaction coordinator log.", + (gptr*) &opt_tc_log_size, (gptr*) &opt_tc_log_size, 0, GET_ULONG, + REQUIRED_ARG, TC_LOG_MIN_SIZE, TC_LOG_MIN_SIZE, ~0, 0, TC_LOG_PAGE_SIZE, 0}, {"log-update", OPT_UPDATE_LOG, "The update log is deprecated since version 5.0, is replaced by the binary \ log and this option justs turns on --log-bin instead.", @@ -4912,6 +4853,10 @@ log and this option does nothing anymore.", {"symbolic-links", 's', "Enable symbolic link support.", (gptr*) &my_use_symdir, (gptr*) &my_use_symdir, 0, GET_BOOL, NO_ARG, IF_PURIFY(0,1), 0, 0, 0, 0, 0}, + {"tc-heuristic-recover", OPT_TC_HEURISTIC_RECOVER, + "Decision to use in heuristic recover process. 
Possible values are COMMIT or ROLLBACK", + (gptr*) &opt_tc_heuristic_recover, (gptr*) &opt_tc_heuristic_recover, + 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"temp-pool", OPT_TEMP_POOL, "Using this option will cause most temporary files created to use a small set of names, rather than a unique name for each new file.", (gptr*) &use_temp_pool, (gptr*) &use_temp_pool, 0, GET_BOOL, NO_ARG, 1, @@ -4982,12 +4927,6 @@ log and this option does nothing anymore.", "The number of seconds the mysqld server is waiting for a connect packet before responding with 'Bad handshake'.", (gptr*) &connect_timeout, (gptr*) &connect_timeout, 0, GET_ULONG, REQUIRED_ARG, CONNECT_TIMEOUT, 2, LONG_TIMEOUT, 0, 1, 0 }, -#ifdef HAVE_REPLICATION - {"crash_binlog_innodb", OPT_CRASH_BINLOG_INNODB, - "Used only for testing, to crash when writing Nth event to binlog.", - (gptr*) &opt_crash_binlog_innodb, (gptr*) &opt_crash_binlog_innodb, - 0, GET_UINT, REQUIRED_ARG, 0, 0, ~(uint)0, 0, 1, 0}, -#endif { "date_format", OPT_DATE_FORMAT, "The DATE format (For future).", (gptr*) &opt_date_time_formats[MYSQL_TIMESTAMP_DATE], @@ -5108,26 +5047,6 @@ log and this option does nothing anymore.", "How many files at the maximum InnoDB keeps open at the same time.", (gptr*) &innobase_open_files, (gptr*) &innobase_open_files, 0, GET_LONG, REQUIRED_ARG, 300L, 10L, ~0L, 0, 1L, 0}, -#ifdef HAVE_REPLICATION - /* - Disabled for the 4.1.3 release. Disabling just this paragraph of code is - enough, as then user can't set it to 1 so it will always be ignored in the - rest of code. - */ -#if MYSQL_VERSION_ID >= 40103 - /* - innodb_safe_binlog is not a variable, just an option. Does not make - sense to make it a variable, as it is only used at startup (and so the - value would be lost at next startup, so setting it on the fly would have no - effect). 
- */ - {"innodb_safe_binlog", OPT_INNODB_SAFE_BINLOG, - "After a crash recovery by InnoDB, truncate the binary log after the last " - "not-rolled-back statement/transaction.", - (gptr*) &opt_innodb_safe_binlog, (gptr*) &opt_innodb_safe_binlog, - 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0}, -#endif -#endif {"innodb_sync_spin_loops", OPT_INNODB_SYNC_SPIN_LOOPS, "Count of spin-loop rounds in InnoDB mutexes", (gptr*) &srv_n_spin_wait_rounds, @@ -5529,12 +5448,9 @@ struct show_var_st status_vars[]= { {"Aborted_connects", (char*) &aborted_connects, SHOW_LONG}, {"Binlog_cache_disk_use", (char*) &binlog_cache_disk_use, SHOW_LONG}, {"Binlog_cache_use", (char*) &binlog_cache_use, SHOW_LONG}, - {"Bytes_received", (char*) offsetof(STATUS_VAR, bytes_received), - SHOW_LONG_STATUS}, - {"Bytes_sent", (char*) offsetof(STATUS_VAR, bytes_sent), - SHOW_LONG_STATUS}, - {"Com_admin_commands", (char*) offsetof(STATUS_VAR, com_other), - SHOW_LONG_STATUS}, + {"Bytes_received", (char*) offsetof(STATUS_VAR, bytes_received), SHOW_LONG_STATUS}, + {"Bytes_sent", (char*) offsetof(STATUS_VAR, bytes_sent), SHOW_LONG_STATUS}, + {"Com_admin_commands", (char*) offsetof(STATUS_VAR, com_other), SHOW_LONG_STATUS}, {"Com_alter_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_ALTER_DB]), SHOW_LONG_STATUS}, {"Com_alter_table", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_ALTER_TABLE]), SHOW_LONG_STATUS}, {"Com_analyze", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_ANALYZE]), SHOW_LONG_STATUS}, @@ -5621,62 +5537,45 @@ struct show_var_st status_vars[]= { {"Com_unlock_tables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_UNLOCK_TABLES]), SHOW_LONG_STATUS}, {"Com_update", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_UPDATE]), SHOW_LONG_STATUS}, {"Com_update_multi", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_UPDATE_MULTI]), SHOW_LONG_STATUS}, + {"Com_xa_commit", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_XA_COMMIT]),SHOW_LONG_STATUS}, + {"Com_xa_end", (char*) 
offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_XA_END]),SHOW_LONG_STATUS}, + {"Com_xa_prepare", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_XA_PREPARE]),SHOW_LONG_STATUS}, + {"Com_xa_recover", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_XA_RECOVER]),SHOW_LONG_STATUS}, + {"Com_xa_rollback", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_XA_ROLLBACK]),SHOW_LONG_STATUS}, + {"Com_xa_start", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_XA_START]),SHOW_LONG_STATUS}, {"Connections", (char*) &thread_id, SHOW_LONG_CONST}, - {"Created_tmp_disk_tables", (char*) offsetof(STATUS_VAR, - created_tmp_disk_tables), - SHOW_LONG_STATUS}, + {"Created_tmp_disk_tables", (char*) offsetof(STATUS_VAR, created_tmp_disk_tables), SHOW_LONG_STATUS}, {"Created_tmp_files", (char*) &my_tmp_file_created, SHOW_LONG}, - {"Created_tmp_tables", (char*) offsetof(STATUS_VAR, - created_tmp_tables), - SHOW_LONG_STATUS}, + {"Created_tmp_tables", (char*) offsetof(STATUS_VAR, created_tmp_tables), SHOW_LONG_STATUS}, {"Delayed_errors", (char*) &delayed_insert_errors, SHOW_LONG}, {"Delayed_insert_threads", (char*) &delayed_insert_threads, SHOW_LONG_CONST}, {"Delayed_writes", (char*) &delayed_insert_writes, SHOW_LONG}, {"Flush_commands", (char*) &refresh_version, SHOW_LONG_CONST}, - {"Handler_commit", (char*) offsetof(STATUS_VAR, ha_commit_count), - SHOW_LONG_STATUS}, - {"Handler_delete", (char*) offsetof(STATUS_VAR, ha_delete_count), - SHOW_LONG_STATUS}, - {"Handler_discover", (char*) &ha_discover_count, SHOW_LONG}, - {"Handler_read_first", (char*) offsetof(STATUS_VAR, - ha_read_first_count), - SHOW_LONG_STATUS}, - {"Handler_read_key", (char*) offsetof(STATUS_VAR, ha_read_key_count), - SHOW_LONG_STATUS}, - {"Handler_read_next", (char*) offsetof(STATUS_VAR, - ha_read_next_count), - SHOW_LONG_STATUS}, - {"Handler_read_prev", (char*) offsetof(STATUS_VAR, - ha_read_prev_count), - SHOW_LONG_STATUS}, - {"Handler_read_rnd", (char*) offsetof(STATUS_VAR, ha_read_rnd_count), - SHOW_LONG_STATUS}, 
- {"Handler_read_rnd_next", (char*) offsetof(STATUS_VAR, - ha_read_rnd_next_count), - SHOW_LONG_STATUS}, - {"Handler_rollback", (char*) offsetof(STATUS_VAR, ha_rollback_count), - SHOW_LONG_STATUS}, - {"Handler_update", (char*) offsetof(STATUS_VAR, ha_update_count), - SHOW_LONG_STATUS}, - {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), - SHOW_LONG_STATUS}, + {"Handler_commit", (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS}, + {"Handler_delete", (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS}, + {"Handler_discover", (char*) offsetof(STATUS_VAR, ha_discover_count), SHOW_LONG_STATUS}, + {"Handler_prepare", (char*) offsetof(STATUS_VAR, ha_prepare_count), SHOW_LONG_STATUS}, + {"Handler_read_first", (char*) offsetof(STATUS_VAR, ha_read_first_count), SHOW_LONG_STATUS}, + {"Handler_read_key", (char*) offsetof(STATUS_VAR, ha_read_key_count), SHOW_LONG_STATUS}, + {"Handler_read_next", (char*) offsetof(STATUS_VAR, ha_read_next_count), SHOW_LONG_STATUS}, + {"Handler_read_prev", (char*) offsetof(STATUS_VAR, ha_read_prev_count), SHOW_LONG_STATUS}, + {"Handler_read_rnd", (char*) offsetof(STATUS_VAR, ha_read_rnd_count), SHOW_LONG_STATUS}, + {"Handler_read_rnd_next", (char*) offsetof(STATUS_VAR, ha_read_rnd_next_count), SHOW_LONG_STATUS}, + {"Handler_rollback", (char*) offsetof(STATUS_VAR, ha_rollback_count), SHOW_LONG_STATUS}, + {"Handler_savepoint", (char*) offsetof(STATUS_VAR, ha_savepoint_count), SHOW_LONG_STATUS}, + {"Handler_savepoint_rollback",(char*) offsetof(STATUS_VAR, ha_savepoint_rollback_count), SHOW_LONG_STATUS}, + {"Handler_update", (char*) offsetof(STATUS_VAR, ha_update_count), SHOW_LONG_STATUS}, + {"Handler_write", (char*) offsetof(STATUS_VAR, ha_write_count), SHOW_LONG_STATUS}, #ifdef HAVE_INNOBASE_DB {"Innodb_", (char*) &innodb_status_variables, SHOW_VARS}, #endif /*HAVE_INNOBASE_DB*/ - {"Key_blocks_not_flushed", (char*) &dflt_key_cache_var.global_blocks_changed, - SHOW_KEY_CACHE_LONG}, - {"Key_blocks_unused", 
(char*) &dflt_key_cache_var.blocks_unused, - SHOW_KEY_CACHE_CONST_LONG}, - {"Key_blocks_used", (char*) &dflt_key_cache_var.blocks_used, - SHOW_KEY_CACHE_CONST_LONG}, - {"Key_read_requests", (char*) &dflt_key_cache_var.global_cache_r_requests, - SHOW_KEY_CACHE_LONG}, - {"Key_reads", (char*) &dflt_key_cache_var.global_cache_read, - SHOW_KEY_CACHE_LONG}, - {"Key_write_requests", (char*) &dflt_key_cache_var.global_cache_w_requests, - SHOW_KEY_CACHE_LONG}, - {"Key_writes", (char*) &dflt_key_cache_var.global_cache_write, - SHOW_KEY_CACHE_LONG}, + {"Key_blocks_not_flushed", (char*) &dflt_key_cache_var.global_blocks_changed, SHOW_KEY_CACHE_LONG}, + {"Key_blocks_unused", (char*) &dflt_key_cache_var.blocks_unused, SHOW_KEY_CACHE_CONST_LONG}, + {"Key_blocks_used", (char*) &dflt_key_cache_var.blocks_used, SHOW_KEY_CACHE_CONST_LONG}, + {"Key_read_requests", (char*) &dflt_key_cache_var.global_cache_r_requests, SHOW_KEY_CACHE_LONG}, + {"Key_reads", (char*) &dflt_key_cache_var.global_cache_read, SHOW_KEY_CACHE_LONG}, + {"Key_write_requests", (char*) &dflt_key_cache_var.global_cache_w_requests, SHOW_KEY_CACHE_LONG}, + {"Key_writes", (char*) &dflt_key_cache_var.global_cache_write, SHOW_KEY_CACHE_LONG}, {"Last_query_cost", (char*) &last_query_cost, SHOW_DOUBLE}, {"Max_used_connections", (char*) &max_used_connections, SHOW_LONG}, #ifdef HAVE_NDBCLUSTER_DB @@ -5686,53 +5585,32 @@ struct show_var_st status_vars[]= { {"Open_files", (char*) &my_file_opened, SHOW_LONG_CONST}, {"Open_streams", (char*) &my_stream_opened, SHOW_LONG_CONST}, {"Open_tables", (char*) 0, SHOW_OPENTABLES}, - {"Opened_tables", (char*) offsetof(STATUS_VAR, opened_tables), - SHOW_LONG_STATUS}, + {"Opened_tables", (char*) offsetof(STATUS_VAR, opened_tables), SHOW_LONG_STATUS}, #ifdef HAVE_QUERY_CACHE - {"Qcache_free_blocks", (char*) &query_cache.free_memory_blocks, - SHOW_LONG_CONST}, - {"Qcache_free_memory", (char*) &query_cache.free_memory, - SHOW_LONG_CONST}, + {"Qcache_free_blocks", (char*) 
&query_cache.free_memory_blocks, SHOW_LONG_CONST}, + {"Qcache_free_memory", (char*) &query_cache.free_memory, SHOW_LONG_CONST}, {"Qcache_hits", (char*) &query_cache.hits, SHOW_LONG}, {"Qcache_inserts", (char*) &query_cache.inserts, SHOW_LONG}, {"Qcache_lowmem_prunes", (char*) &query_cache.lowmem_prunes, SHOW_LONG}, {"Qcache_not_cached", (char*) &query_cache.refused, SHOW_LONG}, {"Qcache_queries_in_cache", (char*) &query_cache.queries_in_cache, SHOW_LONG_CONST}, - {"Qcache_total_blocks", (char*) &query_cache.total_blocks, - SHOW_LONG_CONST}, + {"Qcache_total_blocks", (char*) &query_cache.total_blocks, SHOW_LONG_CONST}, #endif /*HAVE_QUERY_CACHE*/ {"Questions", (char*) 0, SHOW_QUESTION}, {"Rpl_status", (char*) 0, SHOW_RPL_STATUS}, - {"Select_full_join", (char*) offsetof(STATUS_VAR, - select_full_join_count), - SHOW_LONG_STATUS}, - {"Select_full_range_join", (char*) offsetof(STATUS_VAR, - select_full_range_join_count), - SHOW_LONG_STATUS}, - {"Select_range", (char*) offsetof(STATUS_VAR, - select_range_count), - SHOW_LONG_STATUS}, - {"Select_range_check", (char*) offsetof(STATUS_VAR, - select_range_check_count), - SHOW_LONG_STATUS}, - {"Select_scan", (char*) offsetof(STATUS_VAR, select_scan_count), - SHOW_LONG_STATUS}, + {"Select_full_join", (char*) offsetof(STATUS_VAR, select_full_join_count), SHOW_LONG_STATUS}, + {"Select_full_range_join", (char*) offsetof(STATUS_VAR, select_full_range_join_count), SHOW_LONG_STATUS}, + {"Select_range", (char*) offsetof(STATUS_VAR, select_range_count), SHOW_LONG_STATUS}, + {"Select_range_check", (char*) offsetof(STATUS_VAR, select_range_check_count), SHOW_LONG_STATUS}, + {"Select_scan", (char*) offsetof(STATUS_VAR, select_scan_count), SHOW_LONG_STATUS}, {"Slave_open_temp_tables", (char*) &slave_open_temp_tables, SHOW_LONG}, {"Slave_running", (char*) 0, SHOW_SLAVE_RUNNING}, {"Slow_launch_threads", (char*) &slow_launch_threads, SHOW_LONG}, - {"Slow_queries", (char*) offsetof(STATUS_VAR, long_query_count), - SHOW_LONG_STATUS}, - 
{"Sort_merge_passes", (char*) offsetof(STATUS_VAR, - filesort_merge_passes), - SHOW_LONG_STATUS}, - {"Sort_range", (char*) offsetof(STATUS_VAR, - filesort_range_count), - SHOW_LONG_STATUS}, - {"Sort_rows", (char*) offsetof(STATUS_VAR, filesort_rows), - SHOW_LONG_STATUS}, - {"Sort_scan", (char*) offsetof(STATUS_VAR, - filesort_scan_count), - SHOW_LONG_STATUS}, + {"Slow_queries", (char*) offsetof(STATUS_VAR, long_query_count), SHOW_LONG_STATUS}, + {"Sort_merge_passes", (char*) offsetof(STATUS_VAR, filesort_merge_passes), SHOW_LONG_STATUS}, + {"Sort_range", (char*) offsetof(STATUS_VAR, filesort_range_count), SHOW_LONG_STATUS}, + {"Sort_rows", (char*) offsetof(STATUS_VAR, filesort_rows), SHOW_LONG_STATUS}, + {"Sort_scan", (char*) offsetof(STATUS_VAR, filesort_scan_count), SHOW_LONG_STATUS}, #ifdef HAVE_OPENSSL {"Ssl_accept_renegotiates", (char*) 0, SHOW_SSL_CTX_SESS_ACCEPT_RENEGOTIATE}, {"Ssl_accepts", (char*) 0, SHOW_SSL_CTX_SESS_ACCEPT}, @@ -5760,6 +5638,9 @@ struct show_var_st status_vars[]= { #endif /* HAVE_OPENSSL */ {"Table_locks_immediate", (char*) &locks_immediate, SHOW_LONG}, {"Table_locks_waited", (char*) &locks_waited, SHOW_LONG}, + {"Tc_log_max_pages_used", (char*) &tc_log_max_pages_used, SHOW_LONG}, + {"Tc_log_page_size", (char*) &tc_log_page_size, SHOW_LONG}, + {"Tc_log_page_waits", (char*) &tc_log_page_waits, SHOW_LONG}, {"Threads_cached", (char*) &cached_thread_count, SHOW_LONG_CONST}, {"Threads_connected", (char*) &thread_count, SHOW_INT_CONST}, {"Threads_created", (char*) &thread_created, SHOW_LONG_CONST}, @@ -5852,7 +5733,8 @@ static void mysql_init_variables(void) mysql_home[0]= pidfile_name[0]= log_error_file[0]= 0; opt_log= opt_update_log= opt_bin_log= opt_slow_log= 0; opt_disable_networking= opt_skip_show_db=0; - opt_logname= opt_update_logname= opt_binlog_index_name= opt_slow_logname=0; + opt_logname= opt_update_logname= opt_binlog_index_name= opt_slow_logname= 0; + opt_tc_log_file= "tc.log"; // no hostname in tc_log file name ! 
opt_secure_auth= 0; opt_bootstrap= opt_myisam_log= 0; mqh_used= 0; @@ -6545,6 +6427,16 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), ha_open_options|=HA_OPEN_ABORT_IF_CRASHED; break; } + case OPT_TC_HEURISTIC_RECOVER: + { + if ((tc_heuristic_recover=find_type(argument, + &tc_heuristic_recover_typelib, 2)) <=0) + { + fprintf(stderr, "Unknown option to tc-heuristic-recover: %s\n",argument); + exit(1); + } + break; + } case OPT_SQL_MODE: { sql_mode_str= argument; diff --git a/sql/set_var.h b/sql/set_var.h index 3104fd38976..801ceeebb44 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -778,7 +778,7 @@ public: } save_result; LEX_STRING base; /* for structs */ - set_var(enum_var_type type_arg, sys_var *var_arg, LEX_STRING *base_name_arg, + set_var(enum_var_type type_arg, sys_var *var_arg, const LEX_STRING *base_name_arg, Item *value_arg) :var(var_arg), type(type_arg), base(*base_name_arg) { diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt index 030949e15d1..fa20e52f7e7 100644 --- a/sql/share/errmsg.txt +++ b/sql/share/errmsg.txt @@ -5306,6 +5306,18 @@ ER_CANNOT_USER eng "Operation %s failed for %.256s" ger "Das Kommando %s scheiterte für %.256s" norwegian-ny "Operation %s failed for '%.256s'" +ER_XAER_NOTA XAE04 + eng "XAER_NOTA: Unknown XID" +ER_XAER_INVAL XAE05 + eng "XAER_INVAL: Invalid arguments (or unsupported command)" +ER_XAER_RMFAIL XAE07 + eng "XAER_RMFAIL: The command cannot be executed in the %.64s state" +ER_XAER_OUTSIDE XAE09 + eng "XAER_OUTSIDE: Some work is done outside global transaction" +ER_XAER_RMERR XAE03 + eng "XAER_RMERR: Fatal error occurred in the transaction branch - check your data for consistency" +ER_XA_RBROLLBACK XA100 + eng "XA_RBROLLBACK: Transaction branch was rolled back" ER_NONEXISTING_PROC_GRANT 42000 eng "There is no such grant defined for user '%-.32s' on host '%-.64s' on routine '%-.64s'" ER_PROC_AUTO_GRANT_FAIL diff --git a/sql/slave.cc b/sql/slave.cc index f3ab4b21832..22381d2c4e4 100644 
--- a/sql/slave.cc +++ b/sql/slave.cc @@ -1,15 +1,15 @@ /* Copyright (C) 2000-2003 MySQL AB - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -158,7 +158,7 @@ int init_slave() sql_print_error("Failed to allocate memory for the master info structure"); goto err; } - + if (init_master_info(active_mi,master_info_file,relay_log_info_file, !master_host, (SLAVE_IO | SLAVE_SQL))) { @@ -551,9 +551,9 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset, /* Even if rli->inited==0, we still try to empty rli->master_log_* variables. Indeed, rli->inited==0 does not imply that they already are empty. - It could be that slave's info initialization partly succeeded : + It could be that slave's info initialization partly succeeded : for example if relay-log.info existed but *relay-bin*.* - have been manually removed, init_relay_log_info reads the old + have been manually removed, init_relay_log_info reads the old relay-log.info and fills rli->master_log_*, then init_relay_log_info checks for the existence of the relay log, this fails and init_relay_log_info leaves rli->inited to 0. 
@@ -562,7 +562,7 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset, MASTER, the callers of purge_relay_logs, will delete bogus *.info files or replace them with correct files), however if the user does SHOW SLAVE STATUS before START SLAVE, he will see old, confusing rli->master_log_*. - In other words, we reinit rli->master_log_* for SHOW SLAVE STATUS + In other words, we reinit rli->master_log_* for SHOW SLAVE STATUS to display fine in any case. */ @@ -671,11 +671,11 @@ int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock, } } DBUG_ASSERT(thd != 0); + THD_CHECK_SENTRY(thd); /* - Is is criticate to test if the slave is running. Otherwise, we might + Is is critical to test if the slave is running. Otherwise, we might be referening freed memory trying to kick it */ - THD_CHECK_SENTRY(thd); while (*slave_running) // Should always be true { @@ -1674,7 +1674,8 @@ void end_master_info(MASTER_INFO* mi) } -int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) +static int init_relay_log_info(RELAY_LOG_INFO* rli, + const char* info_fname) { char fname[FN_REFLEN+128]; int info_fd; @@ -1682,7 +1683,7 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) int error = 0; DBUG_ENTER("init_relay_log_info"); - if (rli->inited) // Set if this function called + if (rli->inited) // Set if this function called DBUG_RETURN(0); fn_format(fname, info_fname, mysql_data_home, "", 4+32); pthread_mutex_lock(&rli->data_lock); @@ -1693,23 +1694,10 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) rli->log_space_limit= relay_log_space_limit; rli->log_space_total= 0; - // TODO: make this work with multi-master - if (!opt_relay_logname) - { - char tmp[FN_REFLEN]; - /* - TODO: The following should be using fn_format(); We just need to - first change fn_format() to cut the file name if it's too long. 
- */ - strmake(tmp,glob_hostname,FN_REFLEN-5); - strmov(strcend(tmp,'.'),"-relay-bin"); - opt_relay_logname=my_strdup(tmp,MYF(MY_WME)); - } - /* The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE. - Note that the I/O thread flushes it to disk after writing every event, in - flush_master_info(mi, 1). + Note that the I/O thread flushes it to disk after writing every + event, in flush_master_info(mi, 1). */ /* @@ -1721,16 +1709,25 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) switch to using max_binlog_size for the relay log) and update rli->relay_log.max_size (and mysql_bin_log.max_size). */ - - if (open_log(&rli->relay_log, glob_hostname, opt_relay_logname, - "-relay-bin", opt_relaylog_index_name, - LOG_BIN, 1 /* read_append cache */, - 0 /* starting from 5.0 we want relay logs to have auto events */, - max_relay_log_size ? max_relay_log_size : max_binlog_size)) { - pthread_mutex_unlock(&rli->data_lock); - sql_print_error("Failed in open_log() called from init_relay_log_info()"); - DBUG_RETURN(1); + char buf[FN_REFLEN]; + const char *ln; + ln= rli->relay_log.generate_name(opt_relay_logname, "-relay-bin", + 1, buf); + + /* + note, that if open() fails, we'll still have index file open + but a destructor will take care of that + */ + if (rli->relay_log.open_index_file(opt_relaylog_index_name, ln) || + rli->relay_log.open(ln, LOG_BIN, 0, SEQ_READ_APPEND, 0, + (max_relay_log_size ? 
max_relay_log_size : + max_binlog_size), 0)) + { + pthread_mutex_unlock(&rli->data_lock); + sql_print_error("Failed in open_log() called from init_relay_log_info()"); + DBUG_RETURN(1); + } } /* if file does not exist */ @@ -1993,9 +1990,9 @@ void clear_until_condition(RELAY_LOG_INFO* rli) int init_master_info(MASTER_INFO* mi, const char* master_info_fname, - const char* slave_info_fname, - bool abort_if_no_master_info_file, - int thread_mask) + const char* slave_info_fname, + bool abort_if_no_master_info_file, + int thread_mask) { int fd,error; char fname[FN_REFLEN+128]; @@ -2009,7 +2006,7 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname, last time. If this case pos_in_file would be set and we would get a crash when trying to read the signature for the binary relay log. - + We only rewind the read position if we are starting the SQL thread. The handle_slave_sql thread assumes that the read position is at the beginning of the file, and will read the @@ -2035,7 +2032,7 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname, fd = mi->fd; /* does master.info exist ? */ - + if (access(fname,F_OK)) { if (abort_if_no_master_info_file) @@ -2071,7 +2068,7 @@ file '%s')", fname); { if (fd >= 0) reinit_io_cache(&mi->file, READ_CACHE, 0L,0,0); - else + else { if ((fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0 ) { @@ -2091,52 +2088,52 @@ file '%s')", fname); mi->fd = fd; int port, connect_retry, master_log_pos, ssl= 0, lines; char *first_non_digit; - + /* Starting from 4.1.x master.info has new format. Now its - first line contains number of lines in file. By reading this - number we will be always distinguish to which version our - master.info corresponds to. We can't simply count lines in + first line contains number of lines in file. By reading this + number we will be always distinguish to which version our + master.info corresponds to. 
We can't simply count lines in file since versions before 4.1.x could generate files with more lines than needed. - If first line doesn't contain a number or contain number less than + If first line doesn't contain a number or contain number less than 14 then such file is treated like file from pre 4.1.1 version. - There is no ambiguity when reading an old master.info, as before + There is no ambiguity when reading an old master.info, as before 4.1.1, the first line contained the binlog's name, which is either - empty or has an extension (contains a '.'), so can't be confused + empty or has an extension (contains a '.'), so can't be confused with an integer. - So we're just reading first line and trying to figure which version + So we're just reading first line and trying to figure which version is this. */ - - /* - The first row is temporarily stored in mi->master_log_name, - if it is line count and not binlog name (new format) it will be + + /* + The first row is temporarily stored in mi->master_log_name, + if it is line count and not binlog name (new format) it will be overwritten by the second row later. 
*/ if (init_strvar_from_file(mi->master_log_name, sizeof(mi->master_log_name), &mi->file, "")) goto errwithmsg; - + lines= strtoul(mi->master_log_name, &first_non_digit, 10); - if (mi->master_log_name[0]!='\0' && + if (mi->master_log_name[0]!='\0' && *first_non_digit=='\0' && lines >= LINES_IN_MASTER_INFO_WITH_SSL) { // Seems to be new format - if (init_strvar_from_file(mi->master_log_name, + if (init_strvar_from_file(mi->master_log_name, sizeof(mi->master_log_name), &mi->file, "")) goto errwithmsg; } else lines= 7; - + if (init_intvar_from_file(&master_log_pos, &mi->file, 4) || init_strvar_from_file(mi->host, sizeof(mi->host), &mi->file, master_host) || init_strvar_from_file(mi->user, sizeof(mi->user), &mi->file, - master_user) || + master_user) || init_strvar_from_file(mi->password, SCRAMBLED_PASSWORD_CHAR_LENGTH+1, &mi->file, master_password) || init_intvar_from_file(&port, &mi->file, master_port) || @@ -2144,17 +2141,17 @@ file '%s')", fname); master_connect_retry)) goto errwithmsg; - /* - If file has ssl part use it even if we have server without - SSL support. But these option will be ignored later when - slave will try connect to master, so in this case warning + /* + If file has ssl part use it even if we have server without + SSL support. But these option will be ignored later when + slave will try connect to master, so in this case warning is printed. 
*/ - if (lines >= LINES_IN_MASTER_INFO_WITH_SSL && + if (lines >= LINES_IN_MASTER_INFO_WITH_SSL && (init_intvar_from_file(&ssl, &mi->file, master_ssl) || - init_strvar_from_file(mi->ssl_ca, sizeof(mi->ssl_ca), + init_strvar_from_file(mi->ssl_ca, sizeof(mi->ssl_ca), &mi->file, master_ssl_ca) || - init_strvar_from_file(mi->ssl_capath, sizeof(mi->ssl_capath), + init_strvar_from_file(mi->ssl_capath, sizeof(mi->ssl_capath), &mi->file, master_ssl_capath) || init_strvar_from_file(mi->ssl_cert, sizeof(mi->ssl_cert), &mi->file, master_ssl_cert) || @@ -2169,7 +2166,7 @@ file '%s')", fname); "('%s') are ignored because this MySQL slave was compiled " "without SSL support.", fname); #endif /* HAVE_OPENSSL */ - + /* This has to be handled here as init_intvar_from_file can't handle my_off_t types @@ -2189,15 +2186,15 @@ file '%s')", fname); mi->inited = 1; // now change cache READ -> WRITE - must do this before flush_master_info - reinit_io_cache(&mi->file, WRITE_CACHE,0L,0,1); + reinit_io_cache(&mi->file, WRITE_CACHE, 0L, 0, 1); if ((error=test(flush_master_info(mi, 1)))) sql_print_error("Failed to flush master info file"); pthread_mutex_unlock(&mi->data_lock); DBUG_RETURN(error); - + errwithmsg: sql_print_error("Error reading master configuration"); - + err: if (fd >= 0) { @@ -2968,8 +2965,7 @@ static ulong read_event(MYSQL* mysql, MASTER_INFO *mi, bool* suppress_warnings) *suppress_warnings= TRUE; } else - sql_print_error("Error reading packet from server: %s (\ -server_errno=%d)", + sql_print_error("Error reading packet from server: %s ( server_errno=%d)", mysql_error(mysql), mysql_errno(mysql)); return packet_error; } @@ -3218,7 +3214,21 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) thd->set_time(); // time the query thd->lex->current_select= 0; if (!ev->when) + { ev->when = time(NULL); + /* + fake Rotate: it means that normal execution flow of statements is + interrupted. 
Let's fake ROLLBACK to undo any half-executed transaction + */ + if (ev->get_type_code() == ROTATE_EVENT && + ev->flags & LOG_EVENT_FORCE_ROLLBACK_F) + { + ha_rollback_stmt(thd); + ha_rollback(thd); + thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE); + thd->server_status&= ~SERVER_STATUS_IN_TRANS; + } + } ev->thd = thd; exec_res = ev->exec_event(rli); DBUG_ASSERT(rli->sql_thd==thd); @@ -3311,7 +3321,6 @@ slave_begin: goto err; } - thd->proc_info = "Connecting to master"; // we can get killed during safe_connect if (!safe_connect(thd, mysql, mi)) @@ -3408,9 +3417,9 @@ after reconnect"); bool suppress_warnings= 0; /* We say "waiting" because read_event() will wait if there's nothing to - read. But if there's something to read, it will not wait. The important - thing is to not confuse users by saying "reading" whereas we're in fact - receiving nothing. + read. But if there's something to read, it will not wait. The + important thing is to not confuse users by saying "reading" whereas + we're in fact receiving nothing. */ thd->proc_info = "Waiting for master to send event"; ulong event_len = read_event(mysql, mi, &suppress_warnings); @@ -3935,6 +3944,7 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev) if (disconnect_slave_event_count) events_till_disconnect++; #endif + /* If description_event_for_queue is format <4, there is conversion in the relay log to the slave's format (4). And Rotate can mean upgrade or @@ -3958,8 +3968,8 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev) } /* - Reads a 3.23 event and converts it to the slave's format. This code was copied - from MySQL 4.0. + Reads a 3.23 event and converts it to the slave's format. This code was + copied from MySQL 4.0. */ static int queue_binlog_ver_1_event(MASTER_INFO *mi, const char *buf, ulong event_len) @@ -4222,9 +4232,9 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) to write this event again). 
*/ /* - We are the only thread which reads/writes description_event_for_queue. The - relay_log struct does not move (though some members of it can change), so - we needn't any lock (no rli->data_lock, no log lock). + We are the only thread which reads/writes description_event_for_queue. + The relay_log struct does not move (though some members of it can + change), so we needn't any lock (no rli->data_lock, no log lock). */ Format_description_log_event* tmp; const char* errmsg; @@ -4615,7 +4625,7 @@ Log_event* next_event(RELAY_LOG_INFO* rli) /* This is an assertion which sometimes fails, let's try to track it */ char llbuf1[22], llbuf2[22]; DBUG_PRINT("info", ("my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s", - llstr(my_b_tell(cur_log),llbuf1), + llstr(my_b_tell(cur_log),llbuf1), llstr(rli->event_relay_log_pos,llbuf2))); DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE); DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos); @@ -4635,7 +4645,7 @@ Log_event* next_event(RELAY_LOG_INFO* rli) */ if ((ev=Log_event::read_log_event(cur_log,0, rli->relay_log.description_event_for_exec))) - + { DBUG_ASSERT(thd==rli->sql_thd); /* diff --git a/sql/slave.h b/sql/slave.h index 598ff0a7845..9e9e9070596 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -1,15 +1,15 @@ /* Copyright (C) 2000-2003 MySQL AB - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -392,11 +392,11 @@ typedef struct st_master_info my_bool ssl; // enables use of SSL connection if true char ssl_ca[FN_REFLEN], ssl_capath[FN_REFLEN], ssl_cert[FN_REFLEN]; char ssl_cipher[FN_REFLEN], ssl_key[FN_REFLEN]; - + my_off_t master_log_pos; File fd; // we keep the file open, so we need to remember the file pointer IO_CACHE file; - + pthread_mutex_t data_lock,run_lock; pthread_cond_t data_cond,start_cond,stop_cond; THD *io_thd; @@ -412,7 +412,7 @@ typedef struct st_master_info volatile bool abort_slave; volatile uint slave_running; volatile ulong slave_run_id; - /* + /* The difference in seconds between the clock of the master and the clock of the slave (second - first). It must be signed as it may be <0 or >0. clock_diff_with_master is computed when the I/O thread starts; for this the @@ -421,8 +421,8 @@ typedef struct st_master_info clock_of_slave - last_timestamp_executed_by_SQL_thread - clock_diff_with_master */ - long clock_diff_with_master; - + long clock_diff_with_master; + st_master_info() :ssl(0), fd(-1), io_thd(0), inited(0), abort_slave(0),slave_running(0), slave_run_id(0) @@ -430,7 +430,7 @@ typedef struct st_master_info host[0] = 0; user[0] = 0; password[0] = 0; ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0; ssl_cipher[0]= 0; ssl_key[0]= 0; - + bzero((char*) &file, sizeof(file)); pthread_mutex_init(&run_lock, MY_MUTEX_INIT_FAST); pthread_mutex_init(&data_lock, MY_MUTEX_INIT_FAST); @@ -550,7 +550,6 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname, bool abort_if_no_master_info_file, int thread_mask); void end_master_info(MASTER_INFO* mi); -int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname); void end_relay_log_info(RELAY_LOG_INFO* rli); void lock_slave_threads(MASTER_INFO* mi); void 
unlock_slave_threads(MASTER_INFO* mi); diff --git a/sql/sp_head.cc b/sql/sp_head.cc index 075aef9d286..26fb94e5234 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -1243,7 +1243,7 @@ sp_instr_stmt::exec_stmt(THD *thd, LEX *lex) thd->free_list= NULL; VOID(pthread_mutex_lock(&LOCK_thread_count)); - thd->query_id= query_id++; + thd->query_id= next_query_id(); VOID(pthread_mutex_unlock(&LOCK_thread_count)); reset_stmt_for_execute(thd, lex); diff --git a/sql/sql_base.cc b/sql/sql_base.cc index eef86921012..dc9af1ca5ad 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -384,8 +384,8 @@ void close_thread_tables(THD *thd, bool lock_in_use, bool skip_derived, { TABLE *table, *next; /* - Close all derived tables generated from questions like - SELECT * from (select * from t1)) + Close all derived tables generated in queries like + SELECT * FROM (SELECT * FROM t1) */ for (table= thd->derived_tables ; table ; table= next) { @@ -405,6 +405,18 @@ void close_thread_tables(THD *thd, bool lock_in_use, bool skip_derived, mysql_unlock_tables(thd, thd->lock); thd->lock=0; } + /* + assume handlers auto-commit (if some doesn't - transaction handling + in MySQL should be redesigned to support it; it's a big change, + and it's not worth it - better to commit explicitly only writing + transactions, read-only ones should better take care of themselves. 
+ saves some work in 2pc too) + see also sql_parse.cc - dispatch_command() + */ + bzero(&thd->transaction.stmt, sizeof(thd->transaction.stmt)); + if (!thd->active_transaction()) + thd->transaction.xid.null(); + /* VOID(pthread_sigmask(SIG_SETMASK,&thd->block_signals,NULL)); */ if (!lock_in_use) VOID(pthread_mutex_lock(&LOCK_open)); @@ -1824,7 +1836,7 @@ TABLE *open_ltable(THD *thd, TABLE_LIST *table_list, thr_lock_type lock_type) -1 - error NOTE - The lock will automaticly be freed by close_thread_tables() + The lock will automaticaly be freed by close_thread_tables() */ int simple_open_n_lock_tables(THD *thd, TABLE_LIST *tables) @@ -1851,7 +1863,7 @@ int simple_open_n_lock_tables(THD *thd, TABLE_LIST *tables) TRUE - error NOTE - The lock will automaticly be freed by close_thread_tables() + The lock will automaticaly be freed by close_thread_tables() */ bool open_and_lock_tables(THD *thd, TABLE_LIST *tables) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 32c9e2a50f7..c6f441e1825 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -194,6 +194,7 @@ THD::THD() file_id = 0; warn_id= 0; db_charset= global_system_variables.collation_database; + bzero(ha_data, sizeof(ha_data)); mysys_var=0; #ifndef DBUG_OFF dbug_sentry=THD_SENTRY_MAGIC; @@ -205,7 +206,6 @@ THD::THD() ull=0; system_thread= cleanup_done= abort_on_warning= 0; peer_port= 0; // For SHOW PROCESSLIST - transaction.changed_tables = 0; #ifdef __WIN__ real_id = 0; #endif @@ -240,9 +240,7 @@ THD::THD() /* For user vars replication*/ if (opt_bin_log) my_init_dynamic_array(&user_var_events, - sizeof(BINLOG_USER_VAR_EVENT *), - 16, - 16); + sizeof(BINLOG_USER_VAR_EVENT *), 16, 16); else bzero((char*) &user_var_events, sizeof(user_var_events)); @@ -252,26 +250,8 @@ THD::THD() protocol_prep.init(this); tablespace_op=FALSE; -#ifdef USING_TRANSACTIONS - bzero((char*) &transaction,sizeof(transaction)); - /* - Binlog is always open (if needed) before a THD is created (including - bootstrap). 
- */ - if (opt_using_transactions && mysql_bin_log.is_open()) - { - if (open_cached_file(&transaction.trans_log, - mysql_tmpdir, LOG_PREFIX, binlog_cache_size, - MYF(MY_WME))) - killed= KILL_CONNECTION; - transaction.trans_log.end_of_file= max_binlog_cache_size; - } -#endif - init_sql_alloc(&transaction.mem_root, ALLOC_ROOT_MIN_BLOCK_SIZE, 0); - { ulong tmp=sql_rnd_with_mutex(); randominit(&rand, tmp + (ulong) &rand, tmp + (ulong) ::query_id); - } } @@ -320,9 +300,12 @@ void THD::init_for_queries() reset_root_defaults(mem_root, variables.query_alloc_block_size, variables.query_prealloc_size); +#ifdef USING_TRANSACTIONS reset_root_defaults(&transaction.mem_root, variables.trans_alloc_block_size, variables.trans_prealloc_size); +#endif + transaction.xid.null(); } @@ -407,13 +390,8 @@ THD::~THD() #endif if (!cleanup_done) cleanup(); -#ifdef USING_TRANSACTIONS - if (opt_using_transactions) - { - close_cached_file(&transaction.trans_log); + ha_close_connection(this); - } -#endif sp_cache_clear(&sp_proc_cache); sp_cache_clear(&sp_func_cache); @@ -426,7 +404,9 @@ THD::~THD() safeFree(ip); safeFree(db); free_root(&warn_root,MYF(0)); +#ifdef USING_TRANSACTIONS free_root(&transaction.mem_root,MYF(0)); +#endif mysys_var=0; // Safety (shouldn't be needed) pthread_mutex_destroy(&LOCK_delete); #ifndef DBUG_OFF @@ -866,7 +846,6 @@ bool select_send::send_data(List<Item> &items) InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved by thd */ - if (thd->transaction.all.innobase_tid) ha_release_temporary_latches(thd); #endif @@ -901,7 +880,6 @@ bool select_send::send_eof() /* We may be passing the control from mysqld to the client: release the InnoDB adaptive hash S-latch to avoid thread deadlocks if it was reserved by thd */ - if (thd->transaction.all.innobase_tid) ha_release_temporary_latches(thd); #endif diff --git a/sql/sql_class.h b/sql/sql_class.h index 32a2390a402..835e9dd2362 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -43,6 +43,96 @@ enum 
enum_check_fields { CHECK_FIELD_IGNORE, CHECK_FIELD_WARN, extern char internal_table_name[2]; extern const char **errmesg; +#define TC_LOG_PAGE_SIZE 8192 +#define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE) +extern uint opt_tc_log_size; +extern uint tc_log_max_pages_used; +extern uint tc_log_page_size; +extern uint tc_log_page_waits; + +#define TC_HEURISTIC_RECOVER_COMMIT 1 +#define TC_HEURISTIC_RECOVER_ROLLBACK 2 +extern uint tc_heuristic_recover; + +/* + Transaction Coordinator log - a base abstract class + for two different implementations +*/ +class TC_LOG +{ + public: + int using_heuristic_recover(); + TC_LOG() {} + virtual ~TC_LOG() {} + + virtual int open(const char *opt_name)=0; + virtual void close()=0; + virtual int log(THD *thd, my_xid xid)=0; + virtual void unlog(ulong cookie, my_xid xid)=0; +}; + +class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging +{ + public: + int open(const char *opt_name) { return 0; } + void close() { } + int log(THD *thd, my_xid xid) { return 1; } + void unlog(ulong cookie, my_xid xid) { } +}; + +class TC_LOG_MMAP: public TC_LOG +{ + private: + + typedef enum { + POOL, // page is in pool + ERROR, // last sync failed + DIRTY // new xids added since last sync + } PAGE_STATE; + + typedef struct st_page { + struct st_page *next; // page a linked in a fifo queue + my_xid *start, *end; // usable area of a page + my_xid *ptr; // next xid will be written here + int size, free; // max and current number of free xid slots on the page + int waiters; // number of waiters on condition + PAGE_STATE state; // see above + pthread_mutex_t lock; // to access page data or control structure + pthread_cond_t cond; // to wait for a sync + } PAGE; + + char logname[FN_REFLEN]; + File fd; + uint file_length, npages, inited; + uchar *data; + struct st_page *pages, *syncing, *active, *pool, *pool_last; + /* + note that, e.g. 
LOCK_active is only used to protect + 'active' pointer, to protect the content of the active page + one has to use active->lock. + Same for LOCK_pool and LOCK_sync + */ + pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync; + pthread_cond_t COND_pool, COND_active; + + public: + TC_LOG_MMAP(): inited(0) {} + int open(const char *opt_name); + void close(); + int log(THD *thd, my_xid xid); + void unlog(ulong cookie, my_xid xid); + int recover(); + + private: + void get_active_from_pool(); + int sync(); + int overflow(); +}; + +extern TC_LOG *tc_log; +extern TC_LOG_MMAP tc_log_mmap; +extern TC_LOG_DUMMY tc_log_dummy; + /* log info errors */ #define LOG_INFO_EOF -1 #define LOG_INFO_IO -2 @@ -81,8 +171,18 @@ typedef struct st_user_var_events class Log_event; -class MYSQL_LOG - { +/* + TODO split MYSQL_LOG into base MYSQL_LOG and + MYSQL_QUERY_LOG, MYSQL_SLOW_LOG, MYSQL_BIN_LOG + most of the code from MYSQL_LOG should be in the MYSQL_BIN_LOG + only (TC_LOG included) + + TODO use mmap instead of IO_CACHE for binlog + (mmap+fsync is two times faster than write+fsync) +*/ + +class MYSQL_LOG: public TC_LOG +{ private: /* LOCK_log and LOCK_index are inited by init_pthread_objects() */ pthread_mutex_t LOCK_log, LOCK_index; @@ -108,8 +208,8 @@ class MYSQL_LOG etc. So in 4.x this is 1 for relay logs, 0 for binlogs. In 5.0 it's 0 for relay logs too! */ - bool no_auto_events; - /* + bool no_auto_events; + /* The max size before rotation (usable only if log_type == LOG_BIN: binary logs and relay logs). For a binlog, max_size should be max_binlog_size. @@ -117,16 +217,26 @@ class MYSQL_LOG max_binlog_size otherwise. max_size is set in init(), and dynamically changed (when one does SET GLOBAL MAX_BINLOG_SIZE|MAX_RELAY_LOG_SIZE) by fix_max_binlog_size and - fix_max_relay_log_size). + fix_max_relay_log_size). 
*/ ulong max_size; + + ulong prepared_xids; /* for tc log - number of xids to remember */ + pthread_mutex_t LOCK_prep_xids; + pthread_cond_t COND_prep_xids; friend class Log_event; public: MYSQL_LOG(); ~MYSQL_LOG(); - /* + int open(const char *opt_name); + void close(); + int log(THD *thd, my_xid xid); + void unlog(ulong cookie, my_xid xid); + int recover(IO_CACHE *log, Format_description_log_event *fdle); + + /* These describe the log's format. This is used only for relay logs. _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's necessary to have 2 distinct objects, because the I/O thread may be reading @@ -145,7 +255,7 @@ public: { #ifndef DBUG_OFF char buf1[22],buf2[22]; -#endif +#endif DBUG_ENTER("harvest_bytes_written"); (*counter)+=bytes_written; DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1), @@ -162,18 +272,36 @@ public: bool no_auto_events_arg, ulong max_size); void init_pthread_objects(); void cleanup(); - bool open(const char *log_name,enum_log_type log_type, - const char *new_name, const char *index_file_name_arg, + bool open(const char *log_name, + enum_log_type log_type, + const char *new_name, enum cache_type io_cache_type_arg, bool no_auto_events_arg, ulong max_size, bool null_created); + const char *generate_name(const char *log_name, const char *suffix, + bool strip_ext, char *buff); + /* simplified open_xxx wrappers for the gigantic open above */ + bool open_query_log(const char *log_name) + { + char buf[FN_REFLEN]; + return open(generate_name(log_name, ".log", 0, buf), + LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); + } + bool open_slow_log(const char *log_name) + { + char buf[FN_REFLEN]; + return open(generate_name(log_name, "-slow.log", 0, buf), + LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0); + } + bool open_index_file(const char *index_file_name_arg, + const char *log_name); void new_file(bool need_lock= 1); bool write(THD *thd, enum enum_server_command command, const char *format,...); bool write(THD *thd, const 
char *query, uint query_length, time_t query_start=0); bool write(Log_event* event_info); // binary log write - bool write(THD *thd, IO_CACHE *cache, bool commit_or_rollback); + bool write(THD *thd, IO_CACHE *cache); /* v stands for vector @@ -181,20 +309,18 @@ public: */ bool appendv(const char* buf,uint len,...); bool append(Log_event* ev); - + int generate_new_name(char *new_name,const char *old_name); void make_log_name(char* buf, const char* log_ident); bool is_active(const char* log_file_name); int update_log_index(LOG_INFO* linfo, bool need_update_threads); - int purge_logs(const char *to_log, bool included, + int purge_logs(const char *to_log, bool included, bool need_mutex, bool need_update_threads, ulonglong *decrease_log_space); int purge_logs_before_date(time_t purge_time); - int purge_first_log(struct st_relay_log_info* rli, bool included); + int purge_first_log(struct st_relay_log_info* rli, bool included); bool reset_logs(THD* thd); void close(uint exiting); - bool cut_spurious_tail(); - void report_pos_in_innodb(); // iterating through the log index file int find_log_pos(LOG_INFO* linfo, const char* log_name, @@ -487,6 +613,10 @@ typedef struct system_status_var ulong ha_rollback_count; ulong ha_update_count; ulong ha_write_count; + ulong ha_prepare_count; + ulong ha_discover_count; + ulong ha_savepoint_count; + ulong ha_savepoint_rollback_count; /* KEY_CACHE parts. 
These are copies of the original */ ulong key_blocks_changed; @@ -765,6 +895,14 @@ private: Statement *last_found_statement; }; +struct st_savepoint { + struct st_savepoint *prev; + char *name; + uint length, nht; +}; + +enum xa_states {XA_NOTR=0, XA_ACTIVE, XA_IDLE, XA_PREPARED}; +extern const char *xa_state_names[]; /* A registry for item tree transformations performed during @@ -907,15 +1045,15 @@ public: thr_lock_type update_lock_default; delayed_insert *di; my_bool tablespace_op; /* This is TRUE in DISCARD/IMPORT TABLESPACE */ + /* container for handler's private per-connection data */ + void *ha_data[MAX_HA]; struct st_transactions { - IO_CACHE trans_log; // Inited ONLY if binlog is open ! + SAVEPOINT *savepoints; THD_TRANS all; // Trans since BEGIN WORK THD_TRANS stmt; // Trans for current statement - uint bdb_lock_count; -#ifdef HAVE_NDBCLUSTER_DB - void* thd_ndb; -#endif bool on; + XID xid; + enum xa_states xa_state; /* Tables changed in transaction (that must be invalidated in query cache). 
List contain only transactional tables, that not invalidated in query @@ -926,8 +1064,18 @@ public: void cleanup() { changed_tables = 0; +#ifdef USING_TRANSACTIONS free_root(&mem_root,MYF(MY_KEEP_PREALLOC)); +#endif } +#ifdef USING_TRANSACTIONS + st_transactions() + { + bzero((char*)this, sizeof(*this)); + xid.null(); + init_sql_alloc(&mem_root, ALLOC_ROOT_MIN_BLOCK_SIZE, 0); + } +#endif } transaction; Field *dupp_field; #ifndef __WIN__ @@ -1126,7 +1274,7 @@ public: inline ulonglong insert_id(void) { if (!last_insert_id_used) - { + { last_insert_id_used=1; current_insert_id=last_insert_id; } @@ -1135,13 +1283,11 @@ public: inline ulonglong found_rows(void) { return limit_found_rows; - } + } inline bool active_transaction() { -#ifdef USING_TRANSACTIONS - return (transaction.all.bdb_tid != 0 || - transaction.all.innodb_active_trans != 0 || - transaction.all.ndb_tid != 0); +#ifdef USING_TRANSACTIONS + return server_status & SERVER_STATUS_IN_TRANS; #else return 0; #endif @@ -1662,7 +1808,7 @@ class multi_delete :public select_result_interceptor ha_rows deleted, found; uint num_of_tables; int error; - bool do_delete, transactional_tables, log_delayed, normal_tables; + bool do_delete, transactional_tables, normal_tables; public: multi_delete(THD *thd, TABLE_LIST *dt, uint num_of_tables); ~multi_delete(); @@ -1689,7 +1835,8 @@ class multi_update :public select_result_interceptor uint table_count; Copy_field *copy_field; enum enum_duplicates handle_duplicates; - bool do_update, trans_safe, transactional_tables, log_delayed, ignore; + bool do_update, trans_safe, transactional_tables; + bool do_update, trans_safe, transactional_tables, ignore; public: multi_update(THD *thd_arg, TABLE_LIST *ut, TABLE_LIST *leaves_list, diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 15fbfcf928b..0033e419351 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -36,8 +36,8 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, TABLE *table; SQL_SELECT *select=0; 
READ_RECORD info; - bool using_limit=limit != HA_POS_ERROR; - bool transactional_table, log_delayed, safe_update, const_cond; + bool using_limit=limit != HA_POS_ERROR; + bool transactional_table, safe_update, const_cond; ha_rows deleted; SELECT_LEX *select_lex= &thd->lex->select_lex; DBUG_ENTER("mysql_delete"); @@ -232,7 +232,6 @@ cleanup: delete select; transactional_table= table->file->has_transactions(); - log_delayed= (transactional_table || table->s->tmp_table); /* We write to the binary log even if we deleted no row, because maybe the user is using this command to ensure that a table is clean on master *and @@ -248,11 +247,11 @@ cleanup: if (error <= 0) thd->clear_error(); Query_log_event qinfo(thd, thd->query, thd->query_length, - log_delayed, FALSE); + transactional_table, FALSE); if (mysql_bin_log.write(&qinfo) && transactional_table) error=1; } - if (!log_delayed) + if (!transactional_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; } if (transactional_table) @@ -397,7 +396,7 @@ multi_delete::multi_delete(THD *thd_arg, TABLE_LIST *dt, uint num_of_tables_arg) : delete_tables(dt), thd(thd_arg), deleted(0), found(0), num_of_tables(num_of_tables_arg), error(0), - do_delete(0), transactional_tables(0), log_delayed(0), normal_tables(0) + do_delete(0), transactional_tables(0), normal_tables(0) { tempfiles = (Unique **) sql_calloc(sizeof(Unique *) * (num_of_tables-1)); } @@ -444,9 +443,7 @@ multi_delete::initialize_tables(JOIN *join) tbl->no_cache= 1; tbl->used_keys.clear_all(); if (tbl->file->has_transactions()) - log_delayed= transactional_tables= 1; - else if (tbl->s->tmp_table != NO_TMP_TABLE) - log_delayed= 1; + transactional_tables= 1; else normal_tables= 1; } @@ -669,14 +666,14 @@ bool multi_delete::send_eof() if (error <= 0) thd->clear_error(); Query_log_event qinfo(thd, thd->query, thd->query_length, - log_delayed, FALSE); + transactional_tables, FALSE); if (mysql_bin_log.write(&qinfo) && !normal_tables) local_error=1; // Log write failed: roll back 
the SQL statement } - if (!log_delayed) + if (!transactional_tables) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; } - /* Commit or rollback the current SQL statement */ + /* Commit or rollback the current SQL statement */ if (transactional_tables) if (ha_autocommit_or_rollback(thd,local_error > 0)) local_error=1; @@ -767,7 +764,7 @@ bool mysql_truncate(THD *thd, TABLE_LIST *table_list, bool dont_send_ok) *fn_ext(path)=0; // Remove the .frm extension error= ha_create_table(path,&create_info,1); - query_cache_invalidate3(thd, table_list, 0); + query_cache_invalidate3(thd, table_list, 0); end: if (!dont_send_ok) @@ -778,7 +775,7 @@ end: { thd->clear_error(); Query_log_event qinfo(thd, thd->query, thd->query_length, - thd->tmp_table, FALSE); + 0, FALSE); mysql_bin_log.write(&qinfo); } send_ok(thd); // This should return record count diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index fa6f1e05dc6..7d467d215be 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -168,7 +168,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, runs without --log-update or --log-bin). */ bool log_on= (thd->options & OPTION_BIN_LOG) || (!(thd->master_access & SUPER_ACL)); - bool transactional_table, log_delayed; + bool transactional_table; uint value_count; ulong counter = 1; ulonglong id; @@ -433,7 +433,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, /* Invalidate the table in the query cache if something changed. For the transactional algorithm to work the invalidation must be - before binlog writing and ha_autocommit_... 
+ before binlog writing and ha_autocommit_or_rollback */ if (info.copied || info.deleted || info.updated) { @@ -442,7 +442,6 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, transactional_table= table->file->has_transactions(); - log_delayed= (transactional_table || table->s->tmp_table); if ((info.copied || info.deleted || info.updated) && (error <= 0 || !transactional_table)) { @@ -451,11 +450,11 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, if (error <= 0) thd->clear_error(); Query_log_event qinfo(thd, thd->query, thd->query_length, - log_delayed, FALSE); + transactional_table, FALSE); if (mysql_bin_log.write(&qinfo) && transactional_table) error=1; } - if (!log_delayed) + if (!transactional_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; } if (transactional_table) @@ -1970,7 +1969,7 @@ bool select_insert::send_eof() /* We must invalidate the table in the query cache before binlog writing - and ha_autocommit_... + and ha_autocommit_or_rollback */ if (info.copied || info.deleted || info.updated) diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 30c657c3b79..15822aa43d2 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -141,7 +141,7 @@ void lex_start(THD *thd, uchar *buf,uint length) lex->view_prepare_mode= FALSE; lex->derived_tables= 0; lex->lock_option= TL_READ; - lex->found_colon= 0; + lex->found_semicolon= 0; lex->safe_to_cache_query= 1; lex->time_zone_tables_used= 0; lex->leaf_tables_insert= lex->proc_table= lex->query_tables= 0; @@ -949,7 +949,7 @@ int yylex(void *arg, void *yythd) (thd->command != COM_PREPARE)) { lex->safe_to_cache_query= 0; - lex->found_colon= (char*) lex->ptr; + lex->found_semicolon=(char*) lex->ptr; thd->server_status|= SERVER_MORE_RESULTS_EXISTS; lex->next_state= MY_LEX_END; return (END_OF_INPUT); diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 5d232d60e79..c7830a21fcf 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -87,6 +87,8 @@ enum enum_sql_command { SQLCOM_PREPARE, SQLCOM_EXECUTE, SQLCOM_DEALLOCATE_PREPARE, 
SQLCOM_CREATE_VIEW, SQLCOM_DROP_VIEW, SQLCOM_CREATE_TRIGGER, SQLCOM_DROP_TRIGGER, + SQLCOM_XA_START, SQLCOM_XA_END, SQLCOM_XA_PREPARE, + SQLCOM_XA_COMMIT, SQLCOM_XA_ROLLBACK, SQLCOM_XA_RECOVER, /* This should be the last !!! */ SQLCOM_END @@ -636,6 +638,9 @@ struct st_trg_chistics extern sys_var_long_ptr trg_new_row_fake_var; +enum xa_option_words {XA_NONE, XA_JOIN, XA_RESUME, XA_ONE_PHASE, + XA_SUSPEND, XA_FOR_MIGRATE}; + /* The state of the lex parsing. This is saved in the THD struct */ typedef struct st_lex @@ -655,12 +660,12 @@ typedef struct st_lex char *backup_dir; /* For RESTORE/BACKUP */ char* to_log; /* For PURGE MASTER LOGS TO */ char* x509_subject,*x509_issuer,*ssl_cipher; - char* found_colon; /* For multi queries - next query */ + char* found_semicolon; /* For multi queries - next query */ String *wild; sql_exchange *exchange; select_result *result; Item *default_value, *on_update_value; - LEX_STRING *comment, name_and_length; + LEX_STRING comment, ident; LEX_USER *grant_user; gptr yacc_yyss,yacc_yyvs; THD *thd; @@ -690,7 +695,6 @@ typedef struct st_lex List<LEX_STRING> view_list; // view list (list of field names in view) SQL_LIST proc_list, auxilliary_table_list, save_list; create_field *last_field; - char *savepoint_name; // Transaction savepoint id udf_func udf; HA_CHECK_OPT check_opt; // check/repair options HA_CREATE_INFO create_info; @@ -704,7 +708,10 @@ typedef struct st_lex enum enum_duplicates duplicates; enum enum_tx_isolation tx_isolation; enum enum_ha_read_modes ha_read_mode; + union { enum ha_rkey_function ha_rkey_mode; + enum xa_option_words xa_opt; + }; enum enum_var_type option_type; enum enum_view_create_mode create_view_mode; enum enum_drop_mode drop_mode; diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 7858632fff2..efd914003dc 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -105,7 +105,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, char *tdb= thd->db ? 
thd->db : db; // Result is never null ulong skip_lines= ex->skip_lines; int res; - bool transactional_table, log_delayed; + bool transactional_table; DBUG_ENTER("mysql_load"); #ifdef EMBEDDED_LIBRARY @@ -133,7 +133,6 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, } table= table_list->table; transactional_table= table->file->has_transactions(); - log_delayed= (transactional_table || table->s->tmp_table); if (!fields.elements) { @@ -263,7 +262,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, lf_info.handle_dup = handle_duplicates; lf_info.wrote_create_file = 0; lf_info.last_pos_in_file = HA_POS_ERROR; - lf_info.log_delayed= log_delayed; + lf_info.log_delayed= transactional_table; read_info.set_io_cache_arg((void*) &lf_info); } #endif /*!EMBEDDED_LIBRARY*/ @@ -365,7 +364,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, /* If the file was not empty, wrote_create_file is true */ if (lf_info.wrote_create_file) { - Delete_file_log_event d(thd, db, log_delayed); + Delete_file_log_event d(thd, db, transactional_table); mysql_bin_log.write(&d); } } @@ -377,7 +376,7 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, (ulong) (info.records - info.copied), (ulong) thd->cuted_fields); send_ok(thd,info.copied+info.deleted,0L,name); - if (!log_delayed) + if (!transactional_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; #ifndef EMBEDDED_LIBRARY if (mysql_bin_log.is_open()) @@ -387,16 +386,16 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, block will be logged only after Execute_load_log_event (which is wrong), when read_info is destroyed. 
*/ - read_info.end_io_cache(); + read_info.end_io_cache(); if (lf_info.wrote_create_file) { - Execute_load_log_event e(thd, db, log_delayed); + Execute_load_log_event e(thd, db, transactional_table); mysql_bin_log.write(&e); } } #endif /*!EMBEDDED_LIBRARY*/ if (transactional_table) - error=ha_autocommit_or_rollback(thd,error); + error=ha_autocommit_or_rollback(thd,error); err: if (thd->lock) @@ -404,7 +403,7 @@ err: mysql_unlock_tables(thd, thd->lock); thd->lock=0; } - thd->abort_on_warning= 0; + thd->abort_on_warning= 0; DBUG_RETURN(error); } @@ -732,12 +731,11 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs, my_free((gptr) buffer,MYF(0)); /* purecov: inspected */ error=1; } - else + else { /* init_io_cache() will not initialize read_function member - if the cache is READ_NET. The reason is explained in - mysys/mf_iocache.c. So we work around the problem with a + if the cache is READ_NET. So we work around the problem with a manual assignment */ need_end_io_cache = 1; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 0a0258465fb..6f29e8b848d 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -83,6 +83,10 @@ const char *command_name[]={ "Error" // Last command number }; +const char *xa_state_names[]={ + "NON-EXISTING", "ACTIVE", "IDLE", "PREPARED" +}; + static char empty_c_string[1]= {0}; // Used for not defined 'db' #ifdef __WIN__ @@ -152,7 +156,7 @@ static bool begin_trans(THD *thd) OPTION_BEGIN); thd->server_status|= SERVER_STATUS_IN_TRANS; if (lex->start_transaction_opt & MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT) - error= ha_start_consistent_snapshot(thd); + error= ha_start_consistent_snapshot(thd); } return error; } @@ -555,6 +559,12 @@ void free_max_user_conn(void) sql_command is actually set to SQLCOM_END sometimes so we need the +1 to include it in the array. 
+ + numbers are: + 0 - read-only query + != 0 - query that may change a table + 2 - query that returns meaningful ROW_COUNT() - + a number of modified rows */ char uc_update_queries[SQLCOM_END+1]; @@ -566,23 +576,23 @@ void init_update_queries(void) uc_update_queries[SQLCOM_CREATE_TABLE]=1; uc_update_queries[SQLCOM_CREATE_INDEX]=1; uc_update_queries[SQLCOM_ALTER_TABLE]=1; - uc_update_queries[SQLCOM_UPDATE]=1; - uc_update_queries[SQLCOM_INSERT]=1; - uc_update_queries[SQLCOM_INSERT_SELECT]=1; - uc_update_queries[SQLCOM_DELETE]=1; + uc_update_queries[SQLCOM_UPDATE]=2; + uc_update_queries[SQLCOM_UPDATE_MULTI]=2; + uc_update_queries[SQLCOM_INSERT]=2; + uc_update_queries[SQLCOM_INSERT_SELECT]=2; + uc_update_queries[SQLCOM_DELETE]=2; + uc_update_queries[SQLCOM_DELETE_MULTI]=2; uc_update_queries[SQLCOM_TRUNCATE]=1; uc_update_queries[SQLCOM_DROP_TABLE]=1; uc_update_queries[SQLCOM_LOAD]=1; uc_update_queries[SQLCOM_CREATE_DB]=1; uc_update_queries[SQLCOM_DROP_DB]=1; - uc_update_queries[SQLCOM_REPLACE]=1; - uc_update_queries[SQLCOM_REPLACE_SELECT]=1; + uc_update_queries[SQLCOM_REPLACE]=2; + uc_update_queries[SQLCOM_REPLACE_SELECT]=2; uc_update_queries[SQLCOM_RENAME_TABLE]=1; uc_update_queries[SQLCOM_BACKUP_TABLE]=1; uc_update_queries[SQLCOM_RESTORE_TABLE]=1; - uc_update_queries[SQLCOM_DELETE_MULTI]=1; uc_update_queries[SQLCOM_DROP_INDEX]=1; - uc_update_queries[SQLCOM_UPDATE_MULTI]=1; uc_update_queries[SQLCOM_CREATE_VIEW]=1; uc_update_queries[SQLCOM_DROP_VIEW]=1; } @@ -1189,24 +1199,25 @@ extern "C" pthread_handler_decl(handle_bootstrap,arg) We don't need to obtain LOCK_thread_count here because in bootstrap mode we have only one thread. 
*/ - thd->query_id=query_id++; - if (mqh_used && thd->user_connect && check_mqh(thd, SQLCOM_END)) - { - thd->net.error = 0; - close_thread_tables(thd); // Free tables - free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); - break; - } + thd->query_id=next_query_id(); mysql_parse(thd,thd->query,length); close_thread_tables(thd); // Free tables if (thd->is_fatal_error) break; free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC)); +#ifdef USING_TRANSACTIONS free_root(&thd->transaction.mem_root,MYF(MY_KEEP_PREALLOC)); +#endif } /* thd->fatal_error should be set in case something went wrong */ end: + bootstrap_error= thd->is_fatal_error; + + net_end(&thd->net); + thd->cleanup(); + delete thd; + #ifndef EMBEDDED_LIBRARY (void) pthread_mutex_lock(&LOCK_thread_count); thread_count--; @@ -1215,7 +1226,7 @@ end: my_thread_end(); pthread_exit(0); #endif - DBUG_RETURN(0); // Never reached + DBUG_RETURN(0); } /* This works because items are allocated with sql_alloc() */ @@ -1281,21 +1292,17 @@ int mysql_table_dump(THD* thd, char* db, char* tbl_name, int fd) my_error(ER_GET_ERRNO, MYF(0), error); err: - close_thread_tables(thd); DBUG_RETURN(error); } /* Ends the current transaction and (maybe) begin the next First uint4 in packet is completion type - Remainder is savepoint name (if required) SYNOPSIS - mysql_endtrans() + end_trans_and_send_ok() thd Current thread completion Completion type - savepoint_name Savepoint when doing ROLLBACK_SAVEPOINT_NAME - or RELEASE_SAVEPOINT_NAME release (OUT) indicator for release operation RETURN @@ -1307,19 +1314,16 @@ enum enum_mysql_completiontype { COMMIT_RELEASE=-1, COMMIT=0, ROLLBACK=1, - SAVEPOINT_NAME_ROLLBACK=2, - SAVEPOINT_NAME_RELEASE=4, COMMIT_AND_CHAIN=6, ROLLBACK_AND_CHAIN=7 }; -int mysql_endtrans(THD *thd, enum enum_mysql_completiontype completion, - char *savepoint_name) +int end_trans_and_send_ok(THD *thd, enum enum_mysql_completiontype completion) { bool do_release= 0; int res= 0; LEX *lex= thd->lex; - DBUG_ENTER("mysql_endtrans"); + 
DBUG_ENTER("end_trans_and_send_ok"); switch (completion) { case COMMIT: @@ -1334,7 +1338,7 @@ int mysql_endtrans(THD *thd, enum enum_mysql_completiontype completion, send_ok(thd); break; case COMMIT_RELEASE: - do_release= 1; + do_release= 1; /* fall through */ case COMMIT_AND_CHAIN: res= end_active_trans(thd); if (!res && completion == COMMIT_AND_CHAIN) @@ -1343,7 +1347,7 @@ int mysql_endtrans(THD *thd, enum enum_mysql_completiontype completion, send_ok(thd); break; case ROLLBACK_RELEASE: - do_release= 1; + do_release= 1; /* fall through */ case ROLLBACK: case ROLLBACK_AND_CHAIN: { @@ -1360,8 +1364,8 @@ int mysql_endtrans(THD *thd, enum enum_mysql_completiontype completion, the error log; but we don't want users to wonder why they have this message in the error log, so we don't send it. */ - warn= (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) && - !thd->slave_thread; + warn= (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) && + !thd->slave_thread; } else res= -1; @@ -1369,7 +1373,7 @@ int mysql_endtrans(THD *thd, enum enum_mysql_completiontype completion, if (!res && (completion == ROLLBACK_AND_CHAIN)) res= begin_trans(thd); - if (!res) + if (!res) { if (warn) push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, @@ -1379,31 +1383,17 @@ int mysql_endtrans(THD *thd, enum enum_mysql_completiontype completion, } break; } - case SAVEPOINT_NAME_ROLLBACK: - if (!(res=ha_rollback_to_savepoint(thd, savepoint_name))) - { - if ((thd->options & OPTION_STATUS_NO_TRANS_UPDATE) && !thd->slave_thread) - push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARNING_NOT_COMPLETE_ROLLBACK, - ER(ER_WARNING_NOT_COMPLETE_ROLLBACK)); - send_ok(thd); - } - break; - case SAVEPOINT_NAME_RELEASE: - if (!(res=ha_release_savepoint_name(thd, savepoint_name))) - send_ok(thd); - break; default: res= -1; my_error(ER_UNKNOWN_COM_ERROR, MYF(0)); DBUG_RETURN(-1); } - + if (res < 0) my_error(thd->killed_errno(), MYF(0)); else if ((res == 0) && do_release) - thd->killed= THD::KILL_CONNECTION; - + thd->killed= 
THD::KILL_CONNECTION; + DBUG_RETURN(res); } @@ -1518,7 +1508,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, VOID(pthread_mutex_lock(&LOCK_thread_count)); thd->query_id=query_id; if (command != COM_STATISTICS && command != COM_PING) - query_id++; + next_query_id(); thread_running++; /* TODO: set thd->lex->sql_command to SQLCOM_END here */ VOID(pthread_mutex_unlock(&LOCK_thread_count)); @@ -1674,9 +1664,9 @@ bool dispatch_command(enum enum_server_command command, THD *thd, DBUG_PRINT("query",("%-.4096s",thd->query)); mysql_parse(thd,thd->query, thd->query_length); - while (!thd->killed && thd->lex->found_colon && !thd->net.report_error) + while (!thd->killed && thd->lex->found_semicolon && !thd->net.report_error) { - char *packet= thd->lex->found_colon; + char *packet= thd->lex->found_semicolon; /* Multiple queries exits, execute them individually in embedded server - just store them to be executed later @@ -1696,7 +1686,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd, VOID(pthread_mutex_lock(&LOCK_thread_count)); thd->query_length= length; thd->query= packet; - thd->query_id= query_id++; + thd->query_id= next_query_id(); /* TODO: set thd->lex->sql_command to SQLCOM_END here */ VOID(pthread_mutex_unlock(&LOCK_thread_count)); #ifndef EMBEDDED_LIBRARY @@ -2008,6 +1998,17 @@ bool dispatch_command(enum enum_server_command command, THD *thd, thd->proc_info="closing tables"; close_thread_tables(thd); /* Free tables */ } + /* + assume handlers auto-commit (if some doesn't - transaction handling + in MySQL should be redesigned to support it; it's a big change, + and it's not worth it - better to commit explicitly only writing + transactions, read-only ones should better take care of themselves. 
+ saves some work in 2pc too) + see also sql_base.cc - close_thread_tables() + */ + bzero(&thd->transaction.stmt, sizeof(thd->transaction.stmt)); + if (!thd->active_transaction()) + thd->transaction.xid.null(); /* report error issued during command execution */ if (thd->killed_errno() && !thd->net.report_error) @@ -2404,7 +2405,7 @@ mysql_execute_command(THD *thd) */ if (opt_readonly && !(thd->slave_thread || (thd->master_access & SUPER_ACL)) && - (uc_update_queries[lex->sql_command] > 0)) + uc_update_queries[lex->sql_command]) { my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only"); DBUG_RETURN(-1); @@ -2670,7 +2671,7 @@ mysql_execute_command(THD *thd) { if (check_global_access(thd, REPL_SLAVE_ACL)) goto error; - res = show_binlog_events(thd); + res = mysql_show_binlog_events(thd); break; } #endif @@ -2705,7 +2706,7 @@ mysql_execute_command(THD *thd) check_access(thd, INDEX_ACL, first_table->db, &first_table->grant.privilege, 0, 0)) goto error; - res= mysql_assign_to_keycache(thd, first_table, &lex->name_and_length); + res= mysql_assign_to_keycache(thd, first_table, &lex->ident); break; } case SQLCOM_PRELOAD_KEYS: @@ -3326,7 +3327,7 @@ unsent_create_error: first_table->ancestor && first_table->ancestor->next_local); my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0), first_table->view_db.str, first_table->view_name.str); - res= -1; + res= FALSE; break; } @@ -3351,7 +3352,6 @@ unsent_create_error: } else res= TRUE; - close_thread_tables(thd); break; } case SQLCOM_DROP_TABLE: @@ -3874,7 +3874,7 @@ unsent_create_error: */ if (check_db_used(thd, all_tables)) goto error; - res= mysql_ha_read(thd, first_table, lex->ha_read_mode, lex->backup_dir, + res= mysql_ha_read(thd, first_table, lex->ha_read_mode, lex->ident.str, lex->insert_list, lex->ha_rkey_mode, select_lex->where, select_lex->select_limit, select_lex->offset_limit); break; @@ -3882,32 +3882,116 @@ unsent_create_error: case SQLCOM_BEGIN: if (begin_trans(thd)) goto error; - else - send_ok(thd); + send_ok(thd); 
break; case SQLCOM_COMMIT: - if (mysql_endtrans(thd, lex->tx_release ? COMMIT_RELEASE : - lex->tx_chain ? COMMIT_AND_CHAIN : COMMIT, 0)) + if (end_trans_and_send_ok(thd, lex->tx_release ? COMMIT_RELEASE : + lex->tx_chain ? COMMIT_AND_CHAIN : COMMIT, 0)) goto error; break; case SQLCOM_ROLLBACK: - if (mysql_endtrans(thd, lex->tx_release ? ROLLBACK_RELEASE : - lex->tx_chain ? ROLLBACK_AND_CHAIN : ROLLBACK, 0)) + if (end_trans_and_send_ok(thd, lex->tx_release ? ROLLBACK_RELEASE : + lex->tx_chain ? ROLLBACK_AND_CHAIN : ROLLBACK, + 0)) goto error; break; + case SQLCOM_RELEASE_SAVEPOINT: + SAVEPOINT **sv; + for (sv=&thd->transaction.savepoints; *sv; sv=&(*sv)->prev) + { + if (my_strnncoll(system_charset_info, + (uchar *)lex->ident.str, lex->ident.length, + (uchar *)(*sv)->name, (*sv)->length) == 0) + break; + } + if (*sv) + { + if (ha_release_savepoint(thd, *sv)) + res= TRUE; // cannot happen + *sv= 0; + } + else + { + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "SAVEPOINT", lex->ident.str); + res= TRUE; + } + break; + } case SQLCOM_ROLLBACK_TO_SAVEPOINT: - if (mysql_endtrans(thd, SAVEPOINT_NAME_ROLLBACK, lex->savepoint_name)) - goto error; + SAVEPOINT **sv; + for (sv=&thd->transaction.savepoints; *sv; sv=&(*sv)->prev) + { + if (my_strnncoll(system_charset_info, + (uchar *)lex->ident.str, lex->ident.length, + (uchar *)(*sv)->name, (*sv)->length) == 0) + break; + } + if (*sv) + { + if (ha_rollback_to_savepoint(thd, *sv)) + res= TRUE; // cannot happen + else + { + if ((thd->options & OPTION_STATUS_NO_TRANS_UPDATE) && + !thd->slave_thread) + push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARNING_NOT_COMPLETE_ROLLBACK, + ER(ER_WARNING_NOT_COMPLETE_ROLLBACK)); + send_ok(thd); + } + *sv= 0; + } + else + { + my_error(ER_SP_DOES_NOT_EXIST, MYF(0), "SAVEPOINT", lex->ident.str); + res= TRUE; + } break; + } case SQLCOM_SAVEPOINT: - if (!ha_savepoint(thd, lex->savepoint_name)) + if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) || + !opt_using_transactions) send_ok(thd); 
else - goto error; - break; - case SQLCOM_RELEASE_SAVEPOINT: - if (mysql_endtrans(thd, SAVEPOINT_NAME_RELEASE, lex->savepoint_name)) - goto error; + { + SAVEPOINT **sv, *newsv; + for (sv=&thd->transaction.savepoints; *sv; sv=&(*sv)->prev) + { + if (my_strnncoll(system_charset_info, + (uchar *)lex->ident.str, lex->ident.length, + (uchar *)(*sv)->name, (*sv)->length) == 0) + break; + } + if (*sv) /* old savepoint of the same name exists */ + { + newsv=*sv; + ha_release_savepoint(thd, *sv); // it cannot fail + *sv=(*sv)->prev; + } + else if ((newsv=(SAVEPOINT *) alloc_root(&thd->transaction.mem_root, + savepoint_alloc_size)) == 0) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + res= TRUE; + break; + } + newsv->name=strmake_root(&thd->transaction.mem_root, + lex->ident.str, lex->ident.length); + newsv->length=lex->ident.length; + /* + if we'll get an error here, don't add new savepoint to the list. + we'll lose a little bit of memory in transaction mem_root, but it'll + be free'd when transaction ends anyway + */ + if (ha_savepoint(thd, newsv)) + res= TRUE; + else + { + newsv->prev=thd->transaction.savepoints; + thd->transaction.savepoints=newsv; + send_ok(thd); + } + } break; case SQLCOM_CREATE_PROCEDURE: case SQLCOM_CREATE_SPFUNCTION: @@ -3924,12 +4008,12 @@ unsent_create_error: lex->sphead= 0; goto error; } - + if (!lex->sphead->m_db.str || !lex->sphead->m_db.str[0]) { lex->sphead->m_db.length= strlen(thd->db); - lex->sphead->m_db.str= strmake_root(thd->mem_root, thd->db, - lex->sphead->m_db.length); + lex->sphead->m_db.str= strmake_root(thd->mem_root, thd->db, + lex->sphead->m_db.length); } name= lex->sphead->name(&namelen); @@ -4284,7 +4368,157 @@ unsent_create_error: res= mysql_create_or_drop_trigger(thd, all_tables, 0); break; } - default: /* Impossible */ + case SQLCOM_XA_START: + if (thd->transaction.xa_state == XA_IDLE && thd->lex->xa_opt == XA_RESUME) + { + if (! 
thd->transaction.xid.eq(&thd->lex->ident)) + { + my_error(ER_XAER_NOTA, MYF(0)); + break; + } + thd->transaction.xa_state=XA_ACTIVE; + send_ok(thd); + res=TRUE; + break; + } + if (thd->lex->ident.length > MAXGTRIDSIZE || thd->lex->xa_opt != XA_NONE) + { // JOIN is not supported yet. TODO + my_error(ER_XAER_INVAL, MYF(0)); + break; + } + if (thd->transaction.xa_state != XA_NOTR) + { + my_error(ER_XAER_RMFAIL, MYF(0), + xa_state_names[thd->transaction.xa_state]); + break; + } + if (thd->active_transaction() || thd->locked_tables) + { + my_error(ER_XAER_OUTSIDE, MYF(0)); + break; + } + DBUG_ASSERT(thd->transaction.xid.is_null()); + thd->transaction.xa_state=XA_ACTIVE; + thd->transaction.xid.set(&thd->lex->ident); + thd->options= ((thd->options & (ulong) ~(OPTION_STATUS_NO_TRANS_UPDATE)) | + OPTION_BEGIN); + thd->server_status|= SERVER_STATUS_IN_TRANS; + send_ok(thd); + res=TRUE; + break; + case SQLCOM_XA_END: + /* fake it */ + if (thd->lex->xa_opt != XA_NONE) + { // SUSPEND and FOR MIGRATE are not supported yet. 
TODO + my_error(ER_XAER_INVAL, MYF(0)); + break; + } + if (thd->transaction.xa_state != XA_ACTIVE) + { + my_error(ER_XAER_RMFAIL, MYF(0), + xa_state_names[thd->transaction.xa_state]); + break; + } + if (!thd->transaction.xid.eq(&thd->lex->ident)) + { + my_error(ER_XAER_NOTA, MYF(0)); + break; + } + thd->transaction.xa_state=XA_IDLE; + send_ok(thd); + res=TRUE; + break; + case SQLCOM_XA_PREPARE: + if (thd->transaction.xa_state != XA_IDLE) + { + my_error(ER_XAER_RMFAIL, MYF(0), + xa_state_names[thd->transaction.xa_state]); + break; + } + if (!thd->transaction.xid.eq(&thd->lex->ident)) + { + my_error(ER_XAER_NOTA, MYF(0)); + break; + } + if (ha_prepare(thd)) + { + my_error(ER_XA_RBROLLBACK, MYF(0)); + thd->transaction.xa_state=XA_NOTR; + break; + } + res=TRUE; + thd->transaction.xa_state=XA_PREPARED; + send_ok(thd); + break; + case SQLCOM_XA_COMMIT: + if (!thd->transaction.xid.eq(&thd->lex->ident)) + { + if (!(res= !ha_commit_or_rollback_by_xid(&thd->lex->ident, 1))) + my_error(ER_XAER_NOTA, MYF(0)); + break; + } + if (thd->transaction.xa_state == XA_IDLE && thd->lex->xa_opt == XA_ONE_PHASE) + { + int r; + if ((r= ha_commit(thd))) + my_error(r == 1 ? 
ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0)); + else + { + send_ok(thd); + res= TRUE; + } + } + else + if (thd->transaction.xa_state == XA_PREPARED && thd->lex->xa_opt == XA_NONE) + { + if (ha_commit_one_phase(thd, 1)) + my_error(ER_XAER_RMERR, MYF(0)); + else + { + send_ok(thd); + res= TRUE; + } + } + else + { + my_error(ER_XAER_RMFAIL, MYF(0), + xa_state_names[thd->transaction.xa_state]); + break; + } + thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE); + thd->server_status&= ~SERVER_STATUS_IN_TRANS; + thd->transaction.xa_state=XA_NOTR; + break; + case SQLCOM_XA_ROLLBACK: + if (!thd->transaction.xid.eq(&thd->lex->ident)) + { + if (!(res= !ha_commit_or_rollback_by_xid(&thd->lex->ident, 0))) + my_error(ER_XAER_NOTA, MYF(0)); + break; + } + if (thd->transaction.xa_state != XA_IDLE && + thd->transaction.xa_state != XA_PREPARED) + { + my_error(ER_XAER_RMFAIL, MYF(0), + xa_state_names[thd->transaction.xa_state]); + break; + } + if (ha_rollback(thd)) + my_error(ER_XAER_RMERR, MYF(0)); + else + { + send_ok(thd); + res= TRUE; + } + thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE); + thd->server_status&= ~SERVER_STATUS_IN_TRANS; + thd->transaction.xa_state=XA_NOTR; + break; + case SQLCOM_XA_RECOVER: + res= !mysql_xa_recover(thd); + break; + default: + DBUG_ASSERT(0); /* Impossible */ send_ok(thd); break; } @@ -4319,22 +4553,10 @@ unsent_create_error: the statement is not DELETE, INSERT or UPDATE (or a CALL executing such a statement), but -1 is what JDBC and ODBC wants. 
*/ - switch (lex->sql_command) { - case SQLCOM_UPDATE: - case SQLCOM_UPDATE_MULTI: - case SQLCOM_REPLACE: - case SQLCOM_INSERT: - case SQLCOM_REPLACE_SELECT: - case SQLCOM_INSERT_SELECT: - case SQLCOM_DELETE: - case SQLCOM_DELETE_MULTI: - case SQLCOM_CALL: - break; - default: + if (lex->sql_command != SQLCOM_CALL && uc_update_queries[lex->sql_command]<2) thd->row_count_func= -1; - } goto cleanup; - + error: res= 1; @@ -5112,17 +5334,7 @@ bool add_field_to_list(THD *thd, char *field_name, enum_field_types type, new_field->charset=cs; new_field->geom_type= (Field::geometry_type) uint_geom_type; - if (!comment) - { - new_field->comment.str=0; - new_field->comment.length=0; - } - else - { - /* In this case comment is always of type Item_string */ - new_field->comment.str= (char*) comment->str; - new_field->comment.length=comment->length; - } + new_field->comment=*comment; /* Set flag if this field doesn't have a default value Enum values has always the first value as a default (set in diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 8ba92015535..ebaef722af2 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -25,49 +25,34 @@ int max_binlog_dump_events = 0; // unlimited my_bool opt_sporadic_binlog_dump_fail = 0; static int binlog_dump_count = 0; -int check_binlog_magic(IO_CACHE* log, const char** errmsg) -{ - char magic[4]; - DBUG_ASSERT(my_b_tell(log) == 0); - - if (my_b_read(log, (byte*) magic, sizeof(magic))) - { - *errmsg = "I/O error reading the header from the binary log"; - sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno, - log->error); - return 1; - } - if (memcmp(magic, BINLOG_MAGIC, sizeof(magic))) - { - *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL"; - return 1; - } - return 0; -} - - /* +/* fake_rotate_event() builds a fake (=which does not exist physically in any binlog) Rotate event, which contains the name of the binlog we are going to send to the slave (because the 
slave may not know it if it just asked for MASTER_LOG_FILE='', MASTER_LOG_POS=4). - < 4.0.14, fake_rotate_event() was called only if the requested pos was - 4. After this version we always call it, so that a 3.23.58 slave can rely on + < 4.0.14, fake_rotate_event() was called only if the requested pos was 4. + After this version we always call it, so that a 3.23.58 slave can rely on it to detect if the master is 4.0 (and stop) (the _fake_ Rotate event has zeros in the good positions which, by chance, make it possible for the 3.23 slave to detect that this event is unexpected) (this is luck which happens because the master and slave disagree on the size of the header of Log_event). - - Relying on the event length of the Rotate event instead of these well-placed - zeros was not possible as Rotate events have a variable-length part. + + Relying on the event length of the Rotate event instead of these + well-placed zeros was not possible as Rotate events have a variable-length + part. */ static int fake_rotate_event(NET* net, String* packet, char* log_file_name, - ulonglong position, const char** errmsg) + ulonglong position, int flags, const char** errmsg) { DBUG_ENTER("fake_rotate_event"); - char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN]; - memset(header, 0, 4); // 'when' (the timestamp) does not matter, is set to 0 + char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN+100]; + /* + 'when' (the timestamp) is set to 0 so that slave could distinguish between + real and fake Rotate events (if necessary) + */ + memset(header, 0, 4); header[EVENT_TYPE_OFFSET] = ROTATE_EVENT; char* p = log_file_name+dirname_length(log_file_name); @@ -75,11 +60,11 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name, ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN; int4store(header + SERVER_ID_OFFSET, server_id); int4store(header + EVENT_LEN_OFFSET, event_len); - int2store(header + FLAGS_OFFSET, 0); - + int2store(header + 
FLAGS_OFFSET, flags); + // TODO: check what problems this may cause and fix them int4store(header + LOG_POS_OFFSET, 0); - + packet->append(header, sizeof(header)); int8store(buf+R_POS_OFFSET,position); packet->append(buf, ROTATE_HEADER_LEN); @@ -164,41 +149,6 @@ static int send_file(THD *thd) } -File open_binlog(IO_CACHE *log, const char *log_file_name, - const char **errmsg) -{ - File file; - DBUG_ENTER("open_binlog"); - - if ((file = my_open(log_file_name, O_RDONLY | O_BINARY, MYF(MY_WME))) < 0) - { - sql_print_error("Failed to open log (\ -file '%s', errno %d)", log_file_name, my_errno); - *errmsg = "Could not open log file"; // This will not be sent - goto err; - } - if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0, - MYF(MY_WME | MY_DONT_CHECK_FILESIZE))) - { - sql_print_error("Failed to create a cache on log (\ -file '%s')", log_file_name); - *errmsg = "Could not open log file"; // This will not be sent - goto err; - } - if (check_binlog_magic(log,errmsg)) - goto err; - DBUG_RETURN(file); - -err: - if (file >= 0) - { - my_close(file,MYF(0)); - end_io_cache(log); - } - DBUG_RETURN(-1); -} - - /* Adjust the position pointer in the binary log file for all running slaves @@ -330,7 +280,7 @@ bool purge_master_logs_before_date(THD* thd, time_t purge_time) int test_for_non_eof_log_read_errors(int error, const char **errmsg) { - if (error == LOG_READ_EOF) + if (error == LOG_READ_EOF) return 0; my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; switch (error) { @@ -375,6 +325,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, const char *errmsg = "Unknown error"; NET* net = &thd->net; pthread_mutex_t *log_lock; + bool binlog_can_be_corrupted= FALSE, rotate_was_found=FALSE; #ifndef DBUG_OFF int left_events = max_binlog_dump_events; #endif @@ -442,37 +393,38 @@ impossible position"; /* Tell the client about the log name with a fake Rotate event; - this is needed even if we also send a Format_description_log_event just - after, because that event 
does not contain the binlog's name. - Note that as this Rotate event is sent before Format_description_log_event, - the slave cannot have any info to understand this event's format, so the - header len of Rotate_log_event is FROZEN - (so in 5.0 it will have a header shorter than other events except - FORMAT_DESCRIPTION_EVENT). - Before 4.0.14 we called fake_rotate_event below only if - (pos == BIN_LOG_HEADER_SIZE), because if this is false then the slave + this is needed even if we also send a Format_description_log_event + just after, because that event does not contain the binlog's name. + Note that as this Rotate event is sent before + Format_description_log_event, the slave cannot have any info to + understand this event's format, so the header len of + Rotate_log_event is FROZEN (so in 5.0 it will have a header shorter + than other events except FORMAT_DESCRIPTION_EVENT). + Before 4.0.14 we called fake_rotate_event below only if (pos == + BIN_LOG_HEADER_SIZE), because if this is false then the slave already knows the binlog's name. - Since, we always call fake_rotate_event; if the slave already knew the log's - name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is useless but does - not harm much. It is nice for 3.23 (>=.58) slaves which test Rotate events - to see if the master is 4.0 (then they choose to stop because they can't - replicate 4.0); by always calling fake_rotate_event we are sure that - 3.23.58 and newer will detect the problem as soon as replication starts - (BUG#198). + Since, we always call fake_rotate_event; if the slave already knew + the log's name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is + useless but does not harm much. It is nice for 3.23 (>=.58) slaves + which test Rotate events to see if the master is 4.0 (then they + choose to stop because they can't replicate 4.0); by always calling + fake_rotate_event we are sure that 3.23.58 and newer will detect the + problem as soon as replication starts (BUG#198). 
Always calling fake_rotate_event makes sending of normal - (=from-binlog) Rotate events a priori unneeded, but it is not so simple: - the 2 Rotate events are not equivalent, the normal one is before the Stop - event, the fake one is after. If we don't send the normal one, then the - Stop event will be interpreted (by existing 4.0 slaves) as "the master - stopped", which is wrong. So for safety, given that we want minimum - modification of 4.0, we send the normal and fake Rotates. + (=from-binlog) Rotate events a priori unneeded, but it is not so + simple: the 2 Rotate events are not equivalent, the normal one is + before the Stop event, the fake one is after. If we don't send the + normal one, then the Stop event will be interpreted (by existing 4.0 + slaves) as "the master stopped", which is wrong. So for safety, + given that we want minimum modification of 4.0, we send the normal + and fake Rotates. */ - if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg)) + if (fake_rotate_event(net, packet, log_file_name, pos, 0, &errmsg)) { - /* - This error code is not perfect, as fake_rotate_event() does not read - anything from the binlog; if it fails it's because of an error in - my_net_write(), fortunately it will say it in errmsg. + /* + This error code is not perfect, as fake_rotate_event() does not + read anything from the binlog; if it fails it's because of an + error in my_net_write(), fortunately it will say so in errmsg. */ my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; goto err; @@ -480,30 +432,35 @@ impossible position"; packet->set("\0", 1, &my_charset_bin); /* - We can set log_lock now, it does not move (it's a member of mysql_bin_log, - and it's already inited, and it will be destroyed only at shutdown). + We can set log_lock now, it does not move (it's a member of + mysql_bin_log, and it's already inited, and it will be destroyed + only at shutdown). 
*/ - log_lock = mysql_bin_log.get_log_lock(); + log_lock = mysql_bin_log.get_log_lock(); if (pos > BIN_LOG_HEADER_SIZE) - { - /* Try to find a Format_description_log_event at the beginning of the binlog */ + { + /* + Try to find a Format_description_log_event at the beginning of + the binlog + */ if (!(error = Log_event::read_log_event(&log, packet, log_lock))) { /* - The packet has offsets equal to the normal offsets in a binlog event - +1 (the first character is \0). + The packet has offsets equal to the normal offsets in a binlog + event +1 (the first character is \0). */ DBUG_PRINT("info", ("Looked for a Format_description_log_event, found event type %d", (*packet)[EVENT_TYPE_OFFSET+1])); if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT) { + binlog_can_be_corrupted= (*packet)[FLAGS_OFFSET+1] & LOG_EVENT_BINLOG_IN_USE_F; /* mark that this event with "log_pos=0", so the slave should not increment master's binlog position (rli->group_master_log_pos) */ - int4store(packet->c_ptr() +LOG_POS_OFFSET+1,0); + int4store(packet->c_ptr()+LOG_POS_OFFSET+1, 0); /* send it */ if (my_net_write(net, (char*)packet->ptr(), packet->length())) { @@ -512,24 +469,25 @@ impossible position"; goto err; } /* - No need to save this event. We are only doing simple reads (no real - parsing of the events) so we don't need it. And so we don't need the - artificial Format_description_log_event of 3.23&4.x. + No need to save this event. We are only doing simple reads + (no real parsing of the events) so we don't need it. And so + we don't need the artificial Format_description_log_event of + 3.23&4.x. */ } } else if (test_for_non_eof_log_read_errors(error, &errmsg)) goto err; - /* + /* else: it's EOF, nothing to do, go on reading next events, the Format_description_log_event will be found naturally if it is written. 
*/ /* reset the packet as we wrote to it in any case */ packet->set("\0", 1, &my_charset_bin); - } /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the Format_description_log_event - event will be found naturally. */ - + } /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the + Format_description_log_event event will be found naturally. */ + /* seek to the requested position, to start the requested dump */ my_b_seek(&log, pos); // Seek will done on next read @@ -546,6 +504,14 @@ impossible position"; goto err; } #endif + + if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT) + binlog_can_be_corrupted= (*packet)[FLAGS_OFFSET+1] & LOG_EVENT_BINLOG_IN_USE_F; + else if ((*packet)[EVENT_TYPE_OFFSET+1] == STOP_EVENT) + binlog_can_be_corrupted= FALSE; + else if ((*packet)[EVENT_TYPE_OFFSET+1] == ROTATE_EVENT) + rotate_was_found=TRUE; + if (my_net_write(net, (char*)packet->ptr(), packet->length())) { errmsg = "Failed on my_net_write()"; @@ -565,19 +531,25 @@ impossible position"; } packet->set("\0", 1, &my_charset_bin); } + + /* + here we were reading binlog that was not closed properly (as a result + of a crash ?). treat any corruption as EOF + */ + if (binlog_can_be_corrupted && error != LOG_READ_MEM) + error=LOG_READ_EOF; /* TODO: now that we are logging the offset, check to make sure the recorded offset and the actual match. - Guilhem 2003-06: this is not true if this master is a slave <4.0.15 - running with --log-slave-updates, because then log_pos may be the offset - in the-master-of-this-master's binlog. + Guilhem 2003-06: this is not true if this master is a slave + <4.0.15 running with --log-slave-updates, because then log_pos may + be the offset in the-master-of-this-master's binlog. 
*/ - if (test_for_non_eof_log_read_errors(error, &errmsg)) goto err; if (!(flags & BINLOG_DUMP_NON_BLOCK) && - mysql_bin_log.is_active(log_file_name)) + mysql_bin_log.is_active(log_file_name)) { /* Block until there is more data in the log @@ -613,9 +585,9 @@ impossible position"; now, but we'll be quick and just read one record TODO: - Add an counter that is incremented for each time we update - the binary log. We can avoid the following read if the counter - has not been updated since last read. + Add an counter that is incremented for each time we update the + binary log. We can avoid the following read if the counter + has not been updated since last read. */ pthread_mutex_lock(log_lock); @@ -708,20 +680,23 @@ impossible position"; (void) my_close(file, MYF(MY_WME)); /* - Call fake_rotate_event() in case the previous log (the one which we have - just finished reading) did not contain a Rotate event (for example (I - don't know any other example) the previous log was the last one before - the master was shutdown & restarted). - This way we tell the slave about the new log's name and position. - If the binlog is 5.0, the next event we are going to read and send is - Format_description_log_event. + Call fake_rotate_event() in case the previous log (the one which + we have just finished reading) did not contain a Rotate event + (for example (I don't know any other example) the previous log + was the last one before the master was shutdown & restarted). + This way we tell the slave about the new log's name and + position. If the binlog is 5.0, the next event we are going to + read and send is Format_description_log_event. */ if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 || - fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE, &errmsg)) + fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE, + rotate_was_found ? 
0 : LOG_EVENT_FORCE_ROLLBACK_F, + &errmsg)) { my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; goto err; } + rotate_was_found=FALSE; packet->length(0); packet->append('\0'); } @@ -762,17 +737,17 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) int slave_errno= 0; int thread_mask; DBUG_ENTER("start_slave"); - + if (check_access(thd, SUPER_ACL, any_db,0,0,0)) DBUG_RETURN(1); lock_slave_threads(mi); // this allows us to cleanly read slave_running // Get a mask of _stopped_ threads init_thread_mask(&thread_mask,mi,1 /* inverse */); /* - Below we will start all stopped threads. - But if the user wants to start only one thread, do as if the other thread - was running (as we don't wan't to touch the other thread), so set the - bit to 0 for the other thread + Below we will start all stopped threads. But if the user wants to + start only one thread, do as if the other thread was running (as we + don't wan't to touch the other thread), so set the bit to 0 for the + other thread */ if (thd->lex->slave_thd_opt) thread_mask&= thd->lex->slave_thd_opt; @@ -783,9 +758,9 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) slave_errno=ER_MASTER_INFO; else if (server_id_supplied && *mi->host) { - /* - If we will start SQL thread we will care about UNTIL options - If not and they are specified we will ignore them and warn user + /* + If we will start SQL thread we will care about UNTIL options If + not and they are specified we will ignore them and warn user about this fact. 
*/ if (thread_mask & SLAVE_SQL) @@ -796,13 +771,13 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) { mi->rli.until_condition= RELAY_LOG_INFO::UNTIL_MASTER_POS; mi->rli.until_log_pos= thd->lex->mi.pos; - /* - We don't check thd->lex->mi.log_file_name for NULL here + /* + We don't check thd->lex->mi.log_file_name for NULL here since it is checked in sql_yacc.yy */ strmake(mi->rli.until_log_name, thd->lex->mi.log_file_name, sizeof(mi->rli.until_log_name)-1); - } + } else if (thd->lex->mi.relay_log_pos) { mi->rli.until_condition= RELAY_LOG_INFO::UNTIL_RELAY_POS; @@ -826,15 +801,15 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) p_end points to the first invalid character. If it equals to p, no digits were found, error. If it contains '\0' it means conversion went ok. - */ + */ if (p_end==p || *p_end) slave_errno=ER_BAD_SLAVE_UNTIL_COND; } else slave_errno=ER_BAD_SLAVE_UNTIL_COND; - + /* mark the cached result of the UNTIL comparison as "undefined" */ - mi->rli.until_log_names_cmp_result= + mi->rli.until_log_names_cmp_result= RELAY_LOG_INFO::UNTIL_LOG_NAMES_CMP_UNKNOWN; /* Issuing warning then started without --skip-slave-start */ @@ -842,14 +817,13 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_MISSING_SKIP_SLAVE, ER(ER_MISSING_SKIP_SLAVE)); } - + pthread_mutex_unlock(&mi->rli.data_lock); } else if (thd->lex->mi.pos || thd->lex->mi.relay_log_pos) push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_UNTIL_COND_IGNORED, ER(ER_UNTIL_COND_IGNORED)); - - + if (!slave_errno) slave_errno = start_slave_threads(0 /*no mutex */, 1 /* wait for start */, @@ -864,9 +838,9 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) //no error if all threads are already started, only a warning push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_SLAVE_WAS_RUNNING, ER(ER_SLAVE_WAS_RUNNING)); - + unlock_slave_threads(mi); - + if (slave_errno) { if (net_report) @@ -966,7 +940,7 @@ int 
reset_slave(THD *thd, MASTER_INFO* mi) 1 /* just reset */, &errmsg))) goto err; - + /* Clear master's log coordinates and reset host/user/etc to the values specified in mysqld's options (only for good display of SHOW SLAVE STATUS; @@ -975,13 +949,13 @@ int reset_slave(THD *thd, MASTER_INFO* mi) STATUS; before doing START SLAVE; */ init_master_info_with_options(mi); - /* + /* Reset errors (the idea is that we forget about the old master). */ clear_slave_error(&mi->rli); clear_until_condition(&mi->rli); - + // close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0 end_master_info(mi); // and delete these two files @@ -1288,7 +1262,7 @@ int cmp_master_pos(const char* log_file_name1, ulonglong log_pos1, } -bool show_binlog_events(THD* thd) +bool mysql_show_binlog_events(THD* thd) { Protocol *protocol= thd->protocol; DBUG_ENTER("show_binlog_events"); @@ -1297,7 +1271,7 @@ bool show_binlog_events(THD* thd) IO_CACHE log; File file = -1; Format_description_log_event *description_event= new - Format_description_log_event(3); /* MySQL 4.0 by default */ + Format_description_log_event(3); /* MySQL 4.0 by default */ Log_event::init_show_field_list(&field_list); if (protocol->send_fields(&field_list, @@ -1314,7 +1288,7 @@ bool show_binlog_events(THD* thd) pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock(); LOG_INFO linfo; Log_event* ev; - + limit_start= thd->lex->current_select->offset_limit; limit_end= thd->lex->current_select->select_limit + limit_start; @@ -1338,15 +1312,15 @@ bool show_binlog_events(THD* thd) pthread_mutex_lock(log_lock); - /* + /* open_binlog() sought to position 4. - Read the first event in case it's a Format_description_log_event, to know the - format. If there's no such event, we are 3.23 or 4.x. This code, like - before, can't read 3.23 binlogs. + Read the first event in case it's a Format_description_log_event, to + know the format. If there's no such event, we are 3.23 or 4.x. This + code, like before, can't read 3.23 binlogs. 
This code will fail on a mixed relay log (one which has Format_desc then Rotate then Format_desc). */ - + ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event); if (ev) { @@ -1366,7 +1340,7 @@ bool show_binlog_events(THD* thd) errmsg="Invalid Format_description event; could be out of memory"; goto err; } - + for (event_count = 0; (ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event)); ) { diff --git a/sql/sql_repl.h b/sql/sql_repl.h index e8497fee343..b901d7cfe0c 100644 --- a/sql/sql_repl.h +++ b/sql/sql_repl.h @@ -36,15 +36,16 @@ extern I_List<i_string> binlog_do_db, binlog_ignore_db; extern int max_binlog_dump_events; extern my_bool opt_sporadic_binlog_dump_fail; -#define KICK_SLAVE(thd) { pthread_mutex_lock(&(thd)->LOCK_delete); (thd)->awake(THD::NOT_KILLED); pthread_mutex_unlock(&(thd)->LOCK_delete); } - -File open_binlog(IO_CACHE *log, const char *log_file_name, - const char **errmsg); +#define KICK_SLAVE(thd) do { \ + pthread_mutex_lock(&(thd)->LOCK_delete); \ + (thd)->awake(THD::NOT_KILLED); \ + pthread_mutex_unlock(&(thd)->LOCK_delete); \ + } while(0) int start_slave(THD* thd, MASTER_INFO* mi, bool net_report); int stop_slave(THD* thd, MASTER_INFO* mi, bool net_report); bool change_master(THD* thd, MASTER_INFO* mi); -bool show_binlog_events(THD* thd); +bool mysql_show_binlog_events(THD* thd); int cmp_master_pos(const char* log_file_name1, ulonglong log_pos1, const char* log_file_name2, ulonglong log_pos2); int reset_slave(THD *thd, MASTER_INFO* mi); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index d7cefb3dceb..db1c04efe80 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -14,7 +14,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - /* drop and alter of tables */ #include "mysql_priv.h" @@ -275,9 +274,7 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, { if (!error) thd->clear_error(); - 
Query_log_event qinfo(thd, thd->query, thd->query_length, - tmp_table_deleted && !some_tables_deleted, - FALSE); + Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); mysql_bin_log.write(&qinfo); } } @@ -1297,7 +1294,7 @@ int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info, create_info Create information (like MAX_ROWS) fields List of fields to create keys List of keys to create - tmp_table Set to 1 if this is an internal temporary table + internal_tmp_table Set to 1 if this is an internal temporary table (From ALTER TABLE) DESCRIPTION @@ -1316,7 +1313,7 @@ int mysql_prepare_table(THD *thd, HA_CREATE_INFO *create_info, bool mysql_create_table(THD *thd,const char *db, const char *table_name, HA_CREATE_INFO *create_info, List<create_field> &fields, - List<Key> &keys,bool tmp_table, + List<Key> &keys,bool internal_tmp_table, uint select_field_count) { char path[FN_REFLEN]; @@ -1385,7 +1382,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, } if (mysql_prepare_table(thd, create_info, fields, - keys, tmp_table, db_options, file, + keys, internal_tmp_table, db_options, file, key_info_buffer, &key_count, select_field_count)) DBUG_RETURN(TRUE); @@ -1419,7 +1416,7 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, if (wait_if_global_read_lock(thd, 0, 1)) DBUG_RETURN(error); VOID(pthread_mutex_lock(&LOCK_open)); - if (!tmp_table && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) + if (!internal_tmp_table && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) { if (!access(path,F_OK)) { @@ -1486,13 +1483,10 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name, } thd->tmp_table_used= 1; } - if (!tmp_table && mysql_bin_log.is_open()) + if (!internal_tmp_table && mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - test(create_info->options & - HA_LEX_CREATE_TMP_TABLE), - FALSE); + Query_log_event qinfo(thd, 
thd->query, thd->query_length, FALSE, FALSE); mysql_bin_log.write(&qinfo); } error= FALSE; @@ -2473,10 +2467,7 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, - test(create_info->options & - HA_LEX_CREATE_TMP_TABLE), - FALSE); + Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); mysql_bin_log.write(&qinfo); } res= FALSE; @@ -2586,7 +2577,7 @@ mysql_discard_or_import_tablespace(THD *thd, goto err; if (mysql_bin_log.is_open()) { - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); + Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); mysql_bin_log.write(&qinfo); } err: @@ -2979,7 +2970,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); + Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); mysql_bin_log.write(&qinfo); } if (do_send_ok) @@ -3396,7 +3387,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); + Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); mysql_bin_log.write(&qinfo); } goto end_temporary; @@ -3530,7 +3521,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, if (mysql_bin_log.is_open()) { thd->clear_error(); - Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE); + Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE); mysql_bin_log.write(&qinfo); } VOID(pthread_cond_broadcast(&COND_refresh)); diff --git a/sql/sql_trigger.cc b/sql/sql_trigger.cc index b9e837b0d64..f61ff12f365 100644 --- a/sql/sql_trigger.cc +++ b/sql/sql_trigger.cc @@ -147,7 +147,7 @@ bool Table_triggers_list::create_trigger(THD *thd, TABLE_LIST 
*tables) /* Let us check if trigger with the same name exists */ while ((name= it++)) { - if (my_strcasecmp(system_charset_info, lex->name_and_length.str, + if (my_strcasecmp(system_charset_info, lex->ident.str, name->str) == 0) { my_message(ER_TRG_ALREADY_EXISTS, ER(ER_TRG_ALREADY_EXISTS), MYF(0)); @@ -238,7 +238,7 @@ bool Table_triggers_list::drop_trigger(THD *thd, TABLE_LIST *tables) { it_def++; - if (my_strcasecmp(system_charset_info, lex->name_and_length.str, + if (my_strcasecmp(system_charset_info, lex->ident.str, name->str) == 0) { /* @@ -428,15 +428,15 @@ bool Table_triggers_list::check_n_load(THD *thd, const char *db, if (!(trg_name_buff= alloc_root(&table->mem_root, sizeof(LEX_STRING) + - lex.name_and_length.length + 1))) + lex.ident.length + 1))) goto err_with_lex_cleanup; trg_name_str= (LEX_STRING *)trg_name_buff; trg_name_buff+= sizeof(LEX_STRING); - memcpy(trg_name_buff, lex.name_and_length.str, - lex.name_and_length.length + 1); + memcpy(trg_name_buff, lex.ident.str, + lex.ident.length + 1); trg_name_str->str= trg_name_buff; - trg_name_str->length= lex.name_and_length.length; + trg_name_str->length= lex.ident.length; if (triggers->names_list.push_back(trg_name_str, &table->mem_root)) goto err_with_lex_cleanup; diff --git a/sql/sql_update.cc b/sql/sql_update.cc index f9df1be2abd..b5df3595f41 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -117,7 +117,7 @@ int mysql_update(THD *thd, { bool using_limit= limit != HA_POS_ERROR; bool safe_update= thd->options & OPTION_SAFE_UPDATES; - bool used_key_is_modified, transactional_table, log_delayed; + bool used_key_is_modified, transactional_table; int res; int error=0; uint used_index; @@ -471,7 +471,6 @@ int mysql_update(THD *thd, query_cache_invalidate3(thd, table_list, 1); } - log_delayed= (transactional_table || table->s->tmp_table); if ((updated || (error < 0)) && (error <= 0 || !transactional_table)) { if (mysql_bin_log.is_open()) @@ -479,11 +478,11 @@ int mysql_update(THD *thd, if (error <= 0) 
thd->clear_error(); Query_log_event qinfo(thd, thd->query, thd->query_length, - log_delayed, FALSE); + transactional_table, FALSE); if (mysql_bin_log.write(&qinfo) && transactional_table) error=1; // Rollback update } - if (!log_delayed) + if (!transactional_table) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; } if (transactional_table) @@ -1002,7 +1001,6 @@ multi_update::initialize_tables(JOIN *join) DBUG_RETURN(1); main_table=join->join_tab->table; trans_safe= transactional_tables= main_table->file->has_transactions(); - log_delayed= trans_safe || main_table->s->tmp_table != NO_TMP_TABLE; table_to_update= 0; /* Create a temporary table for keys to all tables, except main table */ @@ -1337,17 +1335,13 @@ int multi_update::do_updates(bool from_send_error) goto err; } updated++; - if (table->s->tmp_table != NO_TMP_TABLE) - log_delayed= 1; } } if (updated != org_updated) { - if (table->s->tmp_table != NO_TMP_TABLE) - log_delayed= 1; // Tmp tables forces delay log if (table->file->has_transactions()) - log_delayed= transactional_tables= 1; + transactional_tables= 1; else trans_safe= 0; // Can't do safe rollback } @@ -1368,10 +1362,8 @@ err: if (updated != org_updated) { - if (table->s->tmp_table != NO_TMP_TABLE) - log_delayed= 1; if (table->file->has_transactions()) - log_delayed= transactional_tables= 1; + transactional_tables= 1; else trans_safe= 0; } @@ -1413,11 +1405,11 @@ bool multi_update::send_eof() if (local_error <= 0) thd->clear_error(); Query_log_event qinfo(thd, thd->query, thd->query_length, - log_delayed, FALSE); + transactional_tables, FALSE); if (mysql_bin_log.write(&qinfo) && trans_safe) local_error= 1; // Rollback update } - if (!log_delayed) + if (!transactional_tables) thd->options|=OPTION_STATUS_NO_TRANS_UPDATE; } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index c4649c57269..92658192ac2 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -43,19 +43,21 @@ int yylex(void *yylval, void *yythd); +const LEX_STRING null_lex_str={0,0}; + #define 
yyoverflow(A,B,C,D,E,F) {ulong val= *(F); if(my_yyoverflow((B), (D), &val)) { yyerror((char*) (A)); return 2; } else { *(F)= (YYSIZE_T)val; }} #define WARN_DEPRECATED(A,B) \ push_warning_printf(((THD *)yythd), MYSQL_ERROR::WARN_LEVEL_WARN, \ ER_WARN_DEPRECATED_SYNTAX, \ - ER(ER_WARN_DEPRECATED_SYNTAX), (A), (B)); + ER(ER_WARN_DEPRECATED_SYNTAX), (A), (B)); /* Helper for parsing "IS [NOT] truth_value" */ inline Item *is_truth_value(Item *A, bool v1, bool v2) { return new Item_func_if(create_func_ifnull(A, new Item_int((char *) (v2 ? "TRUE" : "FALSE"), v2, 1)), - new Item_int((char *) (v1 ? "TRUE" : "FALSE"), v1, 1), + new Item_int((char *) (v1 ? "TRUE" : "FALSE"), v1, 1), new Item_int((char *) (v1 ? "FALSE" : "TRUE"),!v1, 1)); } @@ -105,552 +107,548 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %pure_parser /* We have threads */ -%token END_OF_INPUT - -%token CLOSE_SYM -%token HANDLER_SYM -%token LAST_SYM -%token NEXT_SYM -%token PREV_SYM - -%token DIV_SYM -%token EQ -%token EQUAL_SYM -%token SOUNDS_SYM -%token GE -%token GT_SYM -%token LE -%token LT -%token NE -%token IS -%token MOD_SYM -%token SHIFT_LEFT -%token SHIFT_RIGHT -%token SET_VAR - -%token ABORT_SYM -%token ADD -%token AFTER_SYM -%token ALTER -%token ANALYZE_SYM -%token ANY_SYM -%token AVG_SYM -%token BEGIN_SYM -%token BINLOG_SYM -%token CALL_SYM -%token CHANGE -%token CLIENT_SYM -%token COMMENT_SYM -%token COMMIT_SYM -%token CONSISTENT_SYM -%token COUNT_SYM -%token CREATE -%token CROSS -%token CUBE_SYM -%token DEFINER_SYM -%token DELETE_SYM -%token DETERMINISTIC_SYM -%token DUAL_SYM -%token DO_SYM -%token DROP -%token EVENTS_SYM -%token EXECUTE_SYM -%token EXPANSION_SYM -%token FLUSH_SYM -%token HELP_SYM -%token INSERT -%token RELAY_THREAD -%token KILL_SYM -%token LOAD -%token LOCKS_SYM -%token LOCK_SYM -%token MASTER_SYM -%token MAX_SYM -%token MIN_SYM -%token NONE_SYM -%token OPTIMIZE -%token PURGE -%token REPAIR -%token REPLICATION -%token RESET_SYM -%token ROLLBACK_SYM -%token 
ROLLUP_SYM -%token SAVEPOINT_SYM -%token SELECT_SYM -%token SHOW -%token SLAVE -%token SNAPSHOT_SYM -%token SQL_SYM -%token SQL_THREAD -%token START_SYM -%token STD_SYM -%token VARIANCE_SYM -%token STOP_SYM -%token SUM_SYM -%token ADDDATE_SYM -%token SUPER_SYM -%token TRUNCATE_SYM -%token UNLOCK_SYM -%token UNTIL_SYM -%token UPDATE_SYM - -%token ACTION -%token AGGREGATE_SYM -%token ALGORITHM_SYM -%token ALL -%token AND_SYM -%token AND_AND_SYM -%token AS -%token ASC -%token AUTO_INC -%token AVG_ROW_LENGTH -%token BACKUP_SYM -%token BERKELEY_DB_SYM -%token BINARY +%token END_OF_INPUT + +%token ABORT_SYM +%token ACTION +%token ADD +%token ADDDATE_SYM +%token AFTER_SYM +%token AGAINST +%token AGGREGATE_SYM +%token ALGORITHM_SYM +%token ALL +%token ALTER +%token ANALYZE_SYM +%token AND_AND_SYM +%token AND_SYM +%token ANY_SYM +%token AS +%token ASC +%token ASCII_SYM +%token ASENSITIVE_SYM +%token ATAN +%token AUTO_INC +%token AVG_ROW_LENGTH +%token AVG_SYM +%token BACKUP_SYM +%token BEFORE_SYM +%token BEGIN_SYM +%token BENCHMARK_SYM +%token BERKELEY_DB_SYM +%token BIGINT +%token BINARY +%token BINLOG_SYM %token BIN_NUM -%token BIT_SYM -%token BOOL_SYM -%token BOOLEAN_SYM -%token BOTH -%token BTREE_SYM -%token BY -%token BYTE_SYM -%token CACHE_SYM -%token CASCADE +%token BIT_AND +%token BIT_OR +%token BIT_SYM +%token BIT_XOR +%token BLOB_SYM +%token BOOLEAN_SYM +%token BOOL_SYM +%token BOTH +%token BTREE_SYM +%token BY +%token BYTE_SYM +%token CACHE_SYM +%token CALL_SYM +%token CASCADE %token CASCADED -%token CAST_SYM -%token CHAIN_SYM -%token CHARSET -%token CHECKSUM_SYM -%token CHECK_SYM -%token COMMITTED_SYM -%token COLLATE_SYM -%token COLLATION_SYM -%token COLUMNS -%token COLUMN_SYM -%token COMPACT_SYM -%token CONCURRENT +%token CAST_SYM +%token CHAIN_SYM +%token CHANGE +%token CHANGED +%token CHARSET +%token CHAR_SYM +%token CHECKSUM_SYM +%token CHECK_SYM +%token CIPHER_SYM +%token CLIENT_SYM +%token CLOSE_SYM +%token COALESCE +%token COLLATE_SYM +%token 
COLLATION_SYM +%token COLUMNS +%token COLUMN_SYM +%token COMMENT_SYM +%token COMMITTED_SYM +%token COMMIT_SYM +%token COMPACT_SYM +%token COMPRESSED_SYM +%token CONCAT +%token CONCAT_WS +%token CONCURRENT %token CONDITION_SYM -%token CONNECTION_SYM -%token CONSTRAINT +%token CONNECTION_SYM +%token CONSISTENT_SYM +%token CONSTRAINT %token CONTAINS_SYM %token CONTINUE_SYM -%token CONVERT_SYM +%token CONVERT_SYM +%token CONVERT_TZ_SYM +%token COUNT_SYM +%token CREATE +%token CROSS +%token CUBE_SYM +%token CURDATE %token CURRENT_USER -%token DATABASES -%token DATA_SYM -%token DECIMAL_NUM +%token CURSOR_SYM +%token CURTIME +%token DATABASE +%token DATABASES +%token DATA_SYM +%token DATETIME +%token DATE_ADD_INTERVAL +%token DATE_SUB_INTERVAL +%token DATE_SYM +%token DAY_HOUR_SYM +%token DAY_MICROSECOND_SYM +%token DAY_MINUTE_SYM +%token DAY_SECOND_SYM +%token DAY_SYM +%token DEALLOCATE_SYM +%token DECIMAL_NUM +%token DECIMAL_SYM %token DECLARE_SYM -%token DEFAULT -%token DELAYED_SYM -%token DELAY_KEY_WRITE_SYM -%token DESC -%token DESCRIBE -%token DES_KEY_FILE -%token DISABLE_SYM -%token DISCARD -%token DISTINCT +%token DECODE_SYM +%token DEFAULT +%token DEFINER_SYM +%token DELAYED_SYM +%token DELAY_KEY_WRITE_SYM +%token DELETE_SYM +%token DESC +%token DESCRIBE +%token DES_DECRYPT_SYM +%token DES_ENCRYPT_SYM +%token DES_KEY_FILE +%token DETERMINISTIC_SYM +%token DIRECTORY_SYM +%token DISABLE_SYM +%token DISCARD +%token DISTINCT +%token DIV_SYM +%token DOUBLE_SYM +%token DO_SYM +%token DROP +%token DUAL_SYM +%token DUMPFILE %token DUPLICATE_SYM -%token DYNAMIC_SYM +%token DYNAMIC_SYM %token EACH_SYM -%token ENABLE_SYM -%token ENCLOSED -%token ESCAPED -%token DIRECTORY_SYM -%token ESCAPE_SYM -%token EXISTS +%token EALLOCATE_SYM +%token ELSEIF_SYM +%token ELT_FUNC +%token ENABLE_SYM +%token ENCLOSED +%token ENCODE_SYM +%token ENCRYPT +%token END +%token ENGINES_SYM +%token ENGINE_SYM +%token ENUM +%token EQ +%token EQUAL_SYM +%token ERRORS +%token ESCAPED +%token 
ESCAPE_SYM +%token EVENTS_SYM +%token EXECUTE_SYM +%token EXISTS %token EXIT_SYM -%token EXTENDED_SYM -%token FALSE_SYM +%token EXPANSION_SYM +%token EXPORT_SET +%token EXTENDED_SYM +%token EXTRACT_SYM +%token FALSE_SYM +%token FAST_SYM %token FETCH_SYM -%token FILE_SYM -%token FIRST_SYM -%token FIXED_SYM -%token FLOAT_NUM -%token FORCE_SYM -%token FOREIGN +%token FIELD_FUNC +%token FILE_SYM +%token FIRST_SYM +%token FIXED_SYM +%token FLOAT_NUM +%token FLOAT_SYM +%token FLUSH_SYM +%token FORCE_SYM +%token FOREIGN +%token FORMAT_SYM +%token FOR_SYM %token FOUND_SYM -%token FROM -%token FULL -%token FULLTEXT_SYM -%token GLOBAL_SYM -%token GRANT -%token GRANTS -%token GREATEST_SYM -%token GROUP -%token HAVING -%token HASH_SYM -%token HEX_NUM -%token HIGH_PRIORITY -%token HOSTS_SYM -%token IDENT -%token IDENT_QUOTED -%token IGNORE_SYM -%token IMPORT -%token INDEX_SYM -%token INDEXES -%token INFILE -%token INNER_SYM -%token INNOBASE_SYM +%token FRAC_SECOND_SYM +%token FROM +%token FROM_UNIXTIME +%token FULL +%token FULLTEXT_SYM +%token FUNCTION_SYM +%token FUNC_ARG0 +%token FUNC_ARG1 +%token FUNC_ARG2 +%token FUNC_ARG3 +%token GE +%token GEOMCOLLFROMTEXT +%token GEOMETRYCOLLECTION +%token GEOMETRY_SYM +%token GEOMFROMTEXT +%token GEOMFROMWKB +%token GET_FORMAT +%token GLOBAL_SYM +%token GOTO_SYM +%token GRANT +%token GRANTS +%token GREATEST_SYM +%token GROUP +%token GROUP_CONCAT_SYM +%token GROUP_UNIQUE_USERS +%token GT_SYM +%token HANDLER_SYM +%token HASH_SYM +%token HAVING +%token HELP_SYM +%token HEX_NUM +%token HIGH_PRIORITY +%token HOSTS_SYM +%token HOUR_MICROSECOND_SYM +%token HOUR_MINUTE_SYM +%token HOUR_SECOND_SYM +%token HOUR_SYM +%token IDENT +%token IDENTIFIED_SYM +%token IDENT_QUOTED +%token IF +%token IGNORE_SYM +%token IMPORT +%token INDEXES +%token INDEX_SYM +%token INFILE +%token INNER_SYM +%token INNOBASE_SYM %token INOUT_SYM -%token INTO -%token IN_SYM +%token INSENSITIVE_SYM +%token INSERT +%token INSERT_METHOD +%token INTERVAL_SYM +%token INTO 
+%token INT_SYM %token INVOKER_SYM -%token ISOLATION -%token JOIN_SYM -%token KEYS -%token KEY_SYM -%token LEADING -%token LEAST_SYM -%token LEAVES -%token LEVEL_SYM -%token LEX_HOSTNAME +%token IN_SYM +%token IS +%token ISOLATION +%token ISSUER_SYM +%token ITERATE_SYM +%token JOIN_SYM +%token KEYS +%token KEY_SYM +%token KILL_SYM +%token LABEL_SYM %token LANGUAGE_SYM -%token LIKE -%token LINES -%token LOCAL_SYM +%token LAST_INSERT_ID +%token LAST_SYM +%token LE +%token LEADING +%token LEAST_SYM +%token LEAVES +%token LEAVE_SYM +%token LEFT +%token LEVEL_SYM +%token LEX_HOSTNAME +%token LIKE +%token LIMIT +%token LINEFROMTEXT +%token LINES +%token LINESTRING +%token LOAD +%token LOCAL_SYM +%token LOCATE %token LOCATOR_SYM -%token LOG_SYM -%token LOGS_SYM -%token LONG_NUM -%token LONG_SYM -%token LOW_PRIORITY -%token MERGE_SYM -%token MASTER_HOST_SYM -%token MASTER_USER_SYM -%token MASTER_LOG_FILE_SYM -%token MASTER_LOG_POS_SYM -%token MASTER_PASSWORD_SYM -%token MASTER_PORT_SYM -%token MASTER_CONNECT_RETRY_SYM -%token MASTER_SERVER_ID_SYM -%token MASTER_SSL_SYM -%token MASTER_SSL_CA_SYM -%token MASTER_SSL_CAPATH_SYM -%token MASTER_SSL_CERT_SYM -%token MASTER_SSL_CIPHER_SYM -%token MASTER_SSL_KEY_SYM -%token RELAY_LOG_FILE_SYM -%token RELAY_LOG_POS_SYM -%token MATCH -%token MAX_ROWS -%token MAX_CONNECTIONS_PER_HOUR -%token MAX_QUERIES_PER_HOUR -%token MAX_UPDATES_PER_HOUR -%token MAX_USER_CONNECTIONS_SYM -%token MEDIUM_SYM -%token MIN_ROWS +%token LOCKS_SYM +%token LOCK_SYM +%token LOGS_SYM +%token LOG_SYM +%token LONGBLOB +%token LONGTEXT +%token LONG_NUM +%token LONG_SYM +%token LOOP_SYM +%token LOW_PRIORITY +%token LT +%token MAKE_SET_SYM +%token MASTER_CONNECT_RETRY_SYM +%token MASTER_HOST_SYM +%token MASTER_LOG_FILE_SYM +%token MASTER_LOG_POS_SYM +%token MASTER_PASSWORD_SYM +%token MASTER_PORT_SYM +%token MASTER_POS_WAIT +%token MASTER_SERVER_ID_SYM +%token MASTER_SSL_CAPATH_SYM +%token MASTER_SSL_CA_SYM +%token MASTER_SSL_CERT_SYM +%token MASTER_SSL_CIPHER_SYM 
+%token MASTER_SSL_KEY_SYM +%token MASTER_SSL_SYM +%token MASTER_SYM +%token MASTER_USER_SYM +%token MATCH +%token MAX_CONNECTIONS_PER_HOUR +%token MAX_QUERIES_PER_HOUR +%token MAX_ROWS +%token MAX_SYM +%token MAX_UPDATES_PER_HOUR +%token MAX_USER_CONNECTIONS_SYM +%token MEDIUMBLOB +%token MEDIUMINT +%token MEDIUMTEXT +%token MEDIUM_SYM +%token MERGE_SYM +%token MICROSECOND_SYM +%token MIGRATE_SYM +%token MINUTE_MICROSECOND_SYM +%token MINUTE_SECOND_SYM +%token MINUTE_SYM +%token MIN_ROWS +%token MIN_SYM +%token MLINEFROMTEXT +%token MODE_SYM +%token MODIFIES_SYM +%token MODIFY_SYM +%token MOD_SYM +%token MONTH_SYM +%token MPOINTFROMTEXT +%token MPOLYFROMTEXT +%token MULTILINESTRING +%token MULTIPOINT +%token MULTIPOLYGON %token MUTEX_SYM -%token NAMES_SYM -%token NAME_SYM -%token NATIONAL_SYM -%token NATURAL +%token NAMES_SYM +%token NAME_SYM +%token NATIONAL_SYM +%token NATURAL +%token NCHAR_STRING +%token NCHAR_SYM %token NDBCLUSTER_SYM -%token NEW_SYM -%token NCHAR_SYM -%token NCHAR_STRING +%token NE +%token NEW_SYM +%token NEXT_SYM +%token NONE_SYM +%token NOT2_SYM +%token NOT_SYM +%token NOW_SYM +%token NO_SYM +%token NO_WRITE_TO_BINLOG +%token NULL_SYM +%token NUM +%token NUMERIC_SYM %token NVARCHAR_SYM -%token NOT_SYM -%token NOT2_SYM -%token NO_SYM -%token NULL_SYM -%token NUM -%token OFFSET_SYM -%token ON +%token OFFSET_SYM +%token OLD_PASSWORD +%token ON %token ONE_SHOT_SYM -%token OPEN_SYM -%token OPTION -%token OPTIONALLY -%token OR_SYM -%token OR2_SYM -%token OR_OR_SYM -%token ORDER_SYM +%token ONE_SYM +%token OPEN_SYM +%token OPTIMIZE +%token OPTION +%token OPTIONALLY +%token OR2_SYM +%token ORDER_SYM +%token OR_OR_SYM +%token OR_SYM +%token OUTER +%token OUTFILE %token OUT_SYM -%token OUTER -%token OUTFILE -%token DUMPFILE -%token PACK_KEYS_SYM -%token PARTIAL -%token PRIMARY_SYM -%token PRIVILEGES -%token PROCESS -%token PROCESSLIST_SYM -%token QUERY_SYM -%token RAID_0_SYM -%token RAID_STRIPED_SYM -%token RAID_TYPE -%token RAID_CHUNKS -%token 
RAID_CHUNKSIZE -%token READ_SYM -%token READS_SYM -%token REDUNDANT_SYM -%token REFERENCES -%token REGEXP -%token RELEASE_SYM -%token RELOAD -%token RENAME -%token REPEATABLE_SYM -%token REQUIRE_SYM -%token RESOURCES -%token RESTORE_SYM -%token RESTRICT -%token REVOKE -%token ROUTINE_SYM -%token ROWS_SYM -%token ROW_FORMAT_SYM -%token ROW_SYM -%token RTREE_SYM +%token PACK_KEYS_SYM +%token PARTIAL +%token PASSWORD +%token PHASE_SYM +%token POINTFROMTEXT +%token POINT_SYM +%token POLYFROMTEXT +%token POLYGON +%token POSITION_SYM +%token PRECISION +%token PREPARE_SYM +%token PREV_SYM +%token PRIMARY_SYM +%token PRIVILEGES +%token PROCEDURE +%token PROCESS +%token PROCESSLIST_SYM +%token PURGE +%token QUARTER_SYM +%token QUERY_SYM +%token QUICK +%token RAID_0_SYM +%token RAID_CHUNKS +%token RAID_CHUNKSIZE +%token RAID_STRIPED_SYM +%token RAID_TYPE +%token RAND +%token READS_SYM +%token READ_SYM +%token REAL +%token RECOVER_SYM +%token REDUNDANT_SYM +%token REFERENCES +%token REGEXP +%token RELAY_LOG_FILE_SYM +%token RELAY_LOG_POS_SYM +%token RELAY_THREAD +%token RELEASE_SYM +%token RELOAD +%token RENAME +%token REPAIR +%token REPEATABLE_SYM +%token REPEAT_SYM +%token REPLACE +%token REPLICATION +%token REQUIRE_SYM +%token RESET_SYM +%token RESOURCES +%token RESTORE_SYM +%token RESTRICT +%token RESUME_SYM +%token RETURNS_SYM +%token RETURN_SYM +%token REVOKE +%token RIGHT +%token ROLLBACK_SYM +%token ROLLUP_SYM +%token ROUND +%token ROUTINE_SYM +%token ROWS_SYM +%token ROW_COUNT_SYM +%token ROW_FORMAT_SYM +%token ROW_SYM +%token RTREE_SYM +%token SAVEPOINT_SYM +%token SECOND_MICROSECOND_SYM +%token SECOND_SYM %token SECURITY_SYM -%token SET +%token SELECT_SYM +%token SENSITIVE_SYM %token SEPARATOR_SYM -%token SERIAL_SYM -%token SERIALIZABLE_SYM -%token SESSION_SYM -%token SIMPLE_SYM -%token SHUTDOWN -%token SPATIAL_SYM +%token SERIALIZABLE_SYM +%token SERIAL_SYM +%token SESSION_SYM +%token SET +%token SET_VAR +%token SHARE_SYM +%token SHIFT_LEFT +%token SHIFT_RIGHT 
+%token SHOW +%token SHUTDOWN +%token SIGNED_SYM +%token SIMPLE_SYM +%token SLAVE +%token SMALLINT +%token SNAPSHOT_SYM +%token SOUNDS_SYM +%token SPATIAL_SYM %token SPECIFIC_SYM %token SQLEXCEPTION_SYM %token SQLSTATE_SYM %token SQLWARNING_SYM +%token SQL_BIG_RESULT +%token SQL_BUFFER_RESULT +%token SQL_CACHE_SYM +%token SQL_CALC_FOUND_ROWS +%token SQL_NO_CACHE_SYM +%token SQL_SMALL_RESULT +%token SQL_SYM +%token SQL_THREAD %token SSL_SYM -%token STARTING -%token STATUS_SYM -%token STORAGE_SYM -%token STRAIGHT_JOIN -%token SUBJECT_SYM -%token TABLES -%token TABLE_SYM -%token TABLESPACE -%token TEMPORARY -%token TEMPTABLE_SYM -%token TERMINATED -%token TEXT_STRING -%token TO_SYM -%token TRAILING -%token TRANSACTION_SYM +%token STARTING +%token START_SYM +%token STATUS_SYM +%token STD_SYM +%token STOP_SYM +%token STORAGE_SYM +%token STRAIGHT_JOIN +%token STRING_SYM +%token SUBDATE_SYM +%token SUBJECT_SYM +%token SUBSTRING +%token SUBSTRING_INDEX +%token SUM_SYM +%token SUPER_SYM +%token SUSPEND_SYM +%token TABLES +%token TABLESPACE +%token TABLE_SYM +%token TEMPORARY +%token TEMPTABLE_SYM +%token TERMINATED +%token TEXT_STRING +%token TEXT_SYM +%token TIMESTAMP +%token TIMESTAMP_ADD +%token TIMESTAMP_DIFF +%token TIME_SYM +%token TINYBLOB +%token TINYINT +%token TINYTEXT +%token TO_SYM +%token TRAILING +%token TRANSACTION_SYM %token TRIGGER_SYM -%token TRUE_SYM -%token TYPE_SYM +%token TRIM +%token TRUE_SYM +%token TRUNCATE_SYM %token TYPES_SYM -%token FUNC_ARG0 -%token FUNC_ARG1 -%token FUNC_ARG2 -%token FUNC_ARG3 -%token RETURN_SYM -%token RETURNS_SYM -%token UDF_SONAME_SYM -%token UDF_RETURNS_SYM -%token FUNCTION_SYM -%token UNCOMMITTED_SYM -%token UNDEFINED_SYM -%token UNDERSCORE_CHARSET +%token TYPE_SYM +%token UDF_RETURNS_SYM +%token UDF_SONAME_SYM +%token ULONGLONG_NUM +%token UNCOMMITTED_SYM +%token UNDEFINED_SYM +%token UNDERSCORE_CHARSET %token UNDO_SYM -%token UNICODE_SYM -%token UNION_SYM -%token UNIQUE_SYM -%token UNKNOWN_SYM -%token USAGE -%token 
USE_FRM -%token USE_SYM -%token USING -%token VALUE_SYM -%token VALUES -%token VARIABLES -%token VIEW_SYM -%token WHERE -%token WITH -%token WRITE_SYM -%token NO_WRITE_TO_BINLOG -%token X509_SYM -%token XOR -%token COMPRESSED_SYM -%token ROW_COUNT_SYM - -%token ERRORS -%token WARNINGS - -%token ASCII_SYM -%token BIGINT -%token BLOB_SYM -%token CHAR_SYM -%token CHANGED -%token COALESCE -%token DATETIME -%token DATE_SYM -%token DECIMAL_SYM -%token DOUBLE_SYM -%token ENUM -%token FAST_SYM -%token FLOAT_SYM -%token GEOMETRY_SYM -%token INT_SYM -%token LIMIT -%token LONGBLOB -%token LONGTEXT -%token MEDIUMBLOB -%token MEDIUMINT -%token MEDIUMTEXT -%token NUMERIC_SYM -%token PRECISION -%token PREPARE_SYM -%token DEALLOCATE_SYM -%token QUICK -%token REAL -%token SIGNED_SYM -%token SMALLINT -%token STRING_SYM -%token TEXT_SYM -%token TIMESTAMP -%token TIMESTAMP_ADD -%token TIMESTAMP_DIFF -%token TIME_SYM -%token TINYBLOB -%token TINYINT -%token TINYTEXT -%token ULONGLONG_NUM -%token UNSIGNED -%token VARBINARY -%token VARCHAR -%token VARYING -%token ZEROFILL - -%token ADDDATE_SYM -%token AGAINST -%token ATAN -%token BETWEEN_SYM -%token BIT_AND -%token BIT_OR -%token BIT_XOR -%token CASE_SYM -%token CONCAT -%token CONCAT_WS -%token CONVERT_TZ_SYM -%token CURDATE -%token CURTIME -%token DATABASE -%token DATE_ADD_INTERVAL -%token DATE_SUB_INTERVAL -%token DAY_HOUR_SYM -%token DAY_MICROSECOND_SYM -%token DAY_MINUTE_SYM -%token DAY_SECOND_SYM -%token DAY_SYM -%token DECODE_SYM -%token DES_ENCRYPT_SYM -%token DES_DECRYPT_SYM -%token ELSE -%token ELT_FUNC -%token ENCODE_SYM -%token ENGINE_SYM -%token ENGINES_SYM -%token ENCRYPT -%token EXPORT_SET -%token EXTRACT_SYM -%token FIELD_FUNC -%token FORMAT_SYM -%token FOR_SYM -%token FRAC_SECOND_SYM -%token FROM_UNIXTIME -%token GEOMCOLLFROMTEXT -%token GEOMFROMTEXT -%token GEOMFROMWKB -%token GEOMETRYCOLLECTION -%token GROUP_CONCAT_SYM -%token GROUP_UNIQUE_USERS -%token GET_FORMAT -%token HOUR_MICROSECOND_SYM -%token HOUR_MINUTE_SYM 
-%token HOUR_SECOND_SYM -%token HOUR_SYM -%token IDENTIFIED_SYM -%token IF -%token INSERT_METHOD -%token INTERVAL_SYM -%token LAST_INSERT_ID -%token LEFT -%token LINEFROMTEXT -%token LINESTRING -%token LOCATE -%token MAKE_SET_SYM -%token MASTER_POS_WAIT -%token MICROSECOND_SYM -%token MINUTE_MICROSECOND_SYM -%token MINUTE_SECOND_SYM -%token MINUTE_SYM -%token MODE_SYM -%token MODIFIES_SYM -%token MODIFY_SYM -%token MONTH_SYM -%token MLINEFROMTEXT -%token MPOINTFROMTEXT -%token MPOLYFROMTEXT -%token MULTILINESTRING -%token MULTIPOINT -%token MULTIPOLYGON -%token NOW_SYM -%token OLD_PASSWORD -%token PASSWORD -%token POINTFROMTEXT -%token POINT_SYM -%token POLYFROMTEXT -%token POLYGON -%token POSITION_SYM -%token PROCEDURE -%token QUARTER_SYM -%token RAND -%token REPLACE -%token RIGHT -%token ROUND -%token SECOND_SYM -%token SECOND_MICROSECOND_SYM -%token SHARE_SYM -%token SUBDATE_SYM -%token SUBSTRING -%token SUBSTRING_INDEX -%token TRIM -%token UNIQUE_USERS -%token UNIX_TIMESTAMP -%token USER -%token UTC_DATE_SYM -%token UTC_TIME_SYM -%token UTC_TIMESTAMP_SYM -%token WEEK_SYM -%token WHEN_SYM -%token WORK_SYM -%token YEAR_MONTH_SYM -%token YEAR_SYM -%token YEARWEEK -%token BENCHMARK_SYM -%token END -%token THEN_SYM - -%token SQL_BIG_RESULT -%token SQL_CACHE_SYM -%token SQL_CALC_FOUND_ROWS -%token SQL_NO_CACHE_SYM -%token SQL_SMALL_RESULT -%token SQL_BUFFER_RESULT - -%token CURSOR_SYM -%token ELSEIF_SYM -%token ITERATE_SYM -%token GOTO_SYM -%token LABEL_SYM -%token LEAVE_SYM -%token LOOP_SYM -%token REPEAT_SYM +%token UNICODE_SYM +%token UNION_SYM +%token UNIQUE_SYM +%token UNIQUE_USERS +%token UNIX_TIMESTAMP +%token UNKNOWN_SYM +%token UNLOCK_SYM +%token UNLOCK_SYM +%token UNSIGNED +%token UNTIL_SYM %token UNTIL_SYM +%token UPDATE_SYM +%token UPDATE_SYM +%token USAGE +%token USER +%token USE_FRM +%token USE_SYM +%token USING +%token UTC_DATE_SYM +%token UTC_TIMESTAMP_SYM +%token UTC_TIME_SYM +%token VALUES +%token VALUE_SYM +%token VARBINARY +%token VARCHAR +%token 
VARIABLES +%token VARIANCE_SYM +%token VARIANCE_SYM +%token VARYING +%token VIEW_SYM +%token WARNINGS +%token WEEK_SYM +%token WHEN_SYM +%token WHERE %token WHILE_SYM -%token ASENSITIVE_SYM -%token INSENSITIVE_SYM -%token SENSITIVE_SYM - -%token ISSUER_SYM -%token SUBJECT_SYM -%token CIPHER_SYM +%token WITH +%token WORK_SYM +%token WRITE_SYM +%token X509_SYM +%token XA_SYM +%token XOR +%token YEARWEEK +%token YEAR_MONTH_SYM +%token YEAR_SYM +%token ZEROFILL -%token BEFORE_SYM %left SET_VAR %left OR_OR_SYM OR_SYM OR2_SYM XOR %left AND_SYM AND_AND_SYM @@ -677,7 +675,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); opt_table_alias %type <table> - table_ident table_ident_nodb references + table_ident table_ident_nodb references xid %type <simple_string> remember_name remember_end opt_ident opt_db text_or_password @@ -692,7 +690,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); table_option opt_if_not_exists opt_no_write_to_binlog opt_var_type opt_var_ident_type delete_option opt_temporary all_or_any opt_distinct opt_ignore_leaves fulltext_options spatial_type union_option - start_transaction_opts opt_chain opt_work_and_chain opt_release + start_transaction_opts opt_chain opt_release %type <ulong_num> ULONG_NUM raid_types merge_insert_types @@ -809,7 +807,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); opt_extended_describe prepare prepare_src execute deallocate statement sp_suid opt_view_list view_list or_replace algorithm - sp_c_chistics sp_a_chistics sp_chistic sp_c_chistic + sp_c_chistics sp_a_chistics sp_chistic sp_c_chistic xa END_OF_INPUT %type <NONE> call sp_proc_stmts sp_proc_stmts1 sp_proc_stmt @@ -822,7 +820,7 @@ END_OF_INPUT %type <NONE> '-' '+' '*' '/' '%' '(' ')' ',' '!' 
'{' '}' '&' '|' AND_SYM OR_SYM OR_OR_SYM BETWEEN_SYM CASE_SYM - THEN_SYM WHEN_SYM DIV_SYM MOD_SYM + THEN_SYM WHEN_SYM DIV_SYM MOD_SYM OR2_SYM AND_AND_SYM %% @@ -896,6 +894,7 @@ statement: | unlock | update | use + | xa ; deallocate: @@ -1278,7 +1277,7 @@ create: YYTHD->client_capabilities |= CLIENT_MULTI_QUERIES; sp->restore_thd_mem_root(YYTHD); - lex->name_and_length= $3; + lex->ident= $3; /* We have to do it after parsing trigger body, because some of @@ -1870,22 +1869,22 @@ sp_proc_stmt: if (lex->sql_command != SQLCOM_SET_OPTION || ! lex->var_list.is_empty()) { - sp_instr_stmt *i=new sp_instr_stmt(sp->instructions(), - lex->spcont); - - /* Extract the query statement from the tokenizer: - The end is either lex->tok_end or tok->ptr. */ - if (lex->ptr - lex->tok_end > 1) - i->m_query.length= lex->ptr - sp->m_tmp_query; - else - i->m_query.length= lex->tok_end - sp->m_tmp_query; - i->m_query.str= strmake_root(YYTHD->mem_root, - (char *)sp->m_tmp_query, - i->m_query.length); - i->set_lex(lex); - sp->add_instr(i); - lex->sp_lex_in_use= TRUE; - } + sp_instr_stmt *i=new sp_instr_stmt(sp->instructions(), + lex->spcont); + + /* Extract the query statement from the tokenizer: + The end is either lex->tok_end or tok->ptr. 
*/ + if (lex->ptr - lex->tok_end > 1) + i->m_query.length= lex->ptr - sp->m_tmp_query; + else + i->m_query.length= lex->tok_end - sp->m_tmp_query; + i->m_query.str= strmake_root(YYTHD->mem_root, + (char *)sp->m_tmp_query, + i->m_query.length); + i->set_lex(lex); + sp->add_instr(i); + lex->sp_lex_in_use= TRUE; + } sp->restore_lex(YYTHD); } | RETURN_SYM expr @@ -1925,10 +1924,8 @@ sp_proc_stmt: sp_instr_set *i = new sp_instr_set(lex->sphead->instructions(), lex->spcont, offset, $2, MYSQL_TYPE_STRING); - LEX_STRING dummy; + LEX_STRING dummy={"", 0}; - dummy.str= (char *)""; - dummy.length= 0; lex->spcont->push_pvar(&dummy, MYSQL_TYPE_STRING, sp_param_in); i->tables= lex->query_tables; lex->query_tables= 0; @@ -2326,10 +2323,8 @@ sp_labeled_control: ; sp_opt_label: - /* Empty */ - { $$.str= NULL; $$.length= 0; } - | IDENT - { $$= $1; } + /* Empty */ { $$= null_lex_str; } + | IDENT { $$= $1; } ; sp_unlabeled_control: @@ -2723,7 +2718,7 @@ field_spec: LEX *lex=Lex; lex->length=lex->dec=0; lex->type=0; lex->default_value= lex->on_update_value= 0; - lex->comment=0; + lex->comment=null_lex_str; lex->charset=NULL; } type opt_attribute @@ -2733,7 +2728,7 @@ field_spec: (enum enum_field_types) $3, lex->length,lex->dec,lex->type, lex->default_value, lex->on_update_value, - lex->comment, + &lex->comment, lex->change,&lex->interval_list,lex->charset, lex->uint_geom_type)) YYABORT; @@ -2957,7 +2952,7 @@ attribute: lex->type|= UNIQUE_KEY_FLAG; lex->alter_info.flags|= ALTER_ADD_INDEX; } - | COMMENT_SYM TEXT_STRING_sys { Lex->comment= &$2; } + | COMMENT_SYM TEXT_STRING_sys { Lex->comment= $2; } | BINARY { Lex->type|= BINCMP_FLAG; } | COLLATE_SYM collation_name { @@ -3186,8 +3181,8 @@ opt_ident: | field_ident { $$=$1.str; }; opt_component: - /* empty */ { $$.str= 0; $$.length= 0; } - | '.' ident { $$=$2; }; + /* empty */ { $$= null_lex_str; } + | '.' 
ident { $$= $2; }; string_list: text_string { Lex->interval_list.push_back($1); } @@ -3315,7 +3310,7 @@ alter_list_item: LEX *lex=Lex; lex->length=lex->dec=0; lex->type=0; lex->default_value= lex->on_update_value= 0; - lex->comment=0; + lex->comment=null_lex_str; lex->charset= NULL; lex->alter_info.flags|= ALTER_CHANGE_COLUMN; } @@ -3326,7 +3321,7 @@ alter_list_item: (enum enum_field_types) $5, lex->length,lex->dec,lex->type, lex->default_value, lex->on_update_value, - lex->comment, + &lex->comment, $3.str, &lex->interval_list, lex->charset, lex->uint_geom_type)) YYABORT; @@ -3715,7 +3710,7 @@ keycache: { LEX *lex=Lex; lex->sql_command= SQLCOM_ASSIGN_TO_KEYCACHE; - lex->name_and_length= $5; + lex->ident= $5; } ; @@ -3978,7 +3973,7 @@ select_item2: | expr { $$=$1; }; select_alias: - /* empty */ { $$.str=0;} + /* empty */ { $$=null_lex_str;} | AS ident { $$=$2; } | AS TEXT_STRING_sys { $$=$2; } | ident { $$=$1; } @@ -4023,7 +4018,7 @@ bool_pri: | predicate ; predicate: - bit_expr IN_SYM '(' expr_list ')' + bit_expr IN_SYM '(' expr_list ')' { $4->push_front($1); $$= new Item_func_in(*$4); } | bit_expr not IN_SYM '(' expr_list ')' { $5->push_front($1); $$= negate_expression(YYTHD, new Item_func_in(*$5)); } @@ -5545,7 +5540,7 @@ drop: TL_OPTION_UPDATING, TL_WRITE)) YYABORT; - lex->name_and_length= $5; + lex->ident= $5; } ; @@ -5783,12 +5778,12 @@ insert_update_list: insert_update_elem: simple_ident_nospvar equal expr_or_default - { + { LEX *lex= Lex; if (lex->update_list.push_back($1) || lex->value_list.push_back($3)) - YYABORT; - }; + YYABORT; + }; opt_low_priority: /* empty */ { $$= YYTHD->update_lock_default; } @@ -5919,14 +5914,14 @@ show_param: YYABORT; } | OPEN_SYM TABLES opt_db wild_and_where - { - LEX *lex= Lex; + { + LEX *lex= Lex; lex->sql_command= SQLCOM_SELECT; lex->orig_sql_command= SQLCOM_SHOW_OPEN_TABLES; - lex->select_lex.db= $3; + lex->select_lex.db= $3; if (prepare_schema_table(YYTHD, lex, 0, SCH_OPEN_TABLES)) YYABORT; - } + } | ENGINE_SYM 
storage_engines { Lex->create_info.db_type= $2; } show_engine_param @@ -6010,7 +6005,7 @@ show_param: lex->option_type= (enum_var_type) $1; if (prepare_schema_table(YYTHD, lex, 0, SCH_STATUS)) YYABORT; - } + } | INNOBASE_SYM STATUS_SYM { Lex->sql_command = SQLCOM_SHOW_INNODB_STATUS; WARN_DEPRECATED("SHOW INNODB STATUS", "SHOW ENGINE INNODB STATUS"); } | MUTEX_SYM STATUS_SYM @@ -6018,14 +6013,14 @@ show_param: | opt_full PROCESSLIST_SYM { Lex->sql_command= SQLCOM_SHOW_PROCESSLIST;} | opt_var_type VARIABLES wild_and_where - { + { LEX *lex= Lex; lex->sql_command= SQLCOM_SELECT; lex->orig_sql_command= SQLCOM_SHOW_VARIABLES; lex->option_type= (enum_var_type) $1; if (prepare_schema_table(YYTHD, lex, 0, SCH_VARIABLES)) YYABORT; - } + } | charset wild_and_where { LEX *lex= Lex; @@ -6066,7 +6061,7 @@ show_param: curr_user->host.str= (char *) "%"; curr_user->host.length= 1; } - curr_user->password.str=NullS; + curr_user->password=null_lex_str; lex->grant_user= curr_user; } | GRANTS FOR_SYM user @@ -6074,7 +6069,7 @@ show_param: LEX *lex=Lex; lex->sql_command= SQLCOM_SHOW_GRANTS; lex->grant_user=$3; - lex->grant_user->password.str=NullS; + lex->grant_user->password=null_lex_str; } | CREATE DATABASE opt_if_not_exists ident { @@ -7003,6 +6998,7 @@ keyword: | MEDIUM_SYM {} | MERGE_SYM {} | MICROSECOND_SYM {} + | MIGRATE_SYM {} | MINUTE_SYM {} | MIN_ROWS {} | MODIFY_SYM {} @@ -7025,10 +7021,12 @@ keyword: | OFFSET_SYM {} | OLD_PASSWORD {} | ONE_SHOT_SYM {} + | ONE_SYM {} | OPEN_SYM {} | PACK_KEYS_SYM {} | PARTIAL {} | PASSWORD {} + | PHASE_SYM {} | POINT_SYM {} | POLYGON {} | PREPARE_SYM {} @@ -7044,7 +7042,8 @@ keyword: | RAID_CHUNKSIZE {} | RAID_STRIPED_SYM {} | RAID_TYPE {} - | REDUNDANT_SYM {} + | RECOVER_SYM {} + | REDUNDANT_SYM {} | RELAY_LOG_FILE_SYM {} | RELAY_LOG_POS_SYM {} | RELOAD {} @@ -7054,6 +7053,7 @@ keyword: | RESET_SYM {} | RESOURCES {} | RESTORE_SYM {} + | RESUME_SYM {} | RETURNS_SYM {} | ROLLBACK_SYM {} | ROLLUP_SYM {} @@ -7087,6 +7087,7 @@ keyword: | 
SUBDATE_SYM {} | SUBJECT_SYM {} | SUPER_SYM {} + | SUSPEND_SYM {} | TABLES {} | TABLESPACE {} | TEMPORARY {} @@ -7116,6 +7117,7 @@ keyword: | WEEK_SYM {} | WORK_SYM {} | X509_SYM {} + | XA_SYM {} | YEAR_SYM {} ; @@ -7269,12 +7271,9 @@ option_value: | TRANSACTION_SYM ISOLATION LEVEL_SYM isolation_types { LEX *lex=Lex; - LEX_STRING tmp; - tmp.str=0; - tmp.length=0; lex->var_list.push_back(new set_var(lex->option_type, find_sys_var("tx_isolation"), - &tmp, + &null_lex_str, new Item_int((int32) $4))); } | charset old_or_new_charset_name_or_default @@ -7304,7 +7303,7 @@ option_value: LEX_USER *user; if (!(user=(LEX_USER*) thd->alloc(sizeof(LEX_USER)))) YYABORT; - user->host.str=0; + user->host=null_lex_str; user->user.str=thd->priv_user; thd->lex->var_list.push_back(new set_var_password(user, $3)); } @@ -7329,8 +7328,7 @@ internal_variable_name: if (!tmp) YYABORT; $$.var= tmp; - $$.base_name.str=0; - $$.base_name.length=0; + $$.base_name= null_lex_str; /* If this is time_zone variable we should open time zone describing tables @@ -7526,8 +7524,8 @@ handler: ; handler_read_or_scan: - handler_scan_function { Lex->backup_dir= 0; } - | ident handler_rkey_function { Lex->backup_dir= $1.str; } + handler_scan_function { Lex->ident= null_lex_str; } + | ident handler_rkey_function { Lex->ident= $1; } ; handler_scan_function: @@ -7772,9 +7770,9 @@ grant_user: } } | user IDENTIFIED_SYM BY PASSWORD TEXT_STRING - { $$=$1; $1->password=$5 ; } + { $$= $1; $1->password= $5; } | user - { $$=$1; $1->password.str=NullS; } + { $$= $1; $1->password= null_lex_str; } ; @@ -7844,96 +7842,107 @@ grant_option: GRANT OPTION { Lex->grant |= GRANT_ACL;} | MAX_QUERIES_PER_HOUR ULONG_NUM { - Lex->mqh.questions=$2; - Lex->mqh.specified_limits|= USER_RESOURCES::QUERIES_PER_HOUR; + LEX *lex=Lex; + lex->mqh.questions=$2; + lex->mqh.specified_limits|= USER_RESOURCES::QUERIES_PER_HOUR; } | MAX_UPDATES_PER_HOUR ULONG_NUM { - Lex->mqh.updates=$2; - Lex->mqh.specified_limits|= 
USER_RESOURCES::UPDATES_PER_HOUR; + LEX *lex=Lex; + lex->mqh.updates=$2; + lex->mqh.specified_limits|= USER_RESOURCES::UPDATES_PER_HOUR; } | MAX_CONNECTIONS_PER_HOUR ULONG_NUM { - Lex->mqh.conn_per_hour= $2; - Lex->mqh.specified_limits|= USER_RESOURCES::CONNECTIONS_PER_HOUR; + LEX *lex=Lex; + lex->mqh.conn_per_hour= $2; + lex->mqh.specified_limits|= USER_RESOURCES::CONNECTIONS_PER_HOUR; } | MAX_USER_CONNECTIONS_SYM ULONG_NUM { - Lex->mqh.user_conn= $2; - Lex->mqh.specified_limits|= USER_RESOURCES::USER_CONNECTIONS; - } + LEX *lex=Lex; + lex->mqh.user_conn= $2; + lex->mqh.specified_limits|= USER_RESOURCES::USER_CONNECTIONS; + } ; begin: - BEGIN_SYM { Lex->sql_command = SQLCOM_BEGIN; Lex->start_transaction_opt= 0;} opt_work {} + BEGIN_SYM + { + LEX *lex=Lex; + lex->sql_command = SQLCOM_BEGIN; + lex->start_transaction_opt= 0; + } + opt_work {} ; opt_work: /* empty */ {} - | WORK_SYM {;} + | WORK_SYM {} ; opt_chain: - /* empty */ { $$= (Lex->thd->variables.completion_type == 1); } + /* empty */ { $$= (YYTHD->variables.completion_type == 1); } | AND_SYM NO_SYM CHAIN_SYM { $$=0; } | AND_SYM CHAIN_SYM { $$=1; } ; opt_release: - /* empty */ { $$= (Lex->thd->variables.completion_type == 2); } + /* empty */ { $$= (YYTHD->variables.completion_type == 2); } | RELEASE_SYM { $$=1; } | NO_SYM RELEASE_SYM { $$=0; } ; -opt_work_and_chain: - opt_work opt_chain { $$=$2; } - ; - opt_savepoint: /* empty */ {} | SAVEPOINT_SYM {} ; commit: - COMMIT_SYM opt_work_and_chain opt_release + COMMIT_SYM opt_work opt_chain opt_release { - Lex->sql_command= SQLCOM_COMMIT; - Lex->tx_chain= $2; - Lex->tx_release= $3; + LEX *lex=Lex; + lex->sql_command= SQLCOM_COMMIT; + lex->tx_chain= $3; + lex->tx_release= $4; } ; rollback: - ROLLBACK_SYM opt_work_and_chain opt_release - { - Lex->sql_command= SQLCOM_ROLLBACK; - Lex->tx_chain= $2; - Lex->tx_release= $3; + ROLLBACK_SYM opt_work opt_chain opt_release + { + LEX *lex=Lex; + lex->sql_command= SQLCOM_ROLLBACK; + lex->tx_chain= $3; + lex->tx_release= $4; } 
| ROLLBACK_SYM opt_work TO_SYM opt_savepoint ident { - Lex->sql_command = SQLCOM_ROLLBACK_TO_SAVEPOINT; - Lex->savepoint_name = $5.str; + LEX *lex=Lex; + lex->sql_command= SQLCOM_ROLLBACK_TO_SAVEPOINT; + lex->ident= $5; } ; savepoint: SAVEPOINT_SYM ident { - Lex->sql_command = SQLCOM_SAVEPOINT; - Lex->savepoint_name = $2.str; + LEX *lex=Lex; + lex->sql_command= SQLCOM_SAVEPOINT; + lex->ident= $2; } ; release: RELEASE_SYM SAVEPOINT_SYM ident { - Lex->sql_command = SQLCOM_RELEASE_SAVEPOINT; - Lex->savepoint_name = $3.str; + LEX *lex=Lex; + lex->sql_command= SQLCOM_RELEASE_SAVEPOINT; + lex->ident= $3; } ; - + /* UNIONS : glue selects together */ @@ -8119,3 +8128,60 @@ check_option: { Lex->create_view_check= VIEW_CHECK_LOCAL; } ; +xa: XA_SYM begin_or_start xid opt_join_or_resume + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_XA_START; + } + | XA_SYM END xid opt_suspend_or_migrate + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_XA_END; + } + | XA_SYM PREPARE_SYM xid + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_XA_PREPARE; + } + | XA_SYM COMMIT_SYM xid opt_one_phase + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_XA_COMMIT; + } + | XA_SYM ROLLBACK_SYM xid + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_XA_ROLLBACK; + } + | XA_SYM RECOVER_SYM + { + LEX *lex= Lex; + lex->sql_command = SQLCOM_XA_RECOVER; + } + ; + +xid: ident_or_text { Lex->ident=$1; } + ; + +begin_or_start: BEGIN_SYM {} + | START_SYM {} + ; + +opt_join_or_resume: + /* nothing */ { Lex->xa_opt=XA_NONE; } + | JOIN_SYM { Lex->xa_opt=XA_JOIN; } + | RESUME_SYM { Lex->xa_opt=XA_RESUME; } + ; + +opt_one_phase: + /* nothing */ { Lex->xa_opt=XA_NONE; } + | ONE_SYM PHASE_SYM { Lex->xa_opt=XA_ONE_PHASE; } + ; + +opt_suspend_or_migrate: + /* nothing */ { Lex->xa_opt=XA_NONE; } + | SUSPEND_SYM { Lex->xa_opt=XA_SUSPEND; } + | FOR_SYM MIGRATE_SYM { Lex->xa_opt=XA_FOR_MIGRATE; } + ; + + |