diff options
-rw-r--r-- | BitKeeper/etc/logging_ok | 1 | ||||
-rw-r--r-- | libmysql/Makefile.shared | 3 | ||||
-rw-r--r-- | mysql-test/r/rpl_deadlock.result | 6 | ||||
-rw-r--r-- | mysql-test/t/rpl_deadlock.test | 4 | ||||
-rw-r--r-- | sql/mysqld.cc | 16 | ||||
-rw-r--r-- | sql/slave.cc | 33 | ||||
-rw-r--r-- | sql/slave.h | 9 | ||||
-rw-r--r-- | sql/sql_show.cc | 13 | ||||
-rw-r--r-- | sql/structs.h | 2 |
9 files changed, 64 insertions, 23 deletions
diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index 1a7799dc442..90e6c3c100d 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -44,6 +44,7 @@ dlenev@build.mysql.com dlenev@jabberwock.localdomain dlenev@mysql.com ejonore@mc03.ndb.mysql.com +gbichot@quadita2.mysql.com gbichot@quadxeon.mysql.com georg@beethoven.local georg@lmy002.wdf.sap.corp diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared index 9664fb0abef..23a8201cbf6 100644 --- a/libmysql/Makefile.shared +++ b/libmysql/Makefile.shared @@ -94,7 +94,8 @@ clean-local: `echo $(sql_cmn_objects) | sed "s;\.lo;.c;g"` \ $(CHARSET_SRCS) $(CHARSET_OBJS) \ $(mystringsextra) $(mysysheaders) $(vioheaders)\ - ../linked_client_sources net.c + ../linked_libmysql_sources ../linked_libmysql_r_sources \ + net.c conf_to_src_SOURCES = conf_to_src.c conf_to_src_LDADD= diff --git a/mysql-test/r/rpl_deadlock.result b/mysql-test/r/rpl_deadlock.result index 366d18f3e05..8808a973855 100644 --- a/mysql-test/r/rpl_deadlock.result +++ b/mysql-test/r/rpl_deadlock.result @@ -8,6 +8,9 @@ create table t1 (a int not null, key(a)) engine=innodb; create table t2 (a int not null, key(a)) engine=innodb; create table t3 (a int) engine=innodb; create table t4 (a int) engine=innodb; +show variables like 'slave_transaction_retries'; +Variable_name Value +slave_transaction_retries 0 show create table t1; Table Create Table t1 CREATE TABLE `t1` ( @@ -20,6 +23,9 @@ t2 CREATE TABLE `t2` ( `a` int(11) NOT NULL default '0', KEY `a` (`a`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 +show variables like 'slave_transaction_retries'; +Variable_name Value +slave_transaction_retries 2 stop slave; begin; insert into t3 select * from t2 for update; diff --git a/mysql-test/t/rpl_deadlock.test b/mysql-test/t/rpl_deadlock.test index 82470e8ebd0..9ad6362f7e7 100644 --- a/mysql-test/t/rpl_deadlock.test +++ b/mysql-test/t/rpl_deadlock.test @@ -7,6 +7,8 @@ # (Guilhem) have seen the test manage to provoke lock wait timeout # error but not deadlock error; that is ok as code deals with the two # errors in exactly the same way. +# We don't 'show status like 'slave_retried_transactions'' because this +# is not repeatable (depends on sleeps). source include/have_innodb.inc; source include/master-slave.inc; @@ -16,10 +18,12 @@ create table t1 (a int not null, key(a)) engine=innodb; create table t2 (a int not null, key(a)) engine=innodb; create table t3 (a int) engine=innodb; create table t4 (a int) engine=innodb; +show variables like 'slave_transaction_retries'; sync_slave_with_master; show create table t1; show create table t2; +show variables like 'slave_transaction_retries'; stop slave; # 1) Test deadlock diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 43ccfe2b366..ccbbe8c4908 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -3062,8 +3062,17 @@ we force server id to 2, but this MySQL server will not act as a slave."); #endif if (opt_bootstrap) /* If running with bootstrap, do not start replication. */ opt_skip_slave_start= 1; - /* init_slave() must be called after the thread keys are created */ - init_slave(); + /* + init_slave() must be called after the thread keys are created. + Some parts of the code (e.g. SHOW STATUS LIKE 'slave_running' and other + places) assume that active_mi != 0, so let's fail if it's 0 (out of + memory); a message has already been printed. + */ + if (init_slave() && !active_mi) + { + end_thr_alarm(1); // Don't allow alarms + unireg_abort(1); + } if (opt_bootstrap) { @@ -5494,7 +5503,8 @@ struct show_var_st status_vars[]= { {"Select_range_check", (char*) &select_range_check_count, SHOW_LONG}, {"Select_scan", (char*) &select_scan_count, SHOW_LONG}, {"Slave_open_temp_tables", (char*) &slave_open_temp_tables, SHOW_LONG}, - {"Slave_running", (char*) 0, SHOW_SLAVE_RUNNING}, + {"Slave_running", (char*) 0, SHOW_SLAVE_RUNNING}, + {"Slave_retried_transactions",(char*) 0, SHOW_SLAVE_RETRIED_TRANS}, {"Slow_launch_threads", (char*) &slow_launch_threads, SHOW_LONG}, {"Slow_queries", (char*) &long_query_count, SHOW_LONG}, {"Sort_merge_passes", (char*) &filesort_merge_passes, SHOW_LONG}, diff --git a/sql/slave.cc b/sql/slave.cc index 145f4295075..605f8289946 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -27,6 +27,7 @@ #include <my_dir.h> #include <sql_common.h> +#define MAX_SLAVE_RETRY_PAUSE 5 bool use_slave_mask = 0; MY_BITMAP slave_error_mask; @@ -2335,7 +2336,7 @@ st_relay_log_info::st_relay_log_info() ignore_log_space_limit(0), last_master_timestamp(0), slave_skip_counter(0), abort_pos_wait(0), slave_run_id(0), sql_thd(0), last_slave_errno(0), inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE), - until_log_pos(0) + until_log_pos(0), retried_trans(0) { group_relay_log_name[0]= event_relay_log_name[0]= group_master_log_name[0]= 0; @@ -2980,9 +2981,8 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) init_master_info()). b) init_relay_log_pos(), because the BEGIN may be an older relay log. */ - if (rli->trans_retries--) + if (rli->trans_retries < slave_trans_retries) { - sql_print_information("Slave SQL thread retries transaction"); if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL)) sql_print_error("Failed to initialize the master info structure"); else if (init_relay_log_pos(rli, @@ -2994,8 +2994,16 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) else { exec_res= 0; - sleep(2); // chance for concurrent connection to get more locks - } + /* chance for concurrent connection to get more locks */ + safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE), + (CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli); + pthread_mutex_lock(&rli->data_lock); // because of SHOW STATUS + rli->trans_retries++; + rli->retried_trans++; + pthread_mutex_unlock(&rli->data_lock); + DBUG_PRINT("info", ("Slave retries transaction " + "rli->trans_retries: %lu", rli->trans_retries)); + } } else sql_print_error("Slave SQL thread retried transaction %lu time(s) " @@ -3004,17 +3012,8 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) slave_trans_retries); } if (!((thd->options & OPTION_BEGIN) && opt_using_transactions)) - { - rli->trans_retries= slave_trans_retries; // restart from fresh - /* - TODO: when merged into 5.0, when slave does auto-rollback if - corrupted binlog, this should reset the retry counter too - (any rollback should). In fact it will work, as here we are just out - of a Format_description_log_event::exec_event() which rolled back. - But check repl code in 5.0 for new ha_rollback calls, just in case. - */ - } - } + rli->trans_retries= 0; // restart from fresh + } return exec_res; } else @@ -3426,7 +3425,7 @@ slave_begin: pthread_mutex_lock(&rli->log_space_lock); rli->ignore_log_space_limit= 0; pthread_mutex_unlock(&rli->log_space_lock); - rli->trans_retries= slave_trans_retries; // start from "no error" + rli->trans_retries= 0; // start from "no error" if (init_relay_log_pos(rli, rli->group_relay_log_name, diff --git a/sql/slave.h b/sql/slave.h index fb7560ec738..5a85e26d9ad 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -295,7 +295,14 @@ typedef struct st_relay_log_info UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1 } until_log_names_cmp_result; - ulong trans_retries; + /* + trans_retries varies between 0 to slave_transaction_retries and counts how + many times the slave has retried the present transaction; gets reset to 0 + when the transaction finally succeeds. retried_trans is a cumulative + counter: how many times the slave has retried a transaction (any) since + slave started. + */ + ulong trans_retries, retried_trans; st_relay_log_info(); ~st_relay_log_info(); diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 76ea72ef41c..f979b3ca771 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -1887,6 +1887,19 @@ int mysqld_show(THD *thd, const char *wild, show_var_st *variables, pthread_mutex_unlock(&LOCK_active_mi); break; } + case SHOW_SLAVE_RETRIED_TRANS: + { + /* + TODO: in 5.1 with multimaster, have one such counter per line in SHOW + SLAVE STATUS, and have the sum over all lines here. + */ + pthread_mutex_lock(&LOCK_active_mi); + pthread_mutex_lock(&active_mi->rli.data_lock); + end= int10_to_str(active_mi->rli.retried_trans, buff, 10); + pthread_mutex_unlock(&active_mi->rli.data_lock); + pthread_mutex_unlock(&LOCK_active_mi); + break; + } #endif /* HAVE_REPLICATION */ case SHOW_OPENTABLES: end= int10_to_str((long) cached_tables(), buff, 10); diff --git a/sql/structs.h b/sql/structs.h index 846b3400fab..ba081b570c1 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -180,7 +180,7 @@ enum SHOW_TYPE SHOW_SSL_CTX_SESS_TIMEOUTS, SHOW_SSL_CTX_SESS_CACHE_FULL, SHOW_SSL_GET_CIPHER_LIST, #endif /* HAVE_OPENSSL */ - SHOW_RPL_STATUS, SHOW_SLAVE_RUNNING, + SHOW_RPL_STATUS, SHOW_SLAVE_RUNNING, SHOW_SLAVE_RETRIED_TRANS, SHOW_KEY_CACHE_LONG, SHOW_KEY_CACHE_CONST_LONG }; |