diff options
author | unknown <guilhem@mysql.com> | 2003-07-24 22:25:36 +0200 |
---|---|---|
committer | unknown <guilhem@mysql.com> | 2003-07-24 22:25:36 +0200 |
commit | ab34adf66acb0830a1105777b31a427da1e49b63 (patch) | |
tree | 38aa7e4dde6c59d05318d1c048b93848531ad47e /sql | |
parent | 30ee158d2f40b6b9e18955970ccee4cba664d21b (diff) | |
download | mariadb-git-ab34adf66acb0830a1105777b31a427da1e49b63.tar.gz |
WL#1036 (print the db in slave error messages).
I extended the task to cleaning up error messages, making them look nicer,
and making the output of SHOW SLAVE STATUS (column Last_error) as complete
as what's printed in the .err file;
previously we would have, for a failure of a replicated LOAD DATA INFILE:
- in the .err, 2 lines:
"duplicate entry 2708 for key 1"
"failed loading SQL_LOAD-5-2-2.info"
- and in SHOW SLAVE STATUS, only:
"failed loading SQL_LOAD-5-2-2.info".
Now SHOW SLAVE STATUS will contain the concatenation of the 2 messages.
sql/log_event.cc:
Print the default database when replication stops because of an error. Previously, we had:
"error "Duplicate entry 87987 for key 1", query 'insert into t values(87987)'", i.e. the db
was not mentioned, making the error hard to diagnose in cases where the same table name is
used in several databases.
Lengthened some error messages (for failing replication of LOAD DATA: mention the table
and the db).
Changes so that SHOW SLAVE STATUS reports errors as completely as the .err file does.
sql/slave.cc:
Removed a useless declaration (the rewrite_db() function is already declared in slave.h).
Added missing ')' in error messages.
Tried to make error messages look nicer (previously we had
"do START SLAVE;, error_code=1062";
now we'll have
"do START SLAVE; . Error_code=1062").
This form has been discussed; I agree it's no panacea, but it's still more readable
like this. To be improved in the future :)
sql/slave.h:
declarations.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/log_event.cc | 111 | ||||
-rw-r--r-- | sql/slave.cc | 55 | ||||
-rw-r--r-- | sql/slave.h | 3 |
3 files changed, 117 insertions, 52 deletions
diff --git a/sql/log_event.cc b/sql/log_event.cc index a6d2abbf894..96bcf0a2779 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -1753,7 +1753,7 @@ void Unknown_log_event::print(FILE* file, bool short_form, char* last_db) #ifndef MYSQL_CLIENT int Query_log_event::exec_event(struct st_relay_log_info* rli) { - int expected_error,actual_error = 0; + int expected_error, actual_error= 0; init_sql_alloc(&thd->mem_root, 8192,0); thd->db = rewrite_db((char*)db); @@ -1801,19 +1801,30 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli) else if (!strcmp(thd->query,"COMMIT")) rli->inside_transaction=0; + /* + If we expected a non-zero error code, and we don't get the same error + code, and none of them should be ignored. + */ if ((expected_error != (actual_error = thd->net.last_errno)) && expected_error && !ignored_error_code(actual_error) && !ignored_error_code(expected_error)) { - const char* errmsg = "Slave: did not get the expected error\ - running query from master - expected: '%s' (%d), got '%s' (%d)"; - sql_print_error(errmsg, ER_SAFE(expected_error), - expected_error, - actual_error ? thd->net.last_error: "no error", - actual_error); - thd->query_error = 1; + slave_print_error(rli, 0, + "Query '%s' did not get the same error as the query \ +got on master - got on master: '%s' (%d), got on slave: '%s' (%d) \ +(default database was '%s')", + query, + ER_SAFE(expected_error), + expected_error, + actual_error ? thd->net.last_error: "no error", + actual_error, + print_slave_db_safe((char*)db)); + thd->query_error= 1; } + /* + If we get the same error code as expected, or they should be ignored. 
+ */ else if (expected_error == actual_error || ignored_error_code(actual_error)) { @@ -1821,37 +1832,38 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli) *rli->last_slave_error = 0; rli->last_slave_errno = 0; } - } - else - { - // master could be inconsistent, abort and tell DBA to check/fix it - VOID(pthread_mutex_lock(&LOCK_thread_count)); - thd->db = thd->query = 0; - VOID(pthread_mutex_unlock(&LOCK_thread_count)); - thd->variables.convert_set = 0; - close_thread_tables(thd); - free_root(&thd->mem_root,0); - return 1; - } - } - thd->db= 0; // prevent db from being freed + /* + Other cases: mostly we expected no error and get one. + */ + else if (thd->query_error || thd->fatal_error) + { + slave_print_error(rli,actual_error, "Error '%s' on query '%s' \ +(default database was '%s')", + actual_error ? thd->net.last_error : + "unexpected success or fatal error", query, + print_slave_db_safe((char*)db)); + thd->query_error= 1; + } + } + /* + End of sanity check. If the test was wrong, the query got a really bad + error on the master, which could be inconsistent, abort and tell DBA to + check/fix it. check_expected_error() already printed the message to + stderr and rli, and set thd->query_error to 1. + */ + } /* End of if (db_ok(... */ + +end: + VOID(pthread_mutex_lock(&LOCK_thread_count)); + thd->db= 0; // prevent db from being freed thd->query= 0; // just to be sure VOID(pthread_mutex_unlock(&LOCK_thread_count)); // assume no convert for next query unless set explictly thd->variables.convert_set = 0; - close_thread_tables(thd); - - if (thd->query_error || thd->fatal_error) - { - slave_print_error(rli,actual_error, "error '%s' on query '%s'", - actual_error ? thd->net.last_error : - "unexpected success or fatal error", query); - free_root(&thd->mem_root,0); - return 1; - } + close_thread_tables(thd); free_root(&thd->mem_root,0); - return Log_event::exec_event(rli); + return (thd->query_error ? 
thd->query_error : Log_event::exec_event(rli)); } /* @@ -1972,8 +1984,11 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, event in the master log */ sql_print_error("Slave: load data infile at position %s in log \ -'%s' produced %d warning(s)", llstr(log_pos,llbuff), RPL_LOG_NAME, - thd->cuted_fields ); +'%s' produced %d warning(s) (loaded table was '%s', database was '%s')", + llstr(log_pos,llbuff), RPL_LOG_NAME, + thd->cuted_fields, + (char*)table_name, + print_slave_db_safe((char*)db)); if (net) net->pkt_nr= thd->net.pkt_nr; } @@ -2005,8 +2020,9 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, err=ER(sql_errno); } slave_print_error(rli,sql_errno, - "Error '%s' running load data infile", - err); + "Error '%s' running load data infile \ +(loaded table was '%s', database was '%s')", + err, (char*)table_name, print_slave_db_safe((char*)db)); free_root(&thd->mem_root,0); return 1; } @@ -2014,7 +2030,10 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, if (thd->fatal_error) { - sql_print_error("Fatal error running LOAD DATA INFILE "); + slave_print_error(rli,ER_UNKNOWN_ERROR, +"Fatal error running \ +LOAD DATA INFILE (loaded table was '%s', database was '%s')", + (char*)table_name, print_slave_db_safe((char*)db)); return 1; } @@ -2311,7 +2330,21 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) */ if (lev->exec_event(0,rli,1)) { - slave_print_error(rli,my_errno, "Failed executing load from '%s'", fname); + /* + We want to indicate the name of the file that could not be loaded + (SQL_LOADxxx). + But as we are here we are sure the error is in rli->last_slave_error and + rli->last_slave_errno (example of error: duplicate entry for key), so we + don't want to overwrite it with the filename. + What we want instead is add the filename to the current error message. 
+ */ + char *tmp= my_strdup(rli->last_slave_error,MYF(MY_WME)); + if (!tmp) + goto err; + slave_print_error(rli,rli->last_slave_errno, /* ok to re-use the error code */ + "%s. Failed executing load from '%s'", + tmp, fname); + my_free(tmp,MYF(0)); thd->options = save_options; goto err; } diff --git a/sql/slave.cc b/sql/slave.cc index 6ace446a341..b4fa11f456e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -74,7 +74,6 @@ static int request_table_dump(MYSQL* mysql, const char* db, const char* table); static int create_table_from_dump(THD* thd, NET* net, const char* db, const char* table_name); static int check_master_version(MYSQL* mysql, MASTER_INFO* mi); -char* rewrite_db(char* db); /* @@ -290,8 +289,6 @@ err: if (need_data_lock) pthread_mutex_unlock(&rli->data_lock); - /* Isn't this strange: if !need_data_lock, we broadcast with no lock ?? */ - pthread_mutex_unlock(log_lock); DBUG_RETURN ((*errmsg) ? 1 : 0); } @@ -362,7 +359,10 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset, rli->pending= 0; if (!rli->inited) + { + DBUG_PRINT("info", ("rli->inited == 0")); DBUG_RETURN(0); + } DBUG_ASSERT(rli->slave_running == 0); DBUG_ASSERT(rli->mi->slave_running == 0); @@ -828,6 +828,21 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli) return rli->abort_slave || abort_loop || thd->killed; } +/* + Writes an error message to rli->last_slave_error and rli->last_slave_errno + (which will be displayed by SHOW SLAVE STATUS), and prints it to stderr. + + SYNOPSIS + slave_print_error() + rli + err_code The error code + msg The error message (usually related to the error code, but can + contain more information). + ... (this is printf-like format, with % symbols in msg) + + RETURN VALUES + void + */ void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...) { @@ -835,9 +850,16 @@ void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...) 
va_start(args,msg); my_vsnprintf(rli->last_slave_error, sizeof(rli->last_slave_error), msg, args); - sql_print_error("Slave: %s, error_code=%d", rli->last_slave_error, - err_code); rli->last_slave_errno = err_code; + /* If the error string ends with '.', do not add a ',' it would be ugly */ + if (rli->last_slave_error[0] && + (rli->last_slave_error[strlen(rli->last_slave_error)-1] == '.')) + sql_print_error("Slave: %s Error_code=%d", rli->last_slave_error, + err_code); + else + sql_print_error("Slave: %s, error_code=%d", rli->last_slave_error, + err_code); + } @@ -865,6 +887,16 @@ char* rewrite_db(char* db) return db; } +/* + From other comments and tests in code, it looks like + sometimes Query_log_event and Load_log_event can have db==0 + (see rewrite_db() above for example) + (cases where this happens are unclear; it may be when the master is 3.23). +*/ +char* print_slave_db_safe(char* db) +{ + return (db ? rewrite_db(db) : (char*) ""); +} /* Checks whether a db matches some do_db and ignore_db rules @@ -1282,7 +1314,7 @@ file '%s')", fname); &msg)) { sql_print_error("Failed to open the relay log (relay_log_name='FIRST', \ -relay_log_pos=4"); +relay_log_pos=4)"); goto err; } rli->master_log_name[0]= 0; @@ -1346,7 +1378,7 @@ file '%s')", fname); { char llbuf[22]; sql_print_error("Failed to open the relay log (relay_log_name='%s', \ -relay_log_pos=%s", rli->relay_log_name, llstr(rli->relay_log_pos, llbuf)); +relay_log_pos=%s)", rli->relay_log_name, llstr(rli->relay_log_pos, llbuf)); goto err; } } @@ -2142,14 +2174,13 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error) case ER_NET_ERROR_ON_WRITE: case ER_SERVER_SHUTDOWN: case ER_NEW_ABORTING_CONNECTION: - my_snprintf(rli->last_slave_error, sizeof(rli->last_slave_error), - "Slave: query '%s' partially completed on the master \ + slave_print_error(rli,expected_error, + "query '%s' partially completed on the master \ and was aborted. 
There is a chance that your master is inconsistent at this \ point. If you are sure that your master is ok, run this query manually on the\ slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;\ - SLAVE START;", thd->query); - rli->last_slave_errno = expected_error; - sql_print_error("%s",rli->last_slave_error); + SLAVE START; .", thd->query); + thd->query_error= 1; return 1; default: return 0; diff --git a/sql/slave.h b/sql/slave.h index 842ddca75f4..d1fd54d3c04 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -383,9 +383,10 @@ int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec); void init_table_rule_hash(HASH* h, bool* h_inited); void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited); char* rewrite_db(char* db); +char* print_slave_db_safe(char* db); int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code); void skip_load_data_infile(NET* net); -void slave_print_error(RELAY_LOG_INFO* rli,int err_code, const char* msg, ...); +void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...); void end_slave(); /* clean up */ int init_master_info(MASTER_INFO* mi, const char* master_info_fname, |