summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorunknown <guilhem@mysql.com>2003-07-24 22:25:36 +0200
committerunknown <guilhem@mysql.com>2003-07-24 22:25:36 +0200
commitab34adf66acb0830a1105777b31a427da1e49b63 (patch)
tree38aa7e4dde6c59d05318d1c048b93848531ad47e /sql
parent30ee158d2f40b6b9e18955970ccee4cba664d21b (diff)
downloadmariadb-git-ab34adf66acb0830a1105777b31a427da1e49b63.tar.gz
WL#1036 (print the db in slave error messages).
I extended the task to cleaning error messages, making them look nicer, and making the output of SHOW SLAVE STATUS (column Last_error) be as complete as what's printed on the .err file; previously we would have, for a failure of a replicated LOAD DATA INFILE: - in the .err, 2 lines: "duplicate entry 2708 for key 1" "failed loading SQL_LOAD-5-2-2.info" - and in SHOW SLAVE STATUS, only: "failed loading SQL_LOAD-5-2-2.info". Now SHOW SLAVE STATUS will contain the concatenation of the 2 messages. sql/log_event.cc: Print the default database when replication stops because of an error. Previously, we had: "error "Duplicate entry 87987 for key 1", query 'insert into t values(87987)'", ie the db was not mentioned, making it hard for cases where the same table name is used in several databases. Lengthened some error messages (for failing replication of LOAD DATA: mention the table and the db). Changes so that SHOW SLAVE STATUS reports as complete errors as the .err file. sql/slave.cc: Removed a useless declaration (the rewrite_db() function is already declared in slave.h). Added missing ')' in error messages. Tried to make error messages look nicer (previously we had "do START SLAVE;, error_code=1062" now we'll have "do START SLAVE; . Error_code=1062". This form has been discussed, I agree it's no panacea, but it's still more readable like this. To be improved in the future :) sql/slave.h: declarations.
Diffstat (limited to 'sql')
-rw-r--r--sql/log_event.cc111
-rw-r--r--sql/slave.cc55
-rw-r--r--sql/slave.h3
3 files changed, 117 insertions, 52 deletions
diff --git a/sql/log_event.cc b/sql/log_event.cc
index a6d2abbf894..96bcf0a2779 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -1753,7 +1753,7 @@ void Unknown_log_event::print(FILE* file, bool short_form, char* last_db)
#ifndef MYSQL_CLIENT
int Query_log_event::exec_event(struct st_relay_log_info* rli)
{
- int expected_error,actual_error = 0;
+ int expected_error, actual_error= 0;
init_sql_alloc(&thd->mem_root, 8192,0);
thd->db = rewrite_db((char*)db);
@@ -1801,19 +1801,30 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
else if (!strcmp(thd->query,"COMMIT"))
rli->inside_transaction=0;
+ /*
+ If we expected a non-zero error code, and we don't get the same error
+ code, and none of them should be ignored.
+ */
if ((expected_error != (actual_error = thd->net.last_errno)) &&
expected_error &&
!ignored_error_code(actual_error) &&
!ignored_error_code(expected_error))
{
- const char* errmsg = "Slave: did not get the expected error\
- running query from master - expected: '%s' (%d), got '%s' (%d)";
- sql_print_error(errmsg, ER_SAFE(expected_error),
- expected_error,
- actual_error ? thd->net.last_error: "no error",
- actual_error);
- thd->query_error = 1;
+ slave_print_error(rli, 0,
+ "Query '%s' did not get the same error as the query \
+got on master - got on master: '%s' (%d), got on slave: '%s' (%d) \
+(default database was '%s')",
+ query,
+ ER_SAFE(expected_error),
+ expected_error,
+ actual_error ? thd->net.last_error: "no error",
+ actual_error,
+ print_slave_db_safe((char*)db));
+ thd->query_error= 1;
}
+ /*
+ If we get the same error code as expected, or they should be ignored.
+ */
else if (expected_error == actual_error ||
ignored_error_code(actual_error))
{
@@ -1821,37 +1832,38 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
*rli->last_slave_error = 0;
rli->last_slave_errno = 0;
}
- }
- else
- {
- // master could be inconsistent, abort and tell DBA to check/fix it
- VOID(pthread_mutex_lock(&LOCK_thread_count));
- thd->db = thd->query = 0;
- VOID(pthread_mutex_unlock(&LOCK_thread_count));
- thd->variables.convert_set = 0;
- close_thread_tables(thd);
- free_root(&thd->mem_root,0);
- return 1;
- }
- }
- thd->db= 0; // prevent db from being freed
+ /*
+ Other cases: mostly we expected no error and get one.
+ */
+ else if (thd->query_error || thd->fatal_error)
+ {
+ slave_print_error(rli,actual_error, "Error '%s' on query '%s' \
+(default database was '%s')",
+ actual_error ? thd->net.last_error :
+ "unexpected success or fatal error", query,
+ print_slave_db_safe((char*)db));
+ thd->query_error= 1;
+ }
+ }
+ /*
+ End of sanity check. If the test was wrong, the query got a really bad
+ error on the master, which could be inconsistent, abort and tell DBA to
+ check/fix it. check_expected_error() already printed the message to
+ stderr and rli, and set thd->query_error to 1.
+ */
+ } /* End of if (db_ok(... */
+
+end:
+
VOID(pthread_mutex_lock(&LOCK_thread_count));
+ thd->db= 0; // prevent db from being freed
thd->query= 0; // just to be sure
VOID(pthread_mutex_unlock(&LOCK_thread_count));
// assume no convert for next query unless set explictly
thd->variables.convert_set = 0;
- close_thread_tables(thd);
-
- if (thd->query_error || thd->fatal_error)
- {
- slave_print_error(rli,actual_error, "error '%s' on query '%s'",
- actual_error ? thd->net.last_error :
- "unexpected success or fatal error", query);
- free_root(&thd->mem_root,0);
- return 1;
- }
+ close_thread_tables(thd);
free_root(&thd->mem_root,0);
- return Log_event::exec_event(rli);
+ return (thd->query_error ? thd->query_error : Log_event::exec_event(rli));
}
/*
@@ -1972,8 +1984,11 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
event in the master log
*/
sql_print_error("Slave: load data infile at position %s in log \
-'%s' produced %d warning(s)", llstr(log_pos,llbuff), RPL_LOG_NAME,
- thd->cuted_fields );
+'%s' produced %d warning(s) (loaded table was '%s', database was '%s')",
+ llstr(log_pos,llbuff), RPL_LOG_NAME,
+ thd->cuted_fields,
+ (char*)table_name,
+ print_slave_db_safe((char*)db));
if (net)
net->pkt_nr= thd->net.pkt_nr;
}
@@ -2005,8 +2020,9 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
err=ER(sql_errno);
}
slave_print_error(rli,sql_errno,
- "Error '%s' running load data infile",
- err);
+ "Error '%s' running load data infile \
+(loaded table was '%s', database was '%s')",
+ err, (char*)table_name, print_slave_db_safe((char*)db));
free_root(&thd->mem_root,0);
return 1;
}
@@ -2014,7 +2030,10 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
if (thd->fatal_error)
{
- sql_print_error("Fatal error running LOAD DATA INFILE ");
+ slave_print_error(rli,ER_UNKNOWN_ERROR,
+"Fatal error running \
+LOAD DATA INFILE (loaded table was '%s', database was '%s')",
+ (char*)table_name, print_slave_db_safe((char*)db));
return 1;
}
@@ -2311,7 +2330,21 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli)
*/
if (lev->exec_event(0,rli,1))
{
- slave_print_error(rli,my_errno, "Failed executing load from '%s'", fname);
+ /*
+ We want to indicate the name of the file that could not be loaded
+ (SQL_LOADxxx).
+ But as we are here we are sure the error is in rli->last_slave_error and
+ rli->last_slave_errno (example of error: duplicate entry for key), so we
+ don't want to overwrite it with the filename.
+ What we want instead is add the filename to the current error message.
+ */
+ char *tmp= my_strdup(rli->last_slave_error,MYF(MY_WME));
+ if (!tmp)
+ goto err;
+ slave_print_error(rli,rli->last_slave_errno, /* ok to re-use the error code */
+ "%s. Failed executing load from '%s'",
+ tmp, fname);
+ my_free(tmp,MYF(0));
thd->options = save_options;
goto err;
}
diff --git a/sql/slave.cc b/sql/slave.cc
index 6ace446a341..b4fa11f456e 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -74,7 +74,6 @@ static int request_table_dump(MYSQL* mysql, const char* db, const char* table);
static int create_table_from_dump(THD* thd, NET* net, const char* db,
const char* table_name);
static int check_master_version(MYSQL* mysql, MASTER_INFO* mi);
-char* rewrite_db(char* db);
/*
@@ -290,8 +289,6 @@ err:
if (need_data_lock)
pthread_mutex_unlock(&rli->data_lock);
- /* Isn't this strange: if !need_data_lock, we broadcast with no lock ?? */
-
pthread_mutex_unlock(log_lock);
DBUG_RETURN ((*errmsg) ? 1 : 0);
}
@@ -362,7 +359,10 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
rli->pending= 0;
if (!rli->inited)
+ {
+ DBUG_PRINT("info", ("rli->inited == 0"));
DBUG_RETURN(0);
+ }
DBUG_ASSERT(rli->slave_running == 0);
DBUG_ASSERT(rli->mi->slave_running == 0);
@@ -828,6 +828,21 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli)
return rli->abort_slave || abort_loop || thd->killed;
}
+/*
+ Writes an error message to rli->last_slave_error and rli->last_slave_errno
+ (which will be displayed by SHOW SLAVE STATUS), and prints it to stderr.
+
+ SYNOPSIS
+ slave_print_error()
+ rli
+ err_code The error code
+ msg The error message (usually related to the error code, but can
+ contain more information).
+ ... (this is printf-like format, with % symbols in msg)
+
+ RETURN VALUES
+ void
+ */
void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...)
{
@@ -835,9 +850,16 @@ void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...)
va_start(args,msg);
my_vsnprintf(rli->last_slave_error,
sizeof(rli->last_slave_error), msg, args);
- sql_print_error("Slave: %s, error_code=%d", rli->last_slave_error,
- err_code);
rli->last_slave_errno = err_code;
+ /* If the error string ends with '.', do not add a ',' it would be ugly */
+ if (rli->last_slave_error[0] &&
+ (rli->last_slave_error[strlen(rli->last_slave_error)-1] == '.'))
+ sql_print_error("Slave: %s Error_code=%d", rli->last_slave_error,
+ err_code);
+ else
+ sql_print_error("Slave: %s, error_code=%d", rli->last_slave_error,
+ err_code);
+
}
@@ -865,6 +887,16 @@ char* rewrite_db(char* db)
return db;
}
+/*
+ From other comments and tests in code, it looks like
+ sometimes Query_log_event and Load_log_event can have db==0
+ (see rewrite_db() above for example)
+ (cases where this happens are unclear; it may be when the master is 3.23).
+*/
+char* print_slave_db_safe(char* db)
+{
+ return (db ? rewrite_db(db) : (char*) "");
+}
/*
Checks whether a db matches some do_db and ignore_db rules
@@ -1282,7 +1314,7 @@ file '%s')", fname);
&msg))
{
sql_print_error("Failed to open the relay log (relay_log_name='FIRST', \
-relay_log_pos=4");
+relay_log_pos=4)");
goto err;
}
rli->master_log_name[0]= 0;
@@ -1346,7 +1378,7 @@ file '%s')", fname);
{
char llbuf[22];
sql_print_error("Failed to open the relay log (relay_log_name='%s', \
-relay_log_pos=%s", rli->relay_log_name, llstr(rli->relay_log_pos, llbuf));
+relay_log_pos=%s)", rli->relay_log_name, llstr(rli->relay_log_pos, llbuf));
goto err;
}
}
@@ -2142,14 +2174,13 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error)
case ER_NET_ERROR_ON_WRITE:
case ER_SERVER_SHUTDOWN:
case ER_NEW_ABORTING_CONNECTION:
- my_snprintf(rli->last_slave_error, sizeof(rli->last_slave_error),
- "Slave: query '%s' partially completed on the master \
+ slave_print_error(rli,expected_error,
+ "query '%s' partially completed on the master \
and was aborted. There is a chance that your master is inconsistent at this \
point. If you are sure that your master is ok, run this query manually on the\
slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;\
- SLAVE START;", thd->query);
- rli->last_slave_errno = expected_error;
- sql_print_error("%s",rli->last_slave_error);
+ SLAVE START; .", thd->query);
+ thd->query_error= 1;
return 1;
default:
return 0;
diff --git a/sql/slave.h b/sql/slave.h
index 842ddca75f4..d1fd54d3c04 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -383,9 +383,10 @@ int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec);
void init_table_rule_hash(HASH* h, bool* h_inited);
void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited);
char* rewrite_db(char* db);
+char* print_slave_db_safe(char* db);
int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code);
void skip_load_data_infile(NET* net);
-void slave_print_error(RELAY_LOG_INFO* rli,int err_code, const char* msg, ...);
+void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...);
void end_slave(); /* clean up */
int init_master_info(MASTER_INFO* mi, const char* master_info_fname,