summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
Diffstat (limited to 'sql')
-rw-r--r--sql/handler.cc71
-rw-r--r--sql/handler.h73
-rw-r--r--sql/lock.cc6
-rw-r--r--sql/log.cc128
-rw-r--r--sql/log.h11
-rw-r--r--sql/log_event.cc432
-rw-r--r--sql/log_event.h11
-rw-r--r--sql/mysql_priv.h3
-rw-r--r--sql/opt_range.cc6
-rw-r--r--sql/rpl_rli.h16
-rw-r--r--sql/slave.cc4
-rw-r--r--sql/sql_acl.cc10
-rw-r--r--sql/sql_base.cc26
-rw-r--r--sql/sql_class.cc112
-rw-r--r--sql/sql_class.h25
-rw-r--r--sql/sql_insert.cc67
-rw-r--r--sql/sql_load.cc13
-rw-r--r--sql/sql_table.cc8
-rw-r--r--sql/table.h3
19 files changed, 552 insertions, 473 deletions
diff --git a/sql/handler.cc b/sql/handler.cc
index dc7ec37a23d..75961104d34 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -3052,40 +3052,81 @@ template int binlog_log_row<Update_rows_log_event>(TABLE *, const byte *, const
#endif /* HAVE_ROW_BASED_REPLICATION */
-int handler::ha_write_row(byte *buf)
+int handler::ha_external_lock(THD *thd, int lock_type)
{
+ DBUG_ENTER("handler::ha_external_lock");
int error;
- if (likely(!(error= write_row(buf))))
+ if (unlikely(error= external_lock(thd, lock_type)))
+ DBUG_RETURN(error);
+#ifdef HAVE_ROW_BASED_REPLICATION
+ if (table->file->is_injective())
+ DBUG_RETURN(0);
+
+ /*
+ There is a number of statements that are logged statement-based
+ but call external lock. For these, we do not need to generate a
+ table map.
+
+ TODO: The need for this switch is an indication that the model for
+ locking combined with row-based replication needs to be looked
+ over. Ideally, no such special handling should be needed.
+ */
+ switch (thd->lex->sql_command)
+ {
+ case SQLCOM_TRUNCATE:
+ case SQLCOM_ALTER_TABLE:
+ DBUG_RETURN(0);
+ }
+
+ /*
+ If we are locking a table for writing, we generate a table map.
+ For all other kinds of locks, we don't do anything.
+ */
+ if (lock_type == F_WRLCK && check_table_binlog_row_based(thd, table))
{
+ int const has_trans= table->file->has_transactions();
+ error= thd->binlog_write_table_map(table, has_trans);
+ if (unlikely(error))
+ DBUG_RETURN(error);
+ }
+#endif
+ DBUG_RETURN(0);
+}
+
+int handler::ha_write_row(byte *buf)
+{
+ int error;
+ if (unlikely(error= write_row(buf)))
+ return error;
#ifdef HAVE_ROW_BASED_REPLICATION
- error= binlog_log_row<Write_rows_log_event>(table, 0, buf);
+ if (unlikely(error= binlog_log_row<Write_rows_log_event>(table, 0, buf)))
+ return error;
#endif
- }
- return error;
+ return 0;
}
int handler::ha_update_row(const byte *old_data, byte *new_data)
{
int error;
- if (likely(!(error= update_row(old_data, new_data))))
- {
+ if (unlikely(error= update_row(old_data, new_data)))
+ return error;
#ifdef HAVE_ROW_BASED_REPLICATION
- error= binlog_log_row<Update_rows_log_event>(table, old_data, new_data);
+ if (unlikely(error= binlog_log_row<Update_rows_log_event>(table, old_data, new_data)))
+ return error;
#endif
- }
- return error;
+ return 0;
}
int handler::ha_delete_row(const byte *buf)
{
int error;
- if (likely(!(error= delete_row(buf))))
- {
+ if (unlikely(error= delete_row(buf)))
+ return error;
#ifdef HAVE_ROW_BASED_REPLICATION
- error= binlog_log_row<Delete_rows_log_event>(table, buf, 0);
+ if (unlikely(error= binlog_log_row<Delete_rows_log_event>(table, buf, 0)))
+ return error;
#endif
- }
- return error;
+ return 0;
}
diff --git a/sql/handler.h b/sql/handler.h
index b8852d35e66..dd445637b9f 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1082,7 +1082,35 @@ typedef struct st_ha_create_information
bool store_on_disk; /* 1 if table stored on disk */
} HA_CREATE_INFO;
+/*
+ Class for maintaining hooks used inside operations on tables such
+ as: create table functions, delete table functions, and alter table
+ functions.
+
+ Class is using the Template Method pattern to separate the public
+ usage interface from the private inheritance interface. This
+ imposes no overhead, since the public non-virtual function is small
+ enough to be inlined.
+
+ The hooks are usually used for functions that does several things,
+ e.g., create_table_from_items(), which both create a table and lock
+ it.
+ */
+class TABLEOP_HOOKS
+{
+public:
+ inline void prelock(TABLE **tables, uint count)
+ {
+ do_prelock(tables, count);
+ }
+private:
+ /* Function primitive that is called prior to locking tables */
+ virtual void do_prelock(TABLE **tables, uint count)
+ {
+ /* Default is to do nothing */
+ }
+};
typedef struct st_savepoint SAVEPOINT;
extern ulong savepoint_alloc_size;
@@ -1476,15 +1504,24 @@ public:
uint get_index(void) const { return active_index; }
virtual int open(const char *name, int mode, uint test_if_locked)=0;
virtual int close(void)=0;
- virtual int ha_write_row(byte * buf);
- virtual int ha_update_row(const byte * old_data, byte * new_data);
- virtual int ha_delete_row(const byte * buf);
+
+ /*
+ These functions represent the public interface to *users* of the
+ handler class, hence they are *not* virtual. For the inheritance
+ interface, see the (private) functions write_row(), update_row(),
+ and delete_row() below.
+ */
+ int ha_external_lock(THD *thd, int lock_type);
+ int ha_write_row(byte * buf);
+ int ha_update_row(const byte * old_data, byte * new_data);
+ int ha_delete_row(const byte * buf);
+
/*
If the handler does it's own injection of the rows, this member function
should return 'true'.
*/
virtual bool is_injective() const { return false; }
-
+
/*
SYNOPSIS
start_bulk_update()
@@ -1616,7 +1653,6 @@ public:
{ return 0; }
virtual int extra_opt(enum ha_extra_function operation, ulong cache_size)
{ return extra(operation); }
- virtual int external_lock(THD *thd, int lock_type) { return 0; }
/*
In an UPDATE or DELETE, if the row under the cursor was locked by another
transaction, and the engine used an optimistic read of the last
@@ -1871,28 +1907,33 @@ public:
{ return COMPATIBLE_DATA_NO; }
private:
+ friend int ndb_add_binlog_index(THD *, void *);
/*
- Row-level primitives for storage engines.
- These should be overridden by the storage engine class. To call
- these methods, use the corresponding 'ha_*' method above.
+ Row-level primitives for storage engines. These should be
+ overridden by the storage engine class. To call these methods, use
+ the corresponding 'ha_*' method above.
*/
- friend int ndb_add_binlog_index(THD *, void *);
+ virtual int external_lock(THD *thd __attribute__((unused)),
+ int lock_type __attribute__((unused)))
+ {
+ return 0;
+ }
- virtual int write_row(byte *buf __attribute__((unused)))
- {
- return HA_ERR_WRONG_COMMAND;
+ virtual int write_row(byte *buf __attribute__((unused)))
+ {
+ return HA_ERR_WRONG_COMMAND;
}
virtual int update_row(const byte *old_data __attribute__((unused)),
byte *new_data __attribute__((unused)))
- {
- return HA_ERR_WRONG_COMMAND;
+ {
+ return HA_ERR_WRONG_COMMAND;
}
virtual int delete_row(const byte *buf __attribute__((unused)))
- {
- return HA_ERR_WRONG_COMMAND;
+ {
+ return HA_ERR_WRONG_COMMAND;
}
};
diff --git a/sql/lock.cc b/sql/lock.cc
index 9cd0dcce610..40a7a29942b 100644
--- a/sql/lock.cc
+++ b/sql/lock.cc
@@ -229,12 +229,12 @@ static int lock_external(THD *thd, TABLE **tables, uint count)
((*tables)->reginfo.lock_type >= TL_READ &&
(*tables)->reginfo.lock_type <= TL_READ_NO_INSERT))
lock_type=F_RDLCK;
- if ((error=(*tables)->file->external_lock(thd,lock_type)))
+ if ((error=(*tables)->file->ha_external_lock(thd,lock_type)))
{
print_lock_error(error, (*tables)->file->table_type());
for (; i-- ; tables--)
{
- (*tables)->file->external_lock(thd, F_UNLCK);
+ (*tables)->file->ha_external_lock(thd, F_UNLCK);
(*tables)->current_lock=F_UNLCK;
}
DBUG_RETURN(error);
@@ -578,7 +578,7 @@ static int unlock_external(THD *thd, TABLE **table,uint count)
if ((*table)->current_lock != F_UNLCK)
{
(*table)->current_lock = F_UNLCK;
- if ((error=(*table)->file->external_lock(thd, F_UNLCK)))
+ if ((error=(*table)->file->ha_external_lock(thd, F_UNLCK)))
{
error_code=error;
print_lock_error(error_code, (*table)->file->table_type());
diff --git a/sql/log.cc b/sql/log.cc
index a7e6a3da7f6..c2fadb9d845 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -1041,6 +1041,12 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data, Log_event *end_ev)
if (end_ev)
{
+ /*
+ We can always end the statement when ending a transaction since
+ transactions are not allowed inside stored functions. If they
+ were, we would have to ensure that we're not ending a statement
+ inside a stored function.
+ */
thd->binlog_flush_pending_rows_event(true);
error= mysql_bin_log.write(thd, trans_log, end_ev);
}
@@ -1060,7 +1066,7 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data, Log_event *end_ev)
generated instead of the one that was written to the thrown-away
transaction cache.
*/
- ++mysql_bin_log.m_table_map_version;
+ mysql_bin_log.update_table_map_version();
statistic_increment(binlog_cache_use, &LOCK_status);
if (trans_log->disk_writes != 0)
@@ -2596,6 +2602,38 @@ int THD::binlog_setup_trx_data()
DBUG_RETURN(0);
}
+int THD::binlog_write_table_map(TABLE *table, bool is_trans)
+{
+ DBUG_ENTER("THD::binlog_write_table_map");
+ DBUG_PRINT("enter", ("table=%p (%s: #%u)",
+ table, table->s->table_name, table->s->table_map_id));
+
+ /* Pre-conditions */
+ DBUG_ASSERT(binlog_row_based && mysql_bin_log.is_open());
+ DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
+
+ Table_map_log_event::flag_set const
+ flags= Table_map_log_event::TM_NO_FLAGS;
+
+ Table_map_log_event
+ the_event(this, table, table->s->table_map_id, is_trans, flags);
+
+ /*
+ This function is called from ha_external_lock() after the storage
+ engine has registered for the transaction.
+ */
+ if (is_trans)
+ trans_register_ha(this, options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN),
+ &binlog_hton);
+
+ if (int error= mysql_bin_log.write(&the_event))
+ DBUG_RETURN(error);
+
+ ++binlog_table_maps;
+ table->s->table_map_version= mysql_bin_log.table_map_version();
+ DBUG_RETURN(0);
+}
+
Rows_log_event*
THD::binlog_get_pending_rows_event() const
{
@@ -2613,8 +2651,12 @@ THD::binlog_get_pending_rows_event() const
void
THD::binlog_set_pending_rows_event(Rows_log_event* ev)
{
+ if (ha_data[binlog_hton.slot] == NULL)
+ binlog_setup_trx_data();
+
binlog_trx_data *const trx_data=
(binlog_trx_data*) ha_data[binlog_hton.slot];
+
DBUG_ASSERT(trx_data);
trx_data->pending= ev;
}
@@ -2658,15 +2700,6 @@ int MYSQL_LOG::flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event)
pthread_mutex_lock(&LOCK_log);
/*
- Write a table map if necessary
- */
- if (pending->maybe_write_table_map(thd, file, this))
- {
- pthread_mutex_unlock(&LOCK_log);
- DBUG_RETURN(2);
- }
-
- /*
Write pending event to log file or transaction cache
*/
if (pending->write(file))
@@ -2707,18 +2740,8 @@ int MYSQL_LOG::flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event)
pthread_mutex_unlock(&LOCK_log);
}
- else if (event && event->get_cache_stmt()) /* && pending == 0 */
- {
- /*
- If we are setting a non-null event for a table that is
- transactional, we start a transaction here as well.
- */
- trans_register_ha(thd,
- thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN),
- &binlog_hton);
- }
- trx_data->pending= event;
+ thd->binlog_set_pending_rows_event(event);
DBUG_RETURN(error);
}
@@ -2750,21 +2773,13 @@ bool MYSQL_LOG::write(Log_event *event_info)
mutex, we do this before aquiring the LOCK_log mutex in this
function.
- This is not optimal, but necessary in the current implementation
- since there is code that writes rows to system tables without
- using some way to flush the pending event (e.g., binlog_query()).
-
- TODO: There shall be no writes to any system table after calling
- binlog_query(), so these writes has to be moved to before the call
- of binlog_query() for correct functioning.
-
- This is necessesary not only for RBR, but the master might crash
- after binlogging the query but before changing the system tables.
- This means that the slave and the master are not in the same state
- (after the master has restarted), so therefore we have to
- eliminate this problem.
+ We only end the statement if we are in a top-level statement. If
+ we are inside a stored function, we do not end the statement since
+ this will close all tables on the slave.
*/
- thd->binlog_flush_pending_rows_event(true);
+ bool const end_stmt=
+ thd->prelocked_mode && thd->lex->requires_prelocking();
+ thd->binlog_flush_pending_rows_event(end_stmt);
pthread_mutex_lock(&LOCK_log);
@@ -2787,8 +2802,9 @@ bool MYSQL_LOG::write(Log_event *event_info)
(!binlog_filter->db_ok(local_db)))
{
VOID(pthread_mutex_unlock(&LOCK_log));
- DBUG_PRINT("info",("db_ok('%s')==%d", local_db,
- binlog_filter->db_ok(local_db)));
+ DBUG_PRINT("info",("OPTION_BIN_LOG is %s, db_ok('%s') == %d",
+ (thd->options & OPTION_BIN_LOG) ? "set" : "clear",
+ local_db, binlog_filter->db_ok(local_db)));
DBUG_RETURN(0);
}
#endif /* HAVE_REPLICATION */
@@ -3504,44 +3520,6 @@ void MYSQL_LOG::signal_update()
DBUG_VOID_RETURN;
}
-#ifndef MYSQL_CLIENT
-bool MYSQL_LOG::write_table_map(THD *thd, IO_CACHE *file, TABLE* table,
- bool is_transactional)
-{
- DBUG_ENTER("MYSQL_LOG::write_table_map()");
- DBUG_PRINT("enter", ("table=%p (%s: %u)",
- table, table->s->table_name, table->s->table_map_id));
-
- /* Pre-conditions */
- DBUG_ASSERT(binlog_row_based && is_open());
- DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
-
-#ifndef DBUG_OFF
- /*
- We only need to execute under the LOCK_log mutex if we are writing
- to the log file; otherwise, we are writing to a thread-specific
- transaction cache and there is no need to serialize this event
- with events in other threads.
- */
- if (file == &log_file)
- safe_mutex_assert_owner(&LOCK_log);
-#endif
-
- Table_map_log_event::flag_set const
- flags= Table_map_log_event::TM_NO_FLAGS;
-
- Table_map_log_event
- the_event(thd, table, table->s->table_map_id, is_transactional, flags);
-
- if (the_event.write(file))
- DBUG_RETURN(1);
-
- table->s->table_map_version= m_table_map_version;
- DBUG_RETURN(0);
-}
-#endif /* !defined(MYSQL_CLIENT) */
-
-
#ifdef __NT__
void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
uint length, int buffLen)
diff --git a/sql/log.h b/sql/log.h
index 98a86072fca..859423bd573 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -203,9 +203,11 @@ class MYSQL_LOG: public TC_LOG
bool no_auto_events;
friend class Log_event;
-public:
ulonglong m_table_map_version;
+ int write_to_file(IO_CACHE *cache);
+
+public:
/*
These describe the log's format. This is used only for relay logs.
_for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
@@ -232,9 +234,12 @@ public:
#if !defined(MYSQL_CLIENT)
bool is_table_mapped(TABLE *table) const
{
- return table->s->table_map_version == m_table_map_version;
+ return table->s->table_map_version == table_map_version();
}
+ ulonglong table_map_version() const { return m_table_map_version; }
+ void update_table_map_version() { ++m_table_map_version; }
+
int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event);
#endif /* !defined(MYSQL_CLIENT) */
@@ -302,8 +307,6 @@ public:
bool write(Log_event* event_info); // binary log write
bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event);
- bool write_table_map(THD *thd, IO_CACHE *cache, TABLE *table, bool is_trans);
-
void start_union_events(THD *thd);
void stop_union_events(THD *thd);
bool is_query_in_union(THD *thd, query_id_t query_id_param);
diff --git a/sql/log_event.cc b/sql/log_event.cc
index cd16745df90..9fbb29bbc8c 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -1686,6 +1686,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query
DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos));
clear_all_errors(thd, rli);
+ rli->clear_tables_to_lock();
/*
Note: We do not need to execute reset_one_shot_variables() if this
@@ -5064,17 +5065,19 @@ char* sql_ex_info::init(char* buf,char* buf_end,bool use_new_format)
Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid,
MY_BITMAP const *cols, bool is_transactional)
: Log_event(thd_arg, 0, is_transactional),
+ m_row_count(0),
m_table(tbl_arg),
m_table_id(tid),
- m_width(tbl_arg->s->fields),
- m_rows_buf((byte*)my_malloc(opt_binlog_rows_event_max_size *
- sizeof(*m_rows_buf), MYF(MY_WME))),
- m_rows_cur(m_rows_buf),
- m_rows_end(m_rows_buf + opt_binlog_rows_event_max_size),
+ m_width(tbl_arg ? tbl_arg->s->fields : 1),
+ m_rows_buf(0), m_rows_cur(0), m_rows_end(0),
m_flags(0)
{
- DBUG_ASSERT(m_table && m_table->s);
- DBUG_ASSERT(m_table_id != ULONG_MAX);
+ /*
+ We allow a special form of dummy event when the table, and cols
+ are null and the table id is ULONG_MAX.
+ */
+ DBUG_ASSERT(tbl_arg && tbl_arg->s && tid != ULONG_MAX ||
+ !tbl_arg && !cols && tid == ULONG_MAX);
if (thd_arg->options & OPTION_NO_FOREIGN_KEY_CHECKS)
set_flags(NO_FOREIGN_KEY_CHECKS_F);
@@ -5085,7 +5088,11 @@ Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid,
m_width <= sizeof(m_bitbuf)*8 ? m_bitbuf : NULL,
(m_width + 7) & ~7UL,
false)))
- memcpy(m_cols.bitmap, cols->bitmap, no_bytes_in_map(cols));
+ {
+ /* Cols can be zero if this is a dummy binrows event */
+ if (likely(cols != NULL))
+ memcpy(m_cols.bitmap, cols->bitmap, no_bytes_in_map(cols));
+ }
else
m_cols.bitmap= 0; // to not free it
}
@@ -5096,6 +5103,7 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
const Format_description_log_event
*description_event)
: Log_event(buf, description_event),
+ m_row_count(0),
m_rows_buf(0), m_rows_cur(0), m_rows_end(0)
{
DBUG_ENTER("Rows_log_event::Rows_log_event(const char*,...)");
@@ -5121,8 +5129,6 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
post_start+= RW_FLAGS_OFFSET;
}
- DBUG_ASSERT(m_table_id != ULONG_MAX);
-
m_flags= uint2korr(post_start);
byte const *const var_start= (const byte *)buf + common_header_len +
@@ -5178,7 +5184,7 @@ int Rows_log_event::do_add_row_data(byte *const row_data,
DBUG_DUMP("row_data", (const char*)row_data, min(length, 32));
DBUG_ASSERT(m_rows_buf <= m_rows_cur);
- DBUG_ASSERT(m_rows_buf < m_rows_end);
+ DBUG_ASSERT(!m_rows_buf || m_rows_end && m_rows_buf < m_rows_end);
DBUG_ASSERT(m_rows_cur <= m_rows_end);
/* The cast will always work since m_rows_cur <= m_rows_end */
@@ -5186,12 +5192,12 @@ int Rows_log_event::do_add_row_data(byte *const row_data,
{
my_size_t const block_size= 1024;
my_ptrdiff_t const old_alloc= m_rows_end - m_rows_buf;
- my_ptrdiff_t const new_alloc=
- old_alloc + block_size * (length / block_size + block_size - 1);
my_ptrdiff_t const cur_size= m_rows_cur - m_rows_buf;
+ my_ptrdiff_t const new_alloc=
+ block_size * ((cur_size + length) / block_size + block_size - 1);
- byte* const new_buf=
- (byte*)my_realloc((gptr)m_rows_buf, new_alloc, MYF(MY_WME));
+ byte* const new_buf= (byte*)my_realloc((gptr)m_rows_buf, new_alloc,
+ MYF(MY_ALLOW_ZERO_PTR|MY_WME));
if (unlikely(!new_buf))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
@@ -5212,6 +5218,7 @@ int Rows_log_event::do_add_row_data(byte *const row_data,
DBUG_ASSERT(m_rows_cur + length < m_rows_end);
memcpy(m_rows_cur, row_data, length);
m_rows_cur+= length;
+ m_row_count++;
DBUG_RETURN(0);
}
#endif
@@ -5255,10 +5262,28 @@ static char const *unpack_row(TABLE *table,
int Rows_log_event::exec_event(st_relay_log_info *rli)
{
DBUG_ENTER("Rows_log_event::exec_event(st_relay_log_info*)");
- DBUG_ASSERT(m_table_id != ULONG_MAX);
int error= 0;
char const *row_start= (char const *)m_rows_buf;
- TABLE* table= rli->m_table_map.get_table(m_table_id);
+
+ /*
+ If m_table_id == ULONG_MAX, then we have a dummy event that does
+ not contain any data. In that case, we just remove all tables in
+ the tables_to_lock list, step the relay log position, and return
+ with success.
+ */
+ if (m_table_id == ULONG_MAX)
+ {
+ /*
+ This one is supposed to be set: just an extra check so that
+ nothing strange has happened.
+ */
+ DBUG_ASSERT(get_flags(STMT_END_F));
+
+ rli->clear_tables_to_lock();
+ thd->clear_error();
+ rli->inc_event_relay_log_pos();
+ DBUG_RETURN(0);
+ }
/*
'thd' has been set by exec_relay_log_event(), just before calling
@@ -5267,85 +5292,51 @@ int Rows_log_event::exec_event(st_relay_log_info *rli)
DBUG_ASSERT(rli->sql_thd == thd);
/*
- lock_tables() reads the contents of thd->lex, so they must be
- initialized, so we should call lex_start(); to be even safer, we call
- mysql_init_query() which does a more complete set of inits.
+ If there is no locks taken, this is the first binrow event seen
+ after the table map events. We should then lock all the tables
+ used in the transaction and proceed with execution of the actual
+ event.
*/
- mysql_init_query(thd, NULL, 0);
-
- if (table)
+ if (!thd->lock)
{
+ bool need_reopen= 1; /* To execute the first lap of the loop below */
+
/*
- table == NULL means that this table should not be
- replicated (this was set up by Table_map_log_event::exec_event() which
- tested replicate-* rules).
+ lock_tables() reads the contents of thd->lex, so they must be
+ initialized, so we should call lex_start(); to be even safer, we
+ call mysql_init_query() which does a more complete set of inits.
*/
- TABLE_LIST table_list;
- bool need_reopen;
- uint count= 1;
- bzero(&table_list, sizeof(table_list));
- table_list.lock_type= TL_WRITE;
- table_list.next_global= table_list.next_local= 0;
- table_list.table= table;
-
- for ( ; ; )
- {
- table_list.db= const_cast<char*>(table->s->db.str);
- table_list.alias= table_list.table_name=
- const_cast<char*>(table->s->table_name.str);
+ mysql_init_query(thd, NULL, 0);
- if ((error= lock_tables(thd, &table_list, count, &need_reopen)) == 0)
- break;
+ while ((error= lock_tables(thd, rli->tables_to_lock,
+ rli->tables_to_lock_count, &need_reopen)))
+ {
if (!need_reopen)
{
slave_print_msg(ERROR_LEVEL, rli, error,
- "Error in %s event: error during table %s.%s lock",
- get_type_str(), table->s->db.str,
- table->s->table_name.str);
+ "Error in %s event: when locking tables",
+ get_type_str());
DBUG_RETURN(error);
}
+
/*
- we need to store a local copy of the table names since the table object
- will become invalid after close_tables_for_reopen
- */
- char *db= my_strdup(table->s->db.str, MYF(MY_WME));
- char *table_name= my_strdup(table->s->table_name.str, MYF(MY_WME));
+ So we need to reopen the tables.
- if (db == 0 || table_name == 0)
- {
- /*
- Since the lock_tables() failed, the table is not locked, so
- we don't need to unlock them.
- */
- DBUG_RETURN(HA_ERR_OUT_OF_MEM);
- }
+ We need to flush the pending RBR event, since it keeps a
+ pointer to an open table.
- /*
- We also needs to flush the pending RBR event, since it keeps a
- pointer to an open table.
+ ALTERNATIVE SOLUTION (not implemented): Extract a pointer to
+ the pending RBR event and reset the table pointer after the
+ tables has been reopened.
- ALTERNATIVE SOLUTION: Extract a pointer to the pending RBR
- event and reset the table pointer after the tables has been
- reopened.
+ NOTE: For this new scheme there should be no pending event:
+ need to add code to assert that is the case.
*/
thd->binlog_flush_pending_rows_event(false);
+ close_tables_for_reopen(thd, rli->tables_to_lock);
- close_tables_for_reopen(thd, &table_list);
-
- /* open the table again, same as in Table_map_event::exec_event */
- table_list.db= const_cast<char*>(db);
- table_list.alias= table_list.table_name= const_cast<char*>(table_name);
- table_list.updating= 1;
- TABLE_LIST *tables= &table_list;
- if ((error= open_tables(thd, &tables, &count, 0)) == 0)
- {
- /* reset some variables for the table list*/
- table_list.updating= 0;
- /* retrieve the new table reference and update the table map */
- table= table_list.table;
- error= rli->m_table_map.set_table(m_table_id, table);
- }
- else /* error in open_tables */
+ if ((error= open_tables(thd, &rli->tables_to_lock,
+ &rli->tables_to_lock_count, 0)))
{
if (thd->query_error || thd->is_fatal_error)
{
@@ -5355,19 +5346,41 @@ int Rows_log_event::exec_event(st_relay_log_info *rli)
*/
uint actual_error= thd->net.last_errno;
slave_print_msg(ERROR_LEVEL, rli, actual_error,
- "Error '%s' on reopening table `%s`.`%s`",
+ "Error '%s' on reopening tables",
(actual_error ? thd->net.last_error :
- "unexpected success or fatal error"),
- db, table_name);
+ "unexpected success or fatal error"));
thd->query_error= 1;
}
- }
- my_free((char*) db, MYF(MY_ALLOW_ZERO_PTR));
- my_free((char*) table_name, MYF(MY_ALLOW_ZERO_PTR));
-
- if (error)
DBUG_RETURN(error);
+ }
}
+ /*
+ When the open and locking succeeded, we add all the tables to
+ the table map and remove them from tables to lock.
+ */
+
+ TABLE_LIST *ptr= rli->tables_to_lock;
+ while (ptr)
+ {
+ rli->m_table_map.set_table(ptr->table_id, ptr->table);
+ rli->touching_table(ptr->db, ptr->table_name, ptr->table_id);
+ char *to_free= reinterpret_cast<char*>(ptr);
+ ptr= ptr->next_global;
+ my_free(to_free, MYF(MY_WME));
+ }
+ rli->tables_to_lock= 0;
+ rli->tables_to_lock_count= 0;
+ }
+
+ TABLE* table= rli->m_table_map.get_table(m_table_id);
+
+ if (table)
+ {
+ /*
+ table == NULL means that this table should not be replicated
+ (this was set up by Table_map_log_event::exec_event() which
+ tested replicate-* rules).
+ */
/*
It's not needed to set_time() but
@@ -5520,52 +5533,25 @@ int Rows_log_event::exec_event(st_relay_log_info *rli)
DBUG_RETURN(error);
}
- if (table)
+ if (table && (table->s->primary_key == MAX_KEY) && !cache_stmt)
{
/*
- As "table" is not NULL, we did a successful lock_tables(), without any
- prior LOCK TABLES and are not in prelocked mode, so this assertion should
- be true.
- */
- DBUG_ASSERT(thd->lock);
- /*
- If we are here, there are more events to come which may use our mappings
- and our table. So don't clear mappings or close tables, just unlock
- tables.
- Why don't we lock the table once for all in
- Table_map_log_event::exec_event() ? Because we could have in binlog:
- BEGIN;
- Table_map t1 -> 1
- Write_rows to id 1
- Table_map t2 -> 2
- Write_rows to id 2
- Xid_log_event
- So we cannot lock t1 when executing the first Table_map, because at that
- moment we don't know we'll also have to lock t2, and all tables must be
- locked at once in MySQL.
+ ------------ Temporary fix until WL#2975 is implemented ---------
+
+ This event is not the last one (no STMT_END_F). If we stop now
+ (in case of terminate_slave_thread()), how will we restart? We
+ have to restart from Table_map_log_event, but as this table is
+ not transactional, the rows already inserted will still be
+ present, and idempotency is not guaranteed (no PK) so we risk
+ that repeating leads to double insert. So we desperately try to
+ continue, hope we'll eventually leave this buggy situation (by
+ executing the final Rows_log_event). If we are in a hopeless
+ wait (reached end of last relay log and nothing gets appended
+ there), we timeout after one minute, and notify DBA about the
+ problem. When WL#2975 is implemented, just remove the member
+ st_relay_log_info::unsafe_to_stop_at and all its occurences.
*/
- mysql_unlock_tables(thd, thd->lock);
- thd->lock= 0;
- if ((table->s->primary_key == MAX_KEY) &&
- !cache_stmt)
- {
- /*
- ------------ Temporary fix until WL#2975 is implemented ---------
- This event is not the last one (no STMT_END_F). If we stop now (in
- case of terminate_slave_thread()), how will we restart? We have to
- restart from Table_map_log_event, but as this table is not
- transactional, the rows already inserted will still be present, and
- idempotency is not guaranteed (no PK) so we risk that repeating leads
- to double insert. So we desperately try to continue, hope we'll
- eventually leave this buggy situation (by executing the final
- Rows_log_event). If we are in a hopeless wait (reached end of last
- relay log and nothing gets appended there), we timeout after one
- minute, and notify DBA about the problem.
- When WL#2975 is implemented, just remove the member
- st_relay_log_info::unsafe_to_stop_at and all its occurences.
- */
- rli->unsafe_to_stop_at= time(0);
- }
+ rli->unsafe_to_stop_at= time(0);
}
DBUG_ASSERT(error == 0);
@@ -5579,7 +5565,6 @@ int Rows_log_event::exec_event(st_relay_log_info *rli)
#ifndef MYSQL_CLIENT
bool Rows_log_event::write_data_header(IO_CACHE *file)
{
- DBUG_ASSERT(m_table_id != ULONG_MAX);
byte buf[ROWS_HEADER_LEN]; // No need to init the buffer
DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
{
@@ -5611,16 +5596,17 @@ bool Rows_log_event::write_data_body(IO_CACHE*file)
}
#endif
-#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) && defined(DBUG_RBR)
+#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
void Rows_log_event::pack_info(Protocol *protocol)
{
- char buf[256];
- char const *const flagstr= get_flags(STMT_END_F) ? "STMT_END_F" : "";
- char const *const dbnam= m_table->s->db.str;
- char const *const tblnam= m_table->s->table_name.str;
- my_size_t bytes= snprintf(buf, sizeof(buf),
- "%s.%s - %s", dbnam, tblnam, flagstr);
- protocol->store(buf, bytes, &my_charset_bin);
+#ifdef DBUG_RBR
+ char buf[256];
+ char const *const flagstr=
+ get_flags(STMT_END_F) ? " flags: STMT_END_F" : "";
+ my_size_t bytes= snprintf(buf, sizeof(buf),
+ "table_id: %lu%s", m_table_id, flagstr);
+ protocol->store(buf, bytes, &my_charset_bin);
+#endif
}
#endif
@@ -5763,59 +5749,6 @@ Table_map_log_event::~Table_map_log_event()
}
/*
- Find a table based on database name and table name.
-
- DESCRIPTION
-
- Currently, only the first table of the 'table_list' is located. If the
- table is found in the list of open tables for the thread, the 'table'
- field of 'table_list' is filled in.
-
- PARAMETERS
-
- thd Thread structure
- table_list List of tables to locate in the thd->open_tables list.
- count Pointer to a variable that will be set to the number of
- tables found. If the pointer is NULL, nothing will be stored.
-
- RETURN VALUE
-
- The number of tables found.
-
- TO DO
-
- Replace the list of table searches with a hash based on the combined
- database and table name. The handler_tables_hash is inappropriate since
- it hashes on the table alias. At the same time, the function can be
- extended to handle a full list of table names, in the same spirit as
- open_tables() and lock_tables().
-*/
-#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
-static uint find_tables(THD *thd, TABLE_LIST *table_list, uint *count)
-{
- uint result= 0;
-
- /* we verify that the caller knows our limitation */
- DBUG_ASSERT(table_list->next_global == 0);
- for (TABLE *table= thd->open_tables; table ; table= table->next)
- {
- if (strcmp(table->s->db.str, table_list->db) == 0
- && strcmp(table->s->table_name.str, table_list->table_name) == 0)
- {
- /* Copy the table pointer into the table list. */
- table_list->table= table;
- result= 1;
- break;
- }
- }
-
- if (count)
- *count= result;
- return result;
-}
-#endif
-
-/*
Return value is an error code, one of:
-1 Failure to open table [from open_tables()]
@@ -5838,20 +5771,37 @@ int Table_map_log_event::exec_event(st_relay_log_info *rli)
thd->query_id= next_query_id();
pthread_mutex_unlock(&LOCK_thread_count);
- TABLE_LIST table_list;
+ TABLE_LIST *table_list;
+ char *db_mem, *tname_mem;
+ void *const memory=
+ my_multi_malloc(MYF(MY_WME),
+ &table_list, sizeof(TABLE_LIST),
+ &db_mem, NAME_LEN + 1,
+ &tname_mem, NAME_LEN + 1,
+ NULL);
+
+ /*
+ If memory is allocated, it the pointer to it should be stored in
+ table_list. If this is not true, the memory will not be correctly
+ free:ed later.
+ */
+ DBUG_ASSERT(memory == NULL || memory == table_list);
+
uint32 dummy_len;
- bzero(&table_list, sizeof(table_list));
- table_list.db= const_cast<char *>
- (rpl_filter->get_rewrite_db(m_dbnam, &dummy_len));
- table_list.alias= table_list.table_name= const_cast<char*>(m_tblnam);
- table_list.lock_type= TL_WRITE;
- table_list.next_global= table_list.next_local= 0;
- table_list.updating= 1;
+ bzero(table_list, sizeof(*table_list));
+ table_list->db = db_mem;
+ table_list->alias= table_list->table_name = tname_mem;
+ table_list->lock_type= TL_WRITE;
+ table_list->next_global= table_list->next_local= 0;
+ table_list->table_id= m_table_id;
+ table_list->updating= 1;
+ strmov(table_list->db, rpl_filter->get_rewrite_db(m_dbnam, &dummy_len));
+ strmov(table_list->table_name, m_tblnam);
int error= 0;
- if (rpl_filter->db_ok(table_list.db) &&
- (!rpl_filter->is_on() || rpl_filter->tables_ok("", &table_list)))
+ if (rpl_filter->db_ok(table_list->db) &&
+ (!rpl_filter->is_on() || rpl_filter->tables_ok("", table_list)))
{
/*
Check if the slave is set to use SBR. If so, the slave should
@@ -5877,36 +5827,32 @@ int Table_map_log_event::exec_event(st_relay_log_info *rli)
be a no-op.
*/
uint count;
- if (find_tables(thd, &table_list, &count) == 0)
+ /*
+ open_tables() reads the contents of thd->lex, so they must be
+ initialized, so we should call lex_start(); to be even safer, we
+ call mysql_init_query() which does a more complete set of inits.
+ */
+ mysql_init_query(thd, NULL, 0);
+ if ((error= open_tables(thd, &table_list, &count, 0)))
{
- /*
- open_tables() reads the contents of thd->lex, so they must be
- initialized, so we should call lex_start(); to be even safer, we call
- mysql_init_query() which does a more complete set of inits.
- */
- mysql_init_query(thd, NULL, 0);
- TABLE_LIST *tables= &table_list;
- if ((error= open_tables(thd, &tables, &count, 0)))
+ if (thd->query_error || thd->is_fatal_error)
{
- if (thd->query_error || thd->is_fatal_error)
- {
- /*
- Error reporting borrowed from Query_log_event with many excessive
- simplifications (we don't honour --slave-skip-errors)
- */
- uint actual_error= thd->net.last_errno;
- slave_print_msg(ERROR_LEVEL, rli, actual_error,
- "Error '%s' on opening table `%s`.`%s`",
- (actual_error ? thd->net.last_error :
- "unexpected success or fatal error"),
- table_list.db, table_list.table_name);
- thd->query_error= 1;
- }
- DBUG_RETURN(error);
+ /*
+ Error reporting borrowed from Query_log_event with many excessive
+ simplifications (we don't honour --slave-skip-errors)
+ */
+ uint actual_error= thd->net.last_errno;
+ slave_print_msg(ERROR_LEVEL, rli, actual_error,
+ "Error '%s' on opening table `%s`.`%s`",
+ (actual_error ? thd->net.last_error :
+ "unexpected success or fatal error"),
+ table_list->db, table_list->table_name);
+ thd->query_error= 1;
}
+ DBUG_RETURN(error);
}
- m_table= table_list.table;
+ m_table= table_list->table;
/*
This will fail later otherwise, the 'in_use' field should be
@@ -5984,19 +5930,16 @@ int Table_map_log_event::exec_event(st_relay_log_info *rli)
}
/*
- We record in the slave's information that the number m_table_id is
- mapped to the m_table object
- */
- if (!error)
- error= rli->m_table_map.set_table(m_table_id, m_table);
-
- /*
- Tell the RLI that we are touching a table.
-
- TODO: Maybe we can combine this with the previous operation?
+ We record in the slave's information that the table should be
+ locked by linking the table into the list of tables to lock, and
+ tell the RLI that we are touching a table.
*/
if (!error)
- rli->touching_table(m_dbnam, m_tblnam, m_table_id);
+ {
+ table_list->next_global= table_list->next_local= rli->tables_to_lock;
+ rli->tables_to_lock= table_list;
+ rli->tables_to_lock_count++;
+ }
}
/*
@@ -6063,12 +6006,23 @@ bool Table_map_log_event::write_data_body(IO_CACHE *file)
field.
*/
+#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
void Table_map_log_event::pack_info(Protocol *protocol)
{
+#ifdef DBUG_RBR
+ char buf[256];
+ my_size_t bytes= snprintf(buf, sizeof(buf),
+ "table_id: %lu (%s.%s)",
+ m_table_id, m_dbnam, m_tblnam);
+ protocol->store(buf, bytes, &my_charset_bin);
+#else
char buf[256];
- my_size_t bytes= my_snprintf(buf, sizeof(buf), "%s.%s", m_dbnam, m_tblnam);
+ my_size_t bytes= snprintf(buf, sizeof(buf), "%s.%s", m_dbnam, m_tblnam);
protocol->store(buf, bytes, &my_charset_bin);
+#endif
}
+#endif
+
#endif
diff --git a/sql/log_event.h b/sql/log_event.h
index a7c532d4c24..7a0c24ea626 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -657,11 +657,16 @@ public:
{
return (void*) my_malloc((uint)size, MYF(MY_WME|MY_FAE));
}
+
static void operator delete(void *ptr, size_t size)
{
my_free((gptr) ptr, MYF(MY_WME|MY_ALLOW_ZERO_PTR));
}
+ /* Placement version of the above operators */
+ static void *operator new(size_t, void* ptr) { return ptr; }
+ static void operator delete(void*, void*) { }
+
#ifndef MYSQL_CLIENT
bool write_header(IO_CACHE* file, ulong data_length);
virtual bool write(IO_CACHE* file)
@@ -1795,10 +1800,8 @@ public:
#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
virtual int exec_event(struct st_relay_log_info *rli);
-#ifdef DBUG_RBR
virtual void pack_info(Protocol *protocol);
#endif
-#endif
#ifdef MYSQL_CLIENT
/* not for direct call, each derived has its own ::print() */
@@ -1850,6 +1853,7 @@ public:
Error code, or zero if write succeeded.
*/
#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION)
+#if 0
int maybe_write_table_map(THD *thd, IO_CACHE *file, MYSQL_LOG *log) const
{
/*
@@ -1864,6 +1868,9 @@ public:
return result;
}
#endif
+#endif
+
+ uint m_row_count; /* The number of rows added to the event */
protected:
/*
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index fc777c47818..70fca42da73 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -758,7 +758,8 @@ TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
List<create_field> *extra_fields,
List<Key> *keys,
List<Item> *items,
- MYSQL_LOCK **lock);
+ MYSQL_LOCK **lock,
+ TABLEOP_HOOKS *hooks);
bool mysql_alter_table(THD *thd, char *new_db, char *new_name,
HA_CREATE_INFO *create_info,
TABLE_LIST *table_list,
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index ef6b3d941a1..c514326de8d 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -878,7 +878,7 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file,
free_file));
file->ha_reset();
- file->external_lock(current_thd, F_UNLCK);
+ file->ha_external_lock(current_thd, F_UNLCK);
file->close();
delete file;
}
@@ -1042,14 +1042,14 @@ int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
/* Caller will free the memory */
goto failure;
}
- if (file->external_lock(thd, F_RDLCK))
+ if (file->ha_external_lock(thd, F_RDLCK))
goto failure;
if (file->extra(HA_EXTRA_KEYREAD) ||
file->ha_retrieve_all_pk() ||
init() || reset())
{
- file->external_lock(thd, F_UNLCK);
+ file->ha_external_lock(thd, F_UNLCK);
file->close();
goto failure;
}
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
index 5500fdf1f64..cacae1aa4c2 100644
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -276,7 +276,9 @@ typedef struct st_relay_log_info
group_relay_log_pos);
}
- table_mapping m_table_map;
+ TABLE_LIST *tables_to_lock; /* RBR: Tables to lock */
+ uint tables_to_lock_count; /* RBR: Count of tables to lock */
+ table_mapping m_table_map; /* RBR: Mapping table-id to table */
/*
Last charset (6 bytes) seen by slave SQL thread is cached here; it helps
@@ -306,6 +308,18 @@ typedef struct st_relay_log_info
void transaction_end(THD*);
void cleanup_context(THD *, bool);
+ void clear_tables_to_lock() {
+ TABLE_LIST *ptr= tables_to_lock;
+ while (ptr)
+ {
+ char *to_free= reinterpret_cast<char*>(ptr);
+ ptr= ptr->next_global;
+ my_free(to_free, MYF(MY_WME));
+ }
+ tables_to_lock= 0;
+ tables_to_lock_count= 0;
+ }
+
time_t unsafe_to_stop_at;
} RELAY_LOG_INFO;
diff --git a/sql/slave.cc b/sql/slave.cc
index e60521af3a0..57fc8534f8f 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1444,6 +1444,8 @@ static int init_relay_log_info(RELAY_LOG_INFO* rli,
rli->abort_pos_wait=0;
rli->log_space_limit= relay_log_space_limit;
rli->log_space_total= 0;
+ rli->tables_to_lock= 0;
+ rli->tables_to_lock_count= 0;
/*
The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE.
@@ -2307,6 +2309,7 @@ st_relay_log_info::st_relay_log_info()
abort_pos_wait(0), slave_run_id(0), sql_thd(0), last_slave_errno(0),
inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE),
until_log_pos(0), retried_trans(0), m_reload_flags(RELOAD_NONE_F),
+ tables_to_lock(0), tables_to_lock_count(0),
unsafe_to_stop_at(0)
{
group_relay_log_name[0]= event_relay_log_name[0]=
@@ -4960,6 +4963,7 @@ void st_relay_log_info::cleanup_context(THD *thd, bool error)
}
m_table_map.clear_tables();
close_thread_tables(thd);
+ clear_tables_to_lock();
unsafe_to_stop_at= 0;
}
#endif
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index bebee7df8b2..d32c5ea08d4 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -3266,16 +3266,6 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc,
}
grant_option=TRUE;
thd->mem_root= old_root;
- /*
- This flush is here only becuase there is code that writes rows to
- system tables after executing a binlog_query().
-
- TODO: Ensure that no writes are executed after a binlog_query() by
- moving the writes to before calling binlog_query(). Then remove
- this line (and add an assert inside send_ok() that checks that
- everything is in a consistent state).
- */
- thd->binlog_flush_pending_rows_event(true);
rw_unlock(&LOCK_grant);
if (!result && !no_error)
send_ok(thd);
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 2d6576bff8b..98dff78a09a 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -1033,21 +1033,18 @@ void close_thread_tables(THD *thd, bool lock_in_use, bool skip_derived)
/* Fallthrough */
}
- /*
- For RBR: before calling close_thread_tables(), storage engines
- should autocommit. Hence if there is a a pending event, it belongs
- to a non-transactional engine, which writes directly to the table,
- and should therefore be flushed before unlocking and closing the
- tables. The test above for locked tables will not be triggered
- since RBR locks and unlocks tables on a per-event basis.
-
- TODO (WL#3023): Change the semantics so that RBR does not lock and
- unlock tables on a per-event basis.
- */
- thd->binlog_flush_pending_rows_event(true);
-
if (thd->lock)
{
+ /*
+ For RBR we flush the pending event just before we unlock all the
+ tables. This means that we are at the end of a topmost
+ statement, so we ensure that the STMT_END_F flag is set on the
+ pending event. For statements that are *inside* stored
+ functions, the pending event will not be flushed: that will be
+ handled either before writing a query log event (inside
+ binlog_query()) or when preparing a pending event.
+ */
+ thd->binlog_flush_pending_rows_event(true);
mysql_unlock_tables(thd, thd->lock);
thd->lock=0;
}
@@ -1059,7 +1056,8 @@ void close_thread_tables(THD *thd, bool lock_in_use, bool skip_derived)
saves some work in 2pc too)
see also sql_parse.cc - dispatch_command()
*/
- bzero(&thd->transaction.stmt, sizeof(thd->transaction.stmt));
+ if (!(thd->state_flags & Open_tables_state::BACKUPS_AVAIL))
+ bzero(&thd->transaction.stmt, sizeof(thd->transaction.stmt));
if (!thd->active_transaction())
thd->transaction.xid_state.xid.null();
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index e68bcb9e281..f89e7ecbfe5 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -162,7 +162,7 @@ bool foreign_key_prefix(Key *a, Key *b)
****************************************************************************/
Open_tables_state::Open_tables_state(ulong version_arg)
- :version(version_arg)
+ :version(version_arg), state_flags(0U)
{
reset_open_tables_state();
}
@@ -197,7 +197,8 @@ THD::THD()
:Statement(CONVENTIONAL_EXECUTION, 0, ALLOC_ROOT_MIN_BLOCK_SIZE, 0),
Open_tables_state(refresh_version), rli_fake(0),
lock_id(&main_lock_id),
- user_time(0), in_sub_stmt(0), global_read_lock(0), is_fatal_error(0),
+ user_time(0), in_sub_stmt(0), binlog_table_maps(0),
+ global_read_lock(0), is_fatal_error(0),
rand_used(0), time_zone_used(0),
last_insert_id_used(0), insert_id_used(0), clear_next_insert_id(0),
in_lock_tables(0), bootstrap(0), derived_tables_processing(FALSE),
@@ -1945,6 +1946,7 @@ void THD::reset_n_backup_open_tables_state(Open_tables_state *backup)
DBUG_ENTER("reset_n_backup_open_tables_state");
backup->set_open_tables_state(this);
reset_open_tables_state();
+ state_flags|= Open_tables_state::BACKUPS_AVAIL;
DBUG_VOID_RETURN;
}
@@ -2011,25 +2013,6 @@ void THD::reset_sub_statement_state(Sub_statement_state *backup,
backup->client_capabilities= client_capabilities;
backup->savepoints= transaction.savepoints;
-#ifdef HAVE_ROW_BASED_REPLICATION
- /*
- For row-based replication and before executing a function/trigger,
- the pending rows event has to be flushed. The function/trigger
- might execute statement that require the pending event to be
- flushed. A simple example:
-
- CREATE FUNCTION foo() RETURNS INT
- BEGIN
- SAVEPOINT x;
- RETURN 0;
- END
-
- INSERT INTO t1 VALUES (1), (foo()), (2);
- */
- if (binlog_row_based)
- binlog_flush_pending_rows_event(false);
-#endif /* HAVE_ROW_BASED_REPLICATION */
-
if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) &&
!binlog_row_based)
options&= ~OPTION_BIN_LOG;
@@ -2209,6 +2192,7 @@ THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
bool is_transactional,
RowsEventT *hint __attribute__((unused)))
{
+ DBUG_ENTER("binlog_prepare_pending_rows_event");
/* Pre-conditions */
DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
@@ -2220,12 +2204,12 @@ THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
have to do it here.
*/
if (binlog_setup_trx_data())
- return NULL;
+ DBUG_RETURN(NULL);
Rows_log_event* pending= binlog_get_pending_rows_event();
if (unlikely(pending && !pending->is_valid()))
- return NULL;
+ DBUG_RETURN(NULL);
/*
Check if the current event is non-NULL and a write-rows
@@ -2250,7 +2234,7 @@ THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
ev= new RowsEventT(this, table, table->s->table_map_id, cols,
is_transactional);
if (unlikely(!ev))
- return NULL;
+ DBUG_RETURN(NULL);
ev->server_id= serv_id; // I don't like this, it's too easy to forget.
/*
flush the pending event and replace it with the newly created
@@ -2259,17 +2243,17 @@ THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
if (unlikely(mysql_bin_log.flush_and_set_pending_rows_event(this, ev)))
{
delete ev;
- return NULL;
+ DBUG_RETURN(NULL);
}
- return ev; /* This is the new pending event */
+ DBUG_RETURN(ev); /* This is the new pending event */
}
- return pending; /* This is the current pending event */
+ DBUG_RETURN(pending); /* This is the current pending event */
}
#ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
/*
- Instansiate the versions we need, we have -fno-implicit-template as
+ Instantiate the versions we need, we have -fno-implicit-template as
compiling option.
*/
template Rows_log_event*
@@ -2534,14 +2518,33 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end)
{
pending->set_flags(Rows_log_event::STMT_END_F);
pending->flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
+ binlog_table_maps= 0;
}
+ error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0);
+ }
+ else if (stmt_end && binlog_table_maps > 0)
+ { /* there is no pending event at this point */
/*
- We only bother to set the pending event if it is non-NULL. This
- is essential for correctness, since there is not necessarily a
- trx_data created for the thread if the pending event is NULL.
+ If pending is null and we are going to end the statement, we
+ have to write an extra, empty, binrow event so that the slave
+ knows to discard the tables it has received. Otherwise, the
+ table maps written this far will be included in the table maps
+ for the following statement.
+
+ See if we can replace this with a dummy, maybe constant, event.
*/
+#if 0
+ static unsigned char memory[sizeof(Write_rows_log_event)];
+ void *const ptr= &memory;
+#endif
+ Rows_log_event *ev=
+ new Write_rows_log_event(this, 0, ULONG_MAX, 0, FALSE);
+ ev->set_flags(Rows_log_event::STMT_END_F);
+ binlog_set_pending_rows_event(ev);
+
error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0);
+ binlog_table_maps= 0;
}
DBUG_RETURN(error);
@@ -2568,6 +2571,17 @@ void THD::binlog_delete_pending_rows_event()
functions have been issued, but before tables are unlocked and
closed.
+ OBSERVE
+ There shall be no writes to any system table after calling
+ binlog_query(), so these writes has to be moved to before the call
+ of binlog_query() for correct functioning.
+
+ This is necessesary not only for RBR, but the master might crash
+ after binlogging the query but before changing the system tables.
+ This means that the slave and the master are not in the same state
+ (after the master has restarted), so therefore we have to
+ eliminate this problem.
+
RETURN VALUE
Error code, or 0 if no error.
*/
@@ -2577,7 +2591,7 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype,
{
DBUG_ENTER("THD::binlog_query");
DBUG_ASSERT(query && mysql_bin_log.is_open());
- int error= binlog_flush_pending_rows_event(true);
+
switch (qtype)
{
case THD::MYSQL_QUERY_TYPE:
@@ -2591,19 +2605,41 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype,
*/
case THD::ROW_QUERY_TYPE:
if (binlog_row_based)
- DBUG_RETURN(binlog_flush_pending_rows_event(true));
+ {
+ /*
+ If thd->lock is set, then we are not inside a stored function.
+ In that case, mysql_unlock_tables() will be called after this
+ binlog_query(), so we have to flush the pending rows event
+ with the STMT_END_F set to unlock all tables at the slave side
+ as well.
+
+ We will not flush the pending event, if thd->lock is NULL.
+ This means that we are inside a stored function or trigger, so
+ the flushing will be done inside the top-most
+ close_thread_tables().
+ */
+ if (this->lock)
+ DBUG_RETURN(binlog_flush_pending_rows_event(TRUE));
+ DBUG_RETURN(0);
+ }
/* Otherwise, we fall through */
case THD::STMT_QUERY_TYPE:
/*
- Most callers of binlog_query() ignore the error code, assuming
- that the statement will always be written to the binlog. In
- case of error above, we therefore just continue and write the
- statement to the binary log.
+ The MYSQL_LOG::write() function will set the STMT_END_F flag and
+ flush the pending rows event if necessary.
*/
{
Query_log_event qinfo(this, query, query_len, is_trans, suppress_use);
qinfo.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
- DBUG_RETURN(mysql_bin_log.write(&qinfo));
+ /*
+ Binlog table maps will be irrelevant after a Query_log_event
+ (they are just removed on the slave side) so after the query
+ log event is written to the binary log, we pretend that no
+ table maps were written.
+ */
+ int error= mysql_bin_log.write(&qinfo);
+ binlog_table_maps= 0;
+ DBUG_RETURN(error);
}
break;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 00440449be8..c6ef684498a 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -738,11 +738,20 @@ public:
ulong version;
uint current_tablenr;
+ enum enum_flags {
+ BACKUPS_AVAIL = (1U << 0) /* There are backups available */
+ };
+
+ /*
+ Flags with information about the open tables state.
+ */
+ uint state_flags;
+
/*
This constructor serves for creation of Open_tables_state instances
which are used as backup storage.
*/
- Open_tables_state() {};
+ Open_tables_state() : state_flags(0U) { }
Open_tables_state(ulong version_arg);
@@ -756,6 +765,7 @@ public:
open_tables= temporary_tables= handler_tables= derived_tables= 0;
lock= locked_tables= 0;
prelocked_mode= NON_PRELOCKED;
+ state_flags= 0U;
}
};
@@ -906,8 +916,9 @@ public:
#ifndef MYSQL_CLIENT
/*
- Public interface to write rows to the binlog
+ Public interface to write RBR events to the binlog
*/
+ int binlog_write_table_map(TABLE *table, bool is_transactional);
int binlog_write_row(TABLE* table, bool is_transactional,
MY_BITMAP const* cols, my_size_t colcnt,
const byte *buf);
@@ -951,6 +962,11 @@ public:
int binlog_flush_pending_rows_event(bool stmt_end);
void binlog_delete_pending_rows_event();
+private:
+ uint binlog_table_maps; // Number of table maps currently in the binlog
+
+public:
+
#endif
#endif /* HAVE_ROW_BASED_REPLICATION */
#ifndef MYSQL_CLIENT
@@ -1555,7 +1571,6 @@ class select_create: public select_insert {
HA_CREATE_INFO *create_info;
MYSQL_LOCK *lock;
Field **field;
- bool create_table_written;
public:
select_create (TABLE_LIST *table,
HA_CREATE_INFO *create_info_par,
@@ -1564,11 +1579,11 @@ public:
List<Item> &select_fields,enum_duplicates duplic, bool ignore)
:select_insert (NULL, NULL, &select_fields, 0, 0, duplic, ignore), create_table(table),
extra_fields(&fields_par),keys(&keys_par), create_info(create_info_par),
- lock(0), create_table_written(FALSE)
+ lock(0)
{}
int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
- void binlog_show_create_table();
+ void binlog_show_create_table(TABLE **tables, uint count);
void store_values(List<Item> &values);
void send_error(uint errcode,const char *err);
bool send_eof();
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index 6655491ca57..aa36551d0e5 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -1711,7 +1711,7 @@ pthread_handler_t handle_delayed_insert(void *arg)
{
thd->fatal_error();
strmov(thd->net.last_error,ER(thd->net.last_errno=ER_OUT_OF_RESOURCES));
- goto end;
+ goto err;
}
#if !defined(__WIN__) && !defined(OS2) && !defined(__NETWARE__)
sigset_t set;
@@ -1724,13 +1724,13 @@ pthread_handler_t handle_delayed_insert(void *arg)
if (!(di->table=open_ltable(thd,&di->table_list,TL_WRITE_DELAYED)))
{
thd->fatal_error(); // Abort waiting inserts
- goto end;
+ goto err;
}
if (!(di->table->file->table_flags() & HA_CAN_INSERT_DELAYED))
{
thd->fatal_error();
my_error(ER_ILLEGAL_HA, MYF(0), di->table_list.table_name);
- goto end;
+ goto err;
}
di->table->copy_blobs=1;
@@ -1859,6 +1859,16 @@ pthread_handler_t handle_delayed_insert(void *arg)
pthread_cond_broadcast(&di->cond_client); // If waiting clients
}
+err:
+ /*
+ mysql_lock_tables() can potentially start a transaction and write
+ a table map. In the event of an error, that transaction has to be
+ rolled back. We only need to roll back a potential statement
+ transaction, since real transactions are rolled back in
+ close_thread_tables().
+ */
+ ha_rollback_stmt(thd);
+
end:
/*
di should be unlinked from the thread handler list and have no active
@@ -2493,9 +2503,25 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
{
DBUG_ENTER("select_create::prepare");
+ class MY_HOOKS : public TABLEOP_HOOKS {
+ public:
+ MY_HOOKS(select_create *x) : ptr(x) { }
+ virtual void do_prelock(TABLE **tables, uint count)
+ {
+ if (binlog_row_based)
+ ptr->binlog_show_create_table(tables, count);
+ }
+
+ private:
+ select_create *ptr;
+ };
+
+ MY_HOOKS hooks(this);
+
unit= u;
table= create_table_from_items(thd, create_info, create_table,
- extra_fields, keys, &values, &lock);
+ extra_fields, keys, &values, &lock,
+ &hooks);
if (!table)
DBUG_RETURN(-1); // abort() deletes table
@@ -2533,7 +2559,7 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
void
-select_create::binlog_show_create_table()
+select_create::binlog_show_create_table(TABLE **tables, uint count)
{
/*
Note 1: In RBR mode, we generate a CREATE TABLE statement for the
@@ -2556,18 +2582,19 @@ select_create::binlog_show_create_table()
on rollback, we clear the OPTION_STATUS_NO_TRANS_UPDATE bit of
thd->options.
*/
- DBUG_ASSERT(binlog_row_based && !create_table_written);
+ DBUG_ASSERT(binlog_row_based);
+ DBUG_ASSERT(tables && *tables && count > 0);
thd->options&= ~OPTION_STATUS_NO_TRANS_UPDATE;
char buf[2048];
String query(buf, sizeof(buf), system_charset_info);
query.length(0); // Have to zero it since constructor doesn't
- TABLE_LIST tables;
- memset(&tables, 0, sizeof(tables));
- tables.table = table;
+ TABLE_LIST table_list;
+ memset(&table_list, 0, sizeof(table_list));
+ table_list.table = *tables;
- int result= store_create_info(thd, &tables, &query, create_info);
+ int result= store_create_info(thd, &table_list, &query, create_info);
DBUG_ASSERT(result == 0); /* store_create_info() always return 0 */
thd->binlog_query(THD::STMT_QUERY_TYPE,
query.ptr(), query.length(),
@@ -2578,16 +2605,6 @@ select_create::binlog_show_create_table()
void select_create::store_values(List<Item> &values)
{
- /*
- Before writing the first row, we write the CREATE TABLE statement
- to the binlog.
- */
- if (binlog_row_based && !create_table_written)
- {
- binlog_show_create_table();
- create_table_written= TRUE;
- }
-
fill_record_n_invoke_before_triggers(thd, field, values, 1,
table->triggers, TRG_EVENT_INSERT);
}
@@ -2607,16 +2624,6 @@ void select_create::send_error(uint errcode,const char *err)
bool select_create::send_eof()
{
- /*
- If no rows where written to the binary log, we write the CREATE
- TABLE statement to the binlog.
- */
- if (binlog_row_based && !create_table_written)
- {
- binlog_show_create_table();
- create_table_written= TRUE;
- }
-
bool tmp=select_insert::send_eof();
if (tmp)
abort();
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 1d6442ba7d6..6454c7e80bb 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -417,19 +417,6 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
#ifndef EMBEDDED_LIBRARY
if (mysql_bin_log.is_open())
{
-#ifdef HAVE_ROW_BASED_REPLICATION
- /*
- We need to do the job that is normally done inside
- binlog_query() here, which is to ensure that the pending event
- is written before tables are unlocked and before any other
- events are written. We also need to update the table map
- version for the binary log to mark that table maps are invalid
- after this point.
- */
- if (binlog_row_based)
- thd->binlog_flush_pending_rows_event(true);
- else
-#endif
{
/*
Make sure last block (the one which caused the error) gets
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 92ade0ff43b..face6425b9a 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -2383,7 +2383,8 @@ TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
List<create_field> *extra_fields,
List<Key> *keys,
List<Item> *items,
- MYSQL_LOCK **lock)
+ MYSQL_LOCK **lock,
+ TABLEOP_HOOKS *hooks)
{
TABLE tmp_table; // Used during 'create_field()'
TABLE_SHARE share;
@@ -2462,6 +2463,7 @@ TABLE *create_table_from_items(THD *thd, HA_CREATE_INFO *create_info,
save us from that ?
*/
table->reginfo.lock_type=TL_WRITE;
+ hooks->prelock(&table, 1); // Call prelock hooks
if (! ((*lock)= mysql_lock_tables(thd, &table, 1,
MYSQL_LOCK_IGNORE_FLUSH, &not_used)))
{
@@ -5105,7 +5107,7 @@ copy_data_between_tables(TABLE *from,TABLE *to,
if (!(copy= new Copy_field[to->s->fields]))
DBUG_RETURN(-1); /* purecov: inspected */
- if (to->file->external_lock(thd, F_WRLCK))
+ if (to->file->ha_external_lock(thd, F_WRLCK))
DBUG_RETURN(-1);
/* We can abort alter table for any table type */
@@ -5245,7 +5247,7 @@ copy_data_between_tables(TABLE *from,TABLE *to,
free_io_cache(from);
*copied= found_count;
*deleted=delete_count;
- if (to->file->external_lock(thd,F_UNLCK))
+ if (to->file->ha_external_lock(thd,F_UNLCK))
error=1;
DBUG_RETURN(error > 0 ? -1 : 0);
}
diff --git a/sql/table.h b/sql/table.h
index d5f09d1c216..c0e0961f467 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -543,7 +543,8 @@ typedef struct st_table_list
struct st_table_list *next_name_resolution_table;
/* Index names in a "... JOIN ... USE/IGNORE INDEX ..." clause. */
List<String> *use_index, *ignore_index;
- TABLE *table; /* opened table */
+ TABLE *table; /* opened table */
+ uint table_id; /* table id (from binlog) for opened table */
/*
select_result for derived table to pass it from table creation to table
filling procedure