Diffstat (limited to 'sql')
-rw-r--r--  sql/Makefile.am            6
-rw-r--r--  sql/ha_innodb.cc          65
-rw-r--r--  sql/ha_innodb.h            2
-rw-r--r--  sql/ha_partition.cc        4
-rw-r--r--  sql/handler.cc           110
-rw-r--r--  sql/handler.h             60
-rw-r--r--  sql/item_sum.cc            2
-rw-r--r--  sql/log.cc               427
-rw-r--r--  sql/log.h                332
-rw-r--r--  sql/log_event.cc        1914
-rw-r--r--  sql/log_event.h          596
-rw-r--r--  sql/mysql_priv.h          73
-rw-r--r--  sql/mysqld.cc            147
-rw-r--r--  sql/rpl_filter.h           1
-rw-r--r--  sql/rpl_rli.h            312
-rw-r--r--  sql/rpl_tblmap.cc        151
-rw-r--r--  sql/rpl_tblmap.h         109
-rw-r--r--  sql/set_var.cc             3
-rw-r--r--  sql/share/errmsg.txt       6
-rw-r--r--  sql/slave.cc             274
-rw-r--r--  sql/slave.h              291
-rw-r--r--  sql/sp.cc                 12
-rw-r--r--  sql/sp_head.cc            89
-rw-r--r--  sql/sql_acl.cc            56
-rw-r--r--  sql/sql_base.cc           81
-rw-r--r--  sql/sql_binlog.cc        135
-rw-r--r--  sql/sql_class.cc         451
-rw-r--r--  sql/sql_class.h          414
-rw-r--r--  sql/sql_delete.cc         50
-rw-r--r--  sql/sql_insert.cc        228
-rw-r--r--  sql/sql_lex.h              3
-rw-r--r--  sql/sql_list.h            22
-rw-r--r--  sql/sql_load.cc          109
-rw-r--r--  sql/sql_parse.cc          54
-rw-r--r--  sql/sql_plugin.cc          4
-rw-r--r--  sql/sql_rename.cc          4
-rw-r--r--  sql/sql_repl.h             2
-rw-r--r--  sql/sql_select.cc         30
-rw-r--r--  sql/sql_show.cc           77
-rw-r--r--  sql/sql_show.h            17
-rw-r--r--  sql/sql_table.cc         180
-rw-r--r--  sql/sql_udf.cc             4
-rw-r--r--  sql/sql_union.cc           2
-rw-r--r--  sql/sql_update.cc         45
-rw-r--r--  sql/sql_yacc.yy            8
-rw-r--r--  sql/table.cc              20
-rw-r--r--  sql/table.h                5
47 files changed, 5849 insertions, 1138 deletions
diff --git a/sql/Makefile.am b/sql/Makefile.am
index a4f761fdc16..a9a0449fbb6 100644
--- a/sql/Makefile.am
+++ b/sql/Makefile.am
@@ -53,7 +53,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \
sql_manager.h sql_map.h sql_string.h unireg.h \
sql_error.h field.h handler.h mysqld_suffix.h \
ha_heap.h ha_myisam.h ha_myisammrg.h ha_partition.h \
- opt_range.h protocol.h \
+ opt_range.h protocol.h rpl_tblmap.h \
sql_select.h structs.h table.h sql_udf.h hash_filo.h\
lex.h lex_symbol.h sql_acl.h sql_crypt.h \
log_event.h sql_repl.h slave.h rpl_filter.h \
@@ -87,7 +87,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \
sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \
sql_load.cc mf_iocache.cc field_conv.cc sql_show.cc \
sql_udf.cc sql_analyse.cc sql_analyse.h sql_cache.cc \
- slave.cc sql_repl.cc rpl_filter.cc \
+ slave.cc sql_repl.cc rpl_filter.cc rpl_tblmap.cc \
sql_union.cc sql_derived.cc \
client.c sql_client.cc mini_client_errors.c pack.c\
stacktrace.c repl_failsafe.h repl_failsafe.cc \
@@ -96,7 +96,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \
tztime.cc my_time.c my_decimal.cc\
sp_head.cc sp_pcontext.cc sp_rcontext.cc sp.cc \
sp_cache.cc parse_file.cc sql_trigger.cc \
- sql_plugin.cc\
+ sql_plugin.cc sql_binlog.cc \
handlerton.cc
EXTRA_mysqld_SOURCES = ha_innodb.cc ha_berkeley.cc ha_archive.cc \
ha_innodb.h ha_berkeley.h ha_archive.h \
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index 8b0cbe87562..d978327f2ce 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -827,6 +827,7 @@ ha_innobase::ha_innobase(TABLE_SHARE *table_arg)
HA_CAN_INDEX_BLOBS |
HA_CAN_SQL_HANDLER |
HA_NOT_EXACT_COUNT |
+ HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS |
HA_PRIMARY_KEY_IN_READ_INDEX |
HA_CAN_GEOMETRY |
HA_TABLE_SCAN_ON_INDEX),
@@ -3052,6 +3053,9 @@ ha_innobase::store_key_val_for_row(
continue;
}
+ /* In a column prefix index, we may need to truncate
+ the stored value: */
+
cs = key_part->field->charset();
src_start = record + key_part->offset;
@@ -3068,7 +3072,11 @@ ha_innobase::store_key_val_for_row(
memcpy(buff, src_start, len);
buff+=len;
- /* Pad the unused space with spaces */
+ /* Pad the unused space with spaces. Note that no
+ padding is ever needed for UCS-2 because in MySQL,
+ all UCS2 characters are 2 bytes, as MySQL does not
+ support surrogate pairs, which are needed to represent
+ characters in the range U+10000 to U+10FFFF. */
if (len < key_part->length) {
len = key_part->length - len;
@@ -3791,9 +3799,9 @@ ha_innobase::delete_row(
}
/**************************************************************************
-Removes a new lock set on a row. This can be called after a row has been read
-in the processing of an UPDATE or a DELETE query, if the option
-innodb_locks_unsafe_for_binlog is set. */
+Removes a new lock set on a row, if it was not read optimistically. This can
+be called after a row has been read in the processing of an UPDATE or a DELETE
+query, if the option innodb_locks_unsafe_for_binlog is set. */
void
ha_innobase::unlock_row(void)
@@ -3803,7 +3811,7 @@ ha_innobase::unlock_row(void)
DBUG_ENTER("ha_innobase::unlock_row");
- if (last_query_id != user_thd->query_id) {
+ if (UNIV_UNLIKELY(last_query_id != user_thd->query_id)) {
ut_print_timestamp(stderr);
sql_print_error("last_query_id is %lu != user_thd_query_id is "
"%lu", (ulong) last_query_id,
@@ -3811,9 +3819,45 @@ ha_innobase::unlock_row(void)
mem_analyze_corruption((byte *) prebuilt->trx);
ut_error;
}
-
- if (srv_locks_unsafe_for_binlog) {
+
+ switch (prebuilt->row_read_type) {
+ case ROW_READ_WITH_LOCKS:
+ if (!srv_locks_unsafe_for_binlog) {
+ break;
+ }
+ /* fall through */
+ case ROW_READ_TRY_SEMI_CONSISTENT:
row_unlock_for_mysql(prebuilt, FALSE);
+ break;
+ case ROW_READ_DID_SEMI_CONSISTENT:
+ prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
+ break;
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/* See handler.h and row0mysql.h for docs on this function. */
+bool
+ha_innobase::was_semi_consistent_read(void)
+/*=======================================*/
+{
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+
+ return(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
+}
+
+/* See handler.h and row0mysql.h for docs on this function. */
+void
+ha_innobase::try_semi_consistent_read(bool yes)
+/*===========================================*/
+{
+ row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
+
+ if (yes && srv_locks_unsafe_for_binlog) {
+ prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
+ } else {
+ prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
}
}
@@ -4328,6 +4372,13 @@ ha_innobase::rnd_init(
err = change_active_index(primary_key);
}
+ /* Don't use semi-consistent read in random row reads (by position).
+ This means we must disable semi_consistent_read if scan is false */
+
+ if (!scan) {
+ try_semi_consistent_read(0);
+ }
+
start_of_scan = 1;
return(err);
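
Standalone sketch (not part of the patch): the unlock_row() hunk above replaces a single flag test with a three-state switch over prebuilt->row_read_type. The illustration below mirrors that state handling; prebuilt_sketch and locks_unsafe_for_binlog are hypothetical stand-ins for row_prebuilt_t and srv_locks_unsafe_for_binlog.

#include <cstdio>

enum row_read_type_sketch
{
  ROW_READ_WITH_LOCKS,           /* normal locking reads */
  ROW_READ_TRY_SEMI_CONSISTENT,  /* may return the last committed value */
  ROW_READ_DID_SEMI_CONSISTENT   /* the last read actually was semi-consistent */
};

struct prebuilt_sketch
{
  row_read_type_sketch row_read_type;
  bool locks_unsafe_for_binlog;

  void unlock_row()
  {
    switch (row_read_type) {
    case ROW_READ_WITH_LOCKS:
      if (!locks_unsafe_for_binlog)
        break;
      /* fall through */
    case ROW_READ_TRY_SEMI_CONSISTENT:
      std::puts("release the lock taken by the last read");
      break;
    case ROW_READ_DID_SEMI_CONSISTENT:
      /* no lock was taken; re-arm optimistic reads for the next row */
      row_read_type= ROW_READ_TRY_SEMI_CONSISTENT;
      break;
    }
  }
};

int main()
{
  prebuilt_sketch p= { ROW_READ_DID_SEMI_CONSISTENT, true };
  p.unlock_row();   /* nothing to release, state reset to TRY_SEMI_CONSISTENT */
  p.unlock_row();   /* now in the TRY state: releases the lock */
  return 0;
}
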
diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h
index f9a185bd885..fd0d3aa7e8c 100644
--- a/sql/ha_innodb.h
+++ b/sql/ha_innodb.h
@@ -122,6 +122,8 @@ class ha_innobase: public handler
int write_row(byte * buf);
int update_row(const byte * old_data, byte * new_data);
int delete_row(const byte * buf);
+ bool was_semi_consistent_read();
+ void try_semi_consistent_read(bool yes);
void unlock_row();
int index_init(uint index, bool sorted);
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 8b4e0d9cfee..f20fb7304ba 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -647,7 +647,7 @@ bool ha_partition::create_handler_file(const char *name)
if (!m_is_sub_partitioned)
{
name_buffer_ptr= strmov(name_buffer_ptr, part_elem->partition_name)+1;
- *engine_array= (uchar) part_elem->engine_type;
+ *engine_array= (uchar) ha_legacy_type(part_elem->engine_type);
DBUG_PRINT("info", ("engine: %u", *engine_array));
engine_array++;
}
@@ -660,7 +660,7 @@ bool ha_partition::create_handler_file(const char *name)
name_buffer_ptr+= name_add(name_buffer_ptr,
part_elem->partition_name,
subpart_elem->partition_name);
- *engine_array= (uchar) part_elem->engine_type;
+ *engine_array= (uchar) ha_legacy_type(part_elem->engine_type);
engine_array++;
}
}
diff --git a/sql/handler.cc b/sql/handler.cc
index bcccdf2e2b0..59445a1b2f1 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -22,6 +22,7 @@
#endif
#include "mysql_priv.h"
+#include "rpl_filter.h"
#include "ha_heap.h"
#include "ha_myisam.h"
#include "ha_myisammrg.h"
@@ -29,7 +30,7 @@
#include <myisampack.h>
#include <errno.h>
-
+
#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
#define NDB_MAX_ATTRIBUTES_IN_TABLE 128
#include "ha_ndbcluster.h"
@@ -37,12 +38,15 @@
#ifdef WITH_PARTITION_STORAGE_ENGINE
#include "ha_partition.h"
#endif
+
#ifdef WITH_INNOBASE_STORAGE_ENGINE
#include "ha_innodb.h"
#endif
extern handlerton *sys_table_types[];
+#define BITMAP_STACKBUF_SIZE (128/8)
+
/* static functions defined in this file */
static handler *create_default(TABLE_SHARE *table);
@@ -1937,6 +1941,9 @@ void handler::print_error(int error, myf errflag)
my_error(ER_NO_SUCH_TABLE, MYF(0), table_share->db.str,
table_share->table_name.str);
break;
+ case HA_ERR_RBR_LOGGING_FAILED:
+ textno= ER_BINLOG_ROW_LOGGING_FAILED;
+ break;
default:
{
/* The error was "unknown" to this function.
@@ -2721,6 +2728,7 @@ TYPELIB *ha_known_exts(void)
return &known_extensions;
}
+
static bool stat_print(THD *thd, const char *type, uint type_len,
const char *file, uint file_len,
const char *status, uint status_len)
@@ -2781,6 +2789,106 @@ bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
return result;
}
+/*
+ Function to check if the conditions for row-based binlogging is
+ correct for the table.
+
+ A row in the given table should be replicated if:
+ - Row-based replication is on
+ - It is not a temporary table
+ - The binlog is enabled
+ - The table shall be binlogged (binlog_*_db rules) [Seems disabled /Matz]
+*/
+
+#ifdef HAVE_ROW_BASED_REPLICATION
+static bool check_table_binlog_row_based(THD *thd, TABLE *table)
+{
+ return
+ binlog_row_based &&
+ thd && (thd->options & OPTION_BIN_LOG) &&
+ (table->s->tmp_table == NO_TMP_TABLE);
+}
+
+template<class RowsEventT> int binlog_log_row(TABLE* table,
+ const byte *before_record,
+ const byte *after_record)
+{
+ bool error= 0;
+ THD *const thd= current_thd;
+
+ if (check_table_binlog_row_based(thd, table))
+ {
+ MY_BITMAP cols;
+ /* Potential buffer on the stack for the bitmap */
+ uint32 bitbuf[BITMAP_STACKBUF_SIZE/sizeof(uint32)];
+ uint n_fields= table->s->fields;
+ my_bool use_bitbuf= n_fields <= sizeof(bitbuf)*8;
+ if (likely(!(error= bitmap_init(&cols,
+ use_bitbuf ? bitbuf : NULL,
+ (n_fields + 7) & ~7UL,
+ false))))
+ {
+ bitmap_set_all(&cols);
+ error=
+ RowsEventT::binlog_row_logging_function(thd, table,
+ table->file->has_transactions(),
+ &cols, table->s->fields,
+ before_record, after_record);
+ if (!use_bitbuf)
+ bitmap_free(&cols);
+ }
+ }
+ return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
+}
+
+
+/*
+ Instantiate the versions we need of the above template function,
+ because we compile with the -fno-implicit-templates option.
+*/
+
+template int binlog_log_row<Write_rows_log_event>(TABLE *, const byte *, const byte *);
+template int binlog_log_row<Delete_rows_log_event>(TABLE *, const byte *, const byte *);
+template int binlog_log_row<Update_rows_log_event>(TABLE *, const byte *, const byte *);
+
+#endif /* HAVE_ROW_BASED_REPLICATION */
+
+int handler::ha_write_row(byte *buf)
+{
+ int error;
+ if (likely(!(error= write_row(buf))))
+ {
+#ifdef HAVE_ROW_BASED_REPLICATION
+ error= binlog_log_row<Write_rows_log_event>(table, 0, buf);
+#endif
+ }
+ return error;
+}
+
+int handler::ha_update_row(const byte *old_data, byte *new_data)
+{
+ int error;
+ if (likely(!(error= update_row(old_data, new_data))))
+ {
+#ifdef HAVE_ROW_BASED_REPLICATION
+ error= binlog_log_row<Update_rows_log_event>(table, old_data, new_data);
+#endif
+ }
+ return error;
+}
+
+int handler::ha_delete_row(const byte *buf)
+{
+ int error;
+ if (likely(!(error= delete_row(buf))))
+ {
+#ifdef HAVE_ROW_BASED_REPLICATION
+ error= binlog_log_row<Delete_rows_log_event>(table, buf, 0);
+#endif
+ }
+ return error;
+}
+
#ifdef HAVE_REPLICATION
/*
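
Standalone sketch (hypothetical names, not the real MySQL classes): the new ha_write_row()/ha_update_row()/ha_delete_row() wrappers above follow a non-virtual-interface pattern, where the public ha_* entry point calls the engine's private virtual primitive and logs the row only if the operation succeeded.

#include <cstdio>
#include <string>

static int log_row(const char *op, const std::string &row)
{
  std::printf("binlog: %s %s\n", op, row.c_str());
  return 0;                              /* 0 = logged OK */
}

class handler_sketch
{
public:
  virtual ~handler_sketch() {}

  int ha_write_row(const std::string &row)
  {
    int error= write_row(row);           /* storage-engine primitive */
    if (!error)
      error= log_row("WRITE", row);      /* row-based logging on success only */
    return error;
  }

private:
  /* Engines override the primitive; callers go through ha_write_row(). */
  virtual int write_row(const std::string &row)= 0;
};

class demo_engine : public handler_sketch
{
private:
  virtual int write_row(const std::string &row)
  {
    std::printf("engine: stored %s\n", row.c_str());
    return 0;
  }
};

int main()
{
  demo_engine e;
  return e.ha_write_row("(1, 'abc')");
}
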
diff --git a/sql/handler.h b/sql/handler.h
index ff81a259a73..27b3ed3fab1 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -74,6 +74,13 @@
*/
#define HA_CAN_INSERT_DELAYED (1 << 14)
#define HA_PRIMARY_KEY_IN_READ_INDEX (1 << 15)
+/*
+ If HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS is set, it means that the position
+ of an arbitrary record can be retrieved using position() when the table
+ has a primary key, effectively allowing random access to the table based
+ on a given record.
+*/
+#define HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS (1 << 16)
#define HA_NOT_DELETE_WITH_CACHE (1 << 18)
#define HA_NO_PREFIX_CHAR_KEYS (1 << 20)
#define HA_CAN_FULLTEXT (1 << 21)
@@ -1054,11 +1061,9 @@ public:
uint get_index(void) const { return active_index; }
virtual int open(const char *name, int mode, uint test_if_locked)=0;
virtual int close(void)=0;
- virtual int write_row(byte * buf) { return HA_ERR_WRONG_COMMAND; }
- virtual int update_row(const byte * old_data, byte * new_data)
- { return HA_ERR_WRONG_COMMAND; }
- virtual int delete_row(const byte * buf)
- { return HA_ERR_WRONG_COMMAND; }
+ virtual int ha_write_row(byte * buf);
+ virtual int ha_update_row(const byte * old_data, byte * new_data);
+ virtual int ha_delete_row(const byte * buf);
/*
SYNOPSIS
start_bulk_update()
@@ -1189,6 +1194,26 @@ public:
virtual int extra_opt(enum ha_extra_function operation, ulong cache_size)
{ return extra(operation); }
virtual int external_lock(THD *thd, int lock_type) { return 0; }
+ /*
+ In an UPDATE or DELETE, if the row under the cursor was locked by another
+ transaction, and the engine used an optimistic read of the last
+ committed row value under the cursor, then the engine returns 1 from this
+ function. MySQL must NOT try to update this optimistic value. If the
+ optimistic value does not match the WHERE condition, MySQL can decide to
+ skip over this row. Currently only works for InnoDB. This can be used to
+ avoid unnecessary lock waits.
+
+ If this method returns nonzero, it will also signal the storage
+ engine that the next read will be a locking re-read of the row.
+ */
+ virtual bool was_semi_consistent_read() { return 0; }
+ /*
+ Tell the engine whether it should avoid unnecessary lock waits.
+ If yes, in an UPDATE or DELETE, if the row under the cursor was locked
+ by another transaction, the engine may try an optimistic read of
+ the last committed row value under the cursor.
+ */
+ virtual void try_semi_consistent_read(bool) {}
virtual void unlock_row() {}
virtual int start_stmt(THD *thd, thr_lock_type lock_type) {return 0;}
/*
@@ -1405,6 +1430,31 @@ public:
virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
uint table_changes)
{ return COMPATIBLE_DATA_NO; }
+
+private:
+
+ /*
+ Row-level primitives for storage engines.
+ These should be overridden by the storage engine class. To call
+ these methods, use the corresponding 'ha_*' method above.
+ */
+ friend int ndb_add_binlog_index(THD *, void *);
+
+ virtual int write_row(byte *buf __attribute__((unused)))
+ {
+ return HA_ERR_WRONG_COMMAND;
+ }
+
+ virtual int update_row(const byte *old_data __attribute__((unused)),
+ byte *new_data __attribute__((unused)))
+ {
+ return HA_ERR_WRONG_COMMAND;
+ }
+
+ virtual int delete_row(const byte *buf __attribute__((unused)))
+ {
+ return HA_ERR_WRONG_COMMAND;
+ }
};
/* Some extern variables used with handlers */
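
Standalone sketch of the semi-consistent read protocol documented in handler.h above, seen from the UPDATE/DELETE read loop: enable optimistic reads, and when a read turns out to be semi-consistent, either skip the row (if it no longer matches WHERE) or re-read it with a lock. engine_sketch is a hypothetical stand-in for a handler; the "rows" are plain integers.

#include <cstdio>

struct engine_sketch
{
  bool semi_consistent_enabled;
  bool last_read_was_semi_consistent;

  engine_sketch()
    : semi_consistent_enabled(false), last_read_was_semi_consistent(false) {}

  void try_semi_consistent_read(bool yes) { semi_consistent_enabled= yes; }
  bool was_semi_consistent_read() const { return last_read_was_semi_consistent; }

  /*
    If the row is locked by another transaction and optimistic reads are
    enabled, return the last committed value instead of waiting for the lock.
  */
  int read_row(bool locked_by_other_trx, int committed_value, int current_value)
  {
    last_read_was_semi_consistent= locked_by_other_trx && semi_consistent_enabled;
    return last_read_was_semi_consistent ? committed_value : current_value;
  }
};

int main()
{
  engine_sketch e;
  e.try_semi_consistent_read(true);             /* UPDATE/DELETE: avoid lock waits */

  int value= e.read_row(true, 10, 20);          /* row is locked elsewhere */
  bool matches_where= (value > 15);             /* pretend WHERE condition */

  if (e.was_semi_consistent_read() && !matches_where)
    std::puts("skip row: optimistic value does not match WHERE");
  else
    std::puts("re-read the row with a lock and update it");
  return 0;
}
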
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index a8163a1758d..dc1cf6cc8b7 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -2662,7 +2662,7 @@ bool Item_sum_count_distinct::add()
*/
return tree->unique_add(table->record[0] + table->s->null_bytes);
}
- if ((error= table->file->write_row(table->record[0])) &&
+ if ((error= table->file->ha_write_row(table->record[0])) &&
error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE)
return TRUE;
diff --git a/sql/log.cc b/sql/log.cc
index d30cf3266f9..44d3869e9d5 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -47,6 +47,19 @@ static int binlog_commit(THD *thd, bool all);
static int binlog_rollback(THD *thd, bool all);
static int binlog_prepare(THD *thd, bool all);
+/*
+ This is a POD. Please keep it that way!
+
+ Don't add constructors, destructors, or virtual functions.
+*/
+struct binlog_trx_data {
+ bool empty() const {
+ return pending == NULL && my_b_tell(&trans_log) == 0;
+ }
+ IO_CACHE trans_log; // The transaction cache
+ Rows_log_event *pending; // The pending binrows event
+};
+
handlerton binlog_hton = {
MYSQL_HANDLERTON_INTERFACE_VERSION,
"binlog",
@@ -92,19 +105,45 @@ bool binlog_init()
static int binlog_close_connection(THD *thd)
{
- IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
- DBUG_ASSERT(mysql_bin_log.is_open() && !my_b_tell(trans_log));
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd->ha_data[binlog_hton.slot];
+ IO_CACHE *trans_log= &trx_data->trans_log;
+ DBUG_ASSERT(mysql_bin_log.is_open() && trx_data->empty());
close_cached_file(trans_log);
- my_free((gptr)trans_log, MYF(0));
+ thd->ha_data[binlog_hton.slot]= 0;
+ my_free((gptr)trx_data, MYF(0));
return 0;
}
-static int binlog_end_trans(THD *thd, IO_CACHE *trans_log, Log_event *end_ev)
+static int
+binlog_end_trans(THD *thd, binlog_trx_data *trx_data, Log_event *end_ev)
{
- int error=0;
DBUG_ENTER("binlog_end_trans");
+ int error=0;
+ IO_CACHE *trans_log= &trx_data->trans_log;
+
if (end_ev)
+ {
+ thd->binlog_flush_pending_rows_event(true);
error= mysql_bin_log.write(thd, trans_log, end_ev);
+ }
+ else
+ {
+ thd->binlog_delete_pending_rows_event();
+ }
+
+ /*
+ We need to step the table map version both after writing the
+ entire transaction to the log file and after rolling back the
+ transaction.
+
+ We need to step the table map version after writing the
+ transaction cache to disk. In addition, we need to step the table
+ map version on a rollback to ensure that a new table map event is
+ generated instead of the one that was written to the thrown-away
+ transaction cache.
+ */
+ ++mysql_bin_log.m_table_map_version;
statistic_increment(binlog_cache_use, &LOCK_status);
if (trans_log->disk_writes != 0)
@@ -130,32 +169,36 @@ static int binlog_prepare(THD *thd, bool all)
static int binlog_commit(THD *thd, bool all)
{
- IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
DBUG_ENTER("binlog_commit");
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd->ha_data[binlog_hton.slot];
+ IO_CACHE *trans_log= &trx_data->trans_log;
DBUG_ASSERT(mysql_bin_log.is_open() &&
(all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))));
- if (!my_b_tell(trans_log))
+ if (trx_data->empty())
{
// we're here because trans_log was flushed in MYSQL_LOG::log()
DBUG_RETURN(0);
}
Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, FALSE);
- DBUG_RETURN(binlog_end_trans(thd, trans_log, &qev));
+ DBUG_RETURN(binlog_end_trans(thd, trx_data, &qev));
}
static int binlog_rollback(THD *thd, bool all)
{
- int error=0;
- IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
DBUG_ENTER("binlog_rollback");
+ int error=0;
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd->ha_data[binlog_hton.slot];
+ IO_CACHE *trans_log= &trx_data->trans_log;
/*
First assert is guaranteed - see trans_register_ha() call below.
The second must be true. If it is not, we're registering
unnecessary, doing extra work. The cause should be found and eliminated
*/
DBUG_ASSERT(all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
- DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log));
+ DBUG_ASSERT(mysql_bin_log.is_open() && !trx_data->empty());
/*
Update the binary log with a BEGIN/ROLLBACK block if we have
cached some queries and we updated some non-transactional
@@ -165,10 +208,10 @@ static int binlog_rollback(THD *thd, bool all)
if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE))
{
Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, FALSE);
- error= binlog_end_trans(thd, trans_log, &qev);
+ error= binlog_end_trans(thd, trx_data, &qev);
}
else
- error= binlog_end_trans(thd, trans_log, 0);
+ error= binlog_end_trans(thd, trx_data, 0);
DBUG_RETURN(error);
}
@@ -195,8 +238,10 @@ static int binlog_rollback(THD *thd, bool all)
static int binlog_savepoint_set(THD *thd, void *sv)
{
- IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
DBUG_ENTER("binlog_savepoint_set");
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd->ha_data[binlog_hton.slot];
+ IO_CACHE *trans_log= &trx_data->trans_log;
DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log));
*(my_off_t *)sv= my_b_tell(trans_log);
@@ -207,8 +252,10 @@ static int binlog_savepoint_set(THD *thd, void *sv)
static int binlog_savepoint_rollback(THD *thd, void *sv)
{
- IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
DBUG_ENTER("binlog_savepoint_rollback");
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd->ha_data[binlog_hton.slot];
+ IO_CACHE *trans_log= &trx_data->trans_log;
DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log));
/*
@@ -367,6 +414,7 @@ MYSQL_LOG::MYSQL_LOG()
:bytes_written(0), last_time(0), query_start(0), name(0),
prepared_xids(0), log_type(LOG_CLOSED), file_id(1), open_count(1),
write_error(FALSE), inited(FALSE), need_start_event(TRUE),
+ m_table_map_version(0),
description_event_for_exec(0), description_event_for_queue(0)
{
/*
@@ -1363,7 +1411,7 @@ void MYSQL_LOG::new_file(bool need_lock)
to change base names at some point.
*/
THD *thd = current_thd; /* may be 0 if we are reacting to SIGHUP */
- Rotate_log_event r(thd,new_name+dirname_length(new_name),
+ Rotate_log_event r(new_name+dirname_length(new_name),
0, LOG_EVENT_OFFSET, 0);
r.write(&log_file);
bytes_written += r.data_written;
@@ -1589,6 +1637,162 @@ bool MYSQL_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
query_id_param >= thd->binlog_evt_union.first_query_id);
}
+
+/*
+ These functions are placed in this file since they need access to
+ binlog_hton, which has internal linkage.
+*/
+
+int THD::binlog_setup_trx_data()
+{
+ DBUG_ENTER("THD::binlog_setup_trx_data");
+ binlog_trx_data *trx_data=
+ (binlog_trx_data*) ha_data[binlog_hton.slot];
+
+ if (trx_data)
+ DBUG_RETURN(0); // Already set up
+
+ ha_data[binlog_hton.slot]= trx_data=
+ (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL));
+ if (!trx_data ||
+ open_cached_file(&trx_data->trans_log, mysql_tmpdir,
+ LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
+ {
+ my_free((gptr)trx_data, MYF(MY_ALLOW_ZERO_PTR));
+ ha_data[binlog_hton.slot]= 0;
+ DBUG_RETURN(1); // Didn't manage to set it up
+ }
+ trx_data->trans_log.end_of_file= max_binlog_cache_size;
+ DBUG_RETURN(0);
+}
+
+Rows_log_event*
+THD::binlog_get_pending_rows_event() const
+{
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) ha_data[binlog_hton.slot];
+ /*
+ This is less than ideal, but here's the story: If there is no
+ trx_data, prepare_pending_rows_event() has never been called
+ (since the trx_data is set up there). In that case, we just return
+ NULL.
+ */
+ return trx_data ? trx_data->pending : NULL;
+}
+
+void
+THD::binlog_set_pending_rows_event(Rows_log_event* ev)
+{
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) ha_data[binlog_hton.slot];
+ DBUG_ASSERT(trx_data);
+ trx_data->pending= ev;
+}
+
+
+/*
+ Moves the last bunch of rows from the pending Rows event to the binlog
+ (either cached binlog if transaction, or disk binlog). Sets a new pending
+ event.
+*/
+int MYSQL_LOG::flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event)
+{
+ DBUG_ENTER("MYSQL_LOG::flush_and_set_pending_rows_event(event)");
+ DBUG_ASSERT(binlog_row_based && mysql_bin_log.is_open());
+ DBUG_PRINT("enter", ("event=%p", event));
+
+ int error= 0;
+
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd->ha_data[binlog_hton.slot];
+
+ DBUG_ASSERT(trx_data);
+
+ if (Rows_log_event* pending= trx_data->pending)
+ {
+ IO_CACHE *file= &log_file;
+
+ /*
+ Decide if we should write to the log file directly or to the
+ transaction log.
+ */
+ if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
+ file= &trx_data->trans_log;
+
+ /*
+ If we are writing to the log file directly, we could avoid
+ locking the log. This does not work since we need to step the
+ m_table_map_version below, and that change has to be protected
+ by the LOCK_log mutex.
+ */
+ pthread_mutex_lock(&LOCK_log);
+
+ /*
+ Write a table map if necessary
+ */
+ if (pending->maybe_write_table_map(thd, file, this))
+ {
+ pthread_mutex_unlock(&LOCK_log);
+ DBUG_RETURN(2);
+ }
+
+ /*
+ Write pending event to log file or transaction cache
+ */
+ if (pending->write(file))
+ {
+ pthread_mutex_unlock(&LOCK_log);
+ DBUG_RETURN(1);
+ }
+
+ /*
+ We step the table map version if we are writing an event
+ representing the end of a statement. We do this regardless of
+ whether we write to the transaction cache or directly to the
+ file.
+
+ In an ideal world, we could avoid stepping the table map version
+ if we were writing to a transaction cache, since we could then
+ reuse the table map that was written earlier in the transaction
+ cache. This does not work since STMT_END_F implies closing all
+ table mappings on the slave side.
+
+ TODO: Find a solution so that table maps do not have to be
+ written several times within a transaction.
+ */
+ if (pending->get_flags(Rows_log_event::STMT_END_F))
+ ++m_table_map_version;
+
+ delete pending;
+
+ if (file == &log_file)
+ {
+ error= flush_and_sync();
+ if (!error)
+ {
+ signal_update();
+ rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
+ }
+ }
+
+ pthread_mutex_unlock(&LOCK_log);
+ }
+ else if (event && event->get_cache_stmt()) /* && pending == 0 */
+ {
+ /*
+ If we are setting a non-null event for a table that is
+ transactional, we start a transaction here as well.
+ */
+ trans_register_ha(thd,
+ thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN),
+ &binlog_hton);
+ }
+
+ trx_data->pending= event;
+
+ DBUG_RETURN(error);
+}
+
/*
Write an event to the binary log
*/
@@ -1609,7 +1813,29 @@ bool MYSQL_LOG::write(Log_event *event_info)
thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt;
DBUG_RETURN(0);
}
-
+
+ /*
+ Flush the pending rows event to the transaction cache or to the
+ log file. Since flushing may itself acquire the LOCK_log mutex,
+ we do it here, before acquiring the LOCK_log mutex in this
+ function.
+
+ This is not optimal, but necessary in the current implementation
+ since there is code that writes rows to system tables without
+ using some way to flush the pending event (e.g., binlog_query()).
+
+ TODO: There shall be no writes to any system table after calling
+ binlog_query(), so these writes have to be moved to before the call
+ of binlog_query() for correct functioning.
+
+ This is necessary not only for RBR; the master might also crash
+ after binlogging the query but before changing the system tables.
+ That would leave the slave and the master in different states
+ (after the master has restarted), so we have to
+ eliminate this problem.
+ */
+ thd->binlog_flush_pending_rows_event(true);
+
pthread_mutex_lock(&LOCK_log);
/*
@@ -1649,37 +1875,26 @@ bool MYSQL_LOG::write(Log_event *event_info)
*/
if (opt_using_transactions && thd)
{
- IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
+ if (thd->binlog_setup_trx_data())
+ goto err;
- if (event_info->get_cache_stmt())
- {
- if (!trans_log)
- {
- thd->ha_data[binlog_hton.slot]= trans_log= (IO_CACHE *)
- my_malloc(sizeof(IO_CACHE), MYF(MY_ZEROFILL));
- if (!trans_log || open_cached_file(trans_log, mysql_tmpdir,
- LOG_PREFIX,
- binlog_cache_size, MYF(MY_WME)))
- {
- my_free((gptr)trans_log, MYF(MY_ALLOW_ZERO_PTR));
- thd->ha_data[binlog_hton.slot]= trans_log= 0;
- goto err;
- }
- trans_log->end_of_file= max_binlog_cache_size;
- trans_register_ha(thd,
- thd->options & (OPTION_NOT_AUTOCOMMIT |
- OPTION_BEGIN),
- &binlog_hton);
- }
- else if (!my_b_tell(trans_log))
- trans_register_ha(thd,
- thd->options & (OPTION_NOT_AUTOCOMMIT |
- OPTION_BEGIN),
- &binlog_hton);
- file= trans_log;
- }
- else if (trans_log && my_b_tell(trans_log))
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd->ha_data[binlog_hton.slot];
+ IO_CACHE *trans_log= &trx_data->trans_log;
+
+ if (event_info->get_cache_stmt() && !my_b_tell(trans_log))
+ trans_register_ha(thd,
+ thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN),
+ &binlog_hton);
+
+ if (event_info->get_cache_stmt() || my_b_tell(trans_log))
file= trans_log;
+ /*
+ Note: as Mats suggested, for all the cases above where we write to
+ trans_log, it sounds unnecessary to lock LOCK_log. We should rather
+ test first if we want to write to trans_log, and if not, lock
+ LOCK_log. TODO.
+ */
}
#endif
DBUG_PRINT("info",("event type=%d",event_info->get_type_code()));
@@ -1694,42 +1909,49 @@ bool MYSQL_LOG::write(Log_event *event_info)
of the SQL command
*/
+ /*
+ If row-based binlogging is used, Insert_id, Rand and other kinds of
+ "setting context" events are not needed.
+ */
if (thd)
{
- if (thd->last_insert_id_used)
+ if (!binlog_row_based)
{
- Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
- thd->current_insert_id);
- if (e.write(file))
- goto err;
- }
- if (thd->insert_id_used)
- {
- Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id);
- if (e.write(file))
- goto err;
- }
- if (thd->rand_used)
- {
- Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
- if (e.write(file))
- goto err;
- }
- if (thd->user_var_events.elements)
- {
- for (uint i= 0; i < thd->user_var_events.elements; i++)
- {
- BINLOG_USER_VAR_EVENT *user_var_event;
- get_dynamic(&thd->user_var_events,(gptr) &user_var_event, i);
- User_var_log_event e(thd, user_var_event->user_var_event->name.str,
- user_var_event->user_var_event->name.length,
- user_var_event->value,
- user_var_event->length,
- user_var_event->type,
- user_var_event->charset_number);
- if (e.write(file))
- goto err;
- }
+ if (thd->last_insert_id_used)
+ {
+ Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
+ thd->current_insert_id);
+ if (e.write(file))
+ goto err;
+ }
+ if (thd->insert_id_used)
+ {
+ Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id);
+ if (e.write(file))
+ goto err;
+ }
+ if (thd->rand_used)
+ {
+ Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
+ if (e.write(file))
+ goto err;
+ }
+ if (thd->user_var_events.elements)
+ {
+ for (uint i= 0; i < thd->user_var_events.elements; i++)
+ {
+ BINLOG_USER_VAR_EVENT *user_var_event;
+ get_dynamic(&thd->user_var_events,(gptr) &user_var_event, i);
+ User_var_log_event e(thd, user_var_event->user_var_event->name.str,
+ user_var_event->user_var_event->name.length,
+ user_var_event->value,
+ user_var_event->length,
+ user_var_event->type,
+ user_var_event->charset_number);
+ if (e.write(file))
+ goto err;
+ }
+ }
}
}
@@ -1760,6 +1982,9 @@ err:
}
}
+ if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
+ ++m_table_map_version;
+
pthread_mutex_unlock(&LOCK_log);
DBUG_RETURN(error);
}
@@ -2307,6 +2532,44 @@ void MYSQL_LOG::signal_update()
DBUG_VOID_RETURN;
}
+#ifndef MYSQL_CLIENT
+bool MYSQL_LOG::write_table_map(THD *thd, IO_CACHE *file, TABLE* table,
+ bool is_transactional)
+{
+ DBUG_ENTER("MYSQL_LOG::write_table_map()");
+ DBUG_PRINT("enter", ("table=%p (%s: %u)",
+ table, table->s->table_name, table->s->table_map_id));
+
+ /* Pre-conditions */
+ DBUG_ASSERT(binlog_row_based && is_open());
+ DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
+
+#ifndef DBUG_OFF
+ /*
+ We only need to execute under the LOCK_log mutex if we are writing
+ to the log file; otherwise, we are writing to a thread-specific
+ transaction cache and there is no need to serialize this event
+ with events in other threads.
+ */
+ if (file == &log_file)
+ safe_mutex_assert_owner(&LOCK_log);
+#endif
+
+ Table_map_log_event::flag_set const
+ flags= Table_map_log_event::NO_FLAGS;
+
+ Table_map_log_event
+ the_event(thd, table, table->s->table_map_id, is_transactional, flags);
+
+ if (the_event.write(file))
+ DBUG_RETURN(1);
+
+ table->s->table_map_version= m_table_map_version;
+ DBUG_RETURN(0);
+}
+#endif /* !defined(MYSQL_CLIENT) */
+
+
#ifdef __NT__
void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
uint length, int buffLen)
@@ -3013,9 +3276,11 @@ void TC_LOG_BINLOG::close()
*/
int TC_LOG_BINLOG::log(THD *thd, my_xid xid)
{
+ DBUG_ENTER("TC_LOG_BINLOG::log");
Xid_log_event xle(thd, xid);
- IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
- return !binlog_end_trans(thd, trans_log, &xle); // invert return value
+ binlog_trx_data *trx_data=
+ (binlog_trx_data*) thd->ha_data[binlog_hton.slot];
+ DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle)); // invert return value
}
void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
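
Standalone sketch (hypothetical types; locking, the transaction cache and table maps are left out): much of the log.cc change above revolves around the per-THD binlog_trx_data and its pending Rows event. Row changes are accumulated in one pending event and only written out when flush_and_set_pending_rows_event() replaces it, which the sketch below reduces to its buffering idea.

#include <cstdio>
#include <string>
#include <vector>

struct rows_event_sketch
{
  std::string table;
  std::vector<std::string> rows;
  rows_event_sketch(const std::string &t) : table(t) {}
};

struct binlog_sketch
{
  rows_event_sketch *pending;

  binlog_sketch() : pending(0) {}
  ~binlog_sketch() { flush_and_set_pending(0); }   /* end of statement */

  /* Write out the current pending event, if any, and install a new one. */
  void flush_and_set_pending(rows_event_sketch *ev)
  {
    if (pending)
    {
      std::printf("write Table_map + Rows event for %s (%u rows)\n",
                  pending->table.c_str(), (unsigned) pending->rows.size());
      delete pending;
    }
    pending= ev;
  }

  void log_row(const std::string &table, const std::string &row)
  {
    if (!pending || pending->table != table)       /* table changed: flush */
      flush_and_set_pending(new rows_event_sketch(table));
    pending->rows.push_back(row);                  /* otherwise accumulate */
  }
};

int main()
{
  binlog_sketch log;
  log.log_row("t1", "(1)");
  log.log_row("t1", "(2)");   /* buffered into the same pending event */
  log.log_row("t2", "(3)");   /* flushes the t1 event, starts one for t2 */
  return 0;
}
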
diff --git a/sql/log.h b/sql/log.h
new file mode 100644
index 00000000000..ea2946c2d86
--- /dev/null
+++ b/sql/log.h
@@ -0,0 +1,332 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef LOG_H
+#define LOG_H
+
+struct st_relay_log_info;
+
+class Format_description_log_event;
+
+/*
+ Transaction Coordinator log - a base abstract class
+ for two different implementations
+*/
+class TC_LOG
+{
+ public:
+ int using_heuristic_recover();
+ TC_LOG() {}
+ virtual ~TC_LOG() {}
+
+ virtual int open(const char *opt_name)=0;
+ virtual void close()=0;
+ virtual int log(THD *thd, my_xid xid)=0;
+ virtual void unlog(ulong cookie, my_xid xid)=0;
+};
+
+class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
+{
+ public:
+ int open(const char *opt_name) { return 0; }
+ void close() { }
+ int log(THD *thd, my_xid xid) { return 1; }
+ void unlog(ulong cookie, my_xid xid) { }
+};
+
+#ifdef HAVE_MMAP
+class TC_LOG_MMAP: public TC_LOG
+{
+ public: // only to keep Sun Forte on sol9x86 happy
+ typedef enum {
+ POOL, // page is in pool
+ ERROR, // last sync failed
+ DIRTY // new xids added since last sync
+ } PAGE_STATE;
+
+ private:
+ typedef struct st_page {
+ struct st_page *next; // pages are linked in a fifo queue
+ my_xid *start, *end; // usable area of a page
+ my_xid *ptr; // next xid will be written here
+ int size, free; // max and current number of free xid slots on the page
+ int waiters; // number of waiters on condition
+ PAGE_STATE state; // see above
+ pthread_mutex_t lock; // to access page data or control structure
+ pthread_cond_t cond; // to wait for a sync
+ } PAGE;
+
+ char logname[FN_REFLEN];
+ File fd;
+ my_off_t file_length;
+ uint npages, inited;
+ uchar *data;
+ struct st_page *pages, *syncing, *active, *pool, *pool_last;
+ /*
+ note that, e.g., LOCK_active is only used to protect the
+ 'active' pointer; to protect the content of the active page,
+ one has to use active->lock.
+ Same for LOCK_pool and LOCK_sync.
+ */
+ pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
+ pthread_cond_t COND_pool, COND_active;
+
+ public:
+ TC_LOG_MMAP(): inited(0) {}
+ int open(const char *opt_name);
+ void close();
+ int log(THD *thd, my_xid xid);
+ void unlog(ulong cookie, my_xid xid);
+ int recover();
+
+ private:
+ void get_active_from_pool();
+ int sync();
+ int overflow();
+};
+#else
+#define TC_LOG_MMAP TC_LOG_DUMMY
+#endif
+
+extern TC_LOG *tc_log;
+extern TC_LOG_MMAP tc_log_mmap;
+extern TC_LOG_DUMMY tc_log_dummy;
+
+/* log info errors */
+#define LOG_INFO_EOF -1
+#define LOG_INFO_IO -2
+#define LOG_INFO_INVALID -3
+#define LOG_INFO_SEEK -4
+#define LOG_INFO_MEM -6
+#define LOG_INFO_FATAL -7
+#define LOG_INFO_IN_USE -8
+
+/* bitmap to SQL_LOG::close() */
+#define LOG_CLOSE_INDEX 1
+#define LOG_CLOSE_TO_BE_OPENED 2
+#define LOG_CLOSE_STOP_EVENT 4
+
+struct st_relay_log_info;
+
+typedef struct st_log_info
+{
+ char log_file_name[FN_REFLEN];
+ my_off_t index_file_offset, index_file_start_offset;
+ my_off_t pos;
+ bool fatal; // if the purge happens to give us a negative offset
+ pthread_mutex_t lock;
+ st_log_info():fatal(0) { pthread_mutex_init(&lock, MY_MUTEX_INIT_FAST);}
+ ~st_log_info() { pthread_mutex_destroy(&lock);}
+} LOG_INFO;
+
+class Log_event;
+class Rows_log_event;
+
+enum enum_log_type { LOG_CLOSED, LOG_TO_BE_OPENED, LOG_NORMAL, LOG_NEW, LOG_BIN};
+
+/*
+ TODO split MYSQL_LOG into base MYSQL_LOG and
+ MYSQL_QUERY_LOG, MYSQL_SLOW_LOG, MYSQL_BIN_LOG
+ most of the code from MYSQL_LOG should be in the MYSQL_BIN_LOG
+ only (TC_LOG included)
+
+ TODO use mmap instead of IO_CACHE for binlog
+ (mmap+fsync is two times faster than write+fsync)
+*/
+
+class MYSQL_LOG: public TC_LOG
+{
+ private:
+ /* LOCK_log and LOCK_index are inited by init_pthread_objects() */
+ pthread_mutex_t LOCK_log, LOCK_index;
+ pthread_mutex_t LOCK_prep_xids;
+ pthread_cond_t COND_prep_xids;
+ pthread_cond_t update_cond;
+ ulonglong bytes_written;
+ time_t last_time,query_start;
+ IO_CACHE log_file;
+ IO_CACHE index_file;
+ char *name;
+ char time_buff[20],db[NAME_LEN+1];
+ char log_file_name[FN_REFLEN],index_file_name[FN_REFLEN];
+ /*
+ The max size before rotation (usable only if log_type == LOG_BIN: binary
+ logs and relay logs).
+ For a binlog, max_size should be max_binlog_size.
+ For a relay log, it should be max_relay_log_size if this is non-zero,
+ max_binlog_size otherwise.
+ max_size is set in init(), and dynamically changed (when one does SET
+ GLOBAL MAX_BINLOG_SIZE|MAX_RELAY_LOG_SIZE) by fix_max_binlog_size and
+ fix_max_relay_log_size.
+ */
+ ulong max_size;
+ ulong prepared_xids; /* for tc log - number of xids to remember */
+ volatile enum_log_type log_type;
+ enum cache_type io_cache_type;
+ // current file sequence number for load data infile binary logging
+ uint file_id;
+ uint open_count; // For replication
+ int readers_count;
+ bool write_error, inited;
+ bool need_start_event;
+ /*
+ no_auto_events means we don't want any of these automatic events:
+ Start/Rotate/Stop. That is, in 4.x, when we rotate a relay log, we don't
+ want a Rotate_log event to be written to the relay log; the same goes for
+ starting a relay log, etc. So in 4.x this is 1 for relay logs, 0 for binlogs.
+ In 5.0 it's 0 for relay logs too!
+ */
+ bool no_auto_events;
+ friend class Log_event;
+
+public:
+ ulonglong m_table_map_version;
+
+ /*
+ These describe the log's format. This is used only for relay logs.
+ _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
+ necessary to have 2 distinct objects, because the I/O thread may be reading
+ events in a different format from what the SQL thread is reading (consider
+ the case of a master which has been upgraded from 5.0 to 5.1 without doing
+ RESET MASTER, or from 4.x to 5.0).
+ */
+ Format_description_log_event *description_event_for_exec,
+ *description_event_for_queue;
+
+ MYSQL_LOG();
+ /*
+ note that there's no destructor ~MYSQL_LOG() !
+ The reason is that we don't want it to be automatically called
+ on exit() - but only during the correct shutdown process
+ */
+
+ int open(const char *opt_name);
+ void close();
+ int log(THD *thd, my_xid xid);
+ void unlog(ulong cookie, my_xid xid);
+ int recover(IO_CACHE *log, Format_description_log_event *fdle);
+#if !defined(MYSQL_CLIENT)
+ bool is_table_mapped(TABLE *table) const
+ {
+ return table->s->table_map_version == m_table_map_version;
+ }
+
+ int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event);
+
+#endif /* !defined(MYSQL_CLIENT) */
+ void reset_bytes_written()
+ {
+ bytes_written = 0;
+ }
+ void harvest_bytes_written(ulonglong* counter)
+ {
+#ifndef DBUG_OFF
+ char buf1[22],buf2[22];
+#endif
+ DBUG_ENTER("harvest_bytes_written");
+ (*counter)+=bytes_written;
+ DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1),
+ llstr(bytes_written,buf2)));
+ bytes_written=0;
+ DBUG_VOID_RETURN;
+ }
+ void set_max_size(ulong max_size_arg);
+ void signal_update();
+ void wait_for_update(THD* thd, bool master_or_slave);
+ void set_need_start_event() { need_start_event = 1; }
+ void init(enum_log_type log_type_arg,
+ enum cache_type io_cache_type_arg,
+ bool no_auto_events_arg, ulong max_size);
+ void init_pthread_objects();
+ void cleanup();
+ bool open(const char *log_name,
+ enum_log_type log_type,
+ const char *new_name,
+ enum cache_type io_cache_type_arg,
+ bool no_auto_events_arg, ulong max_size,
+ bool null_created);
+ const char *generate_name(const char *log_name, const char *suffix,
+ bool strip_ext, char *buff);
+ /* simplified open_xxx wrappers for the gigantic open above */
+ bool open_query_log(const char *log_name)
+ {
+ char buf[FN_REFLEN];
+ return open(generate_name(log_name, ".log", 0, buf),
+ LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0);
+ }
+ bool open_slow_log(const char *log_name)
+ {
+ char buf[FN_REFLEN];
+ return open(generate_name(log_name, "-slow.log", 0, buf),
+ LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0);
+ }
+ bool open_index_file(const char *index_file_name_arg,
+ const char *log_name);
+ void new_file(bool need_lock);
+ bool write(THD *thd, enum enum_server_command command,
+ const char *format,...);
+ bool write(THD *thd, const char *query, uint query_length,
+ time_t query_start=0);
+ bool write(Log_event* event_info); // binary log write
+ bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event);
+
+ bool write_table_map(THD *thd, IO_CACHE *cache, TABLE *table, bool is_trans);
+
+ void start_union_events(THD *thd);
+ void stop_union_events(THD *thd);
+ bool is_query_in_union(THD *thd, query_id_t query_id_param);
+
+ /*
+ v stands for vector
+ invoked as appendv(buf1,len1,buf2,len2,...,bufn,lenn,0)
+ */
+ bool appendv(const char* buf,uint len,...);
+ bool append(Log_event* ev);
+
+ int generate_new_name(char *new_name,const char *old_name);
+ void make_log_name(char* buf, const char* log_ident);
+ bool is_active(const char* log_file_name);
+ int update_log_index(LOG_INFO* linfo, bool need_update_threads);
+ void rotate_and_purge(uint flags);
+ bool flush_and_sync();
+ int purge_logs(const char *to_log, bool included,
+ bool need_mutex, bool need_update_threads,
+ ulonglong *decrease_log_space);
+ int purge_logs_before_date(time_t purge_time);
+ int purge_first_log(struct st_relay_log_info* rli, bool included);
+ bool reset_logs(THD* thd);
+ void close(uint exiting);
+
+ // iterating through the log index file
+ int find_log_pos(LOG_INFO* linfo, const char* log_name,
+ bool need_mutex);
+ int find_next_log(LOG_INFO* linfo, bool need_mutex);
+ int get_current_log(LOG_INFO* linfo);
+ uint next_file_id();
+ inline bool is_open() { return log_type != LOG_CLOSED; }
+ inline char* get_index_fname() { return index_file_name;}
+ inline char* get_log_fname() { return log_file_name; }
+ inline char* get_name() { return name; }
+ inline pthread_mutex_t* get_log_lock() { return &LOCK_log; }
+ inline IO_CACHE* get_log_file() { return &log_file; }
+
+ inline void lock_index() { pthread_mutex_lock(&LOCK_index);}
+ inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);}
+ inline IO_CACHE *get_index_file() { return &index_file;}
+ inline uint32 get_open_count() { return open_count; }
+};
+
+#endif /* LOG_H */
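
Standalone sketch of the table-map versioning added to MYSQL_LOG above (m_table_map_version, is_table_mapped(), write_table_map()): each table share remembers the binlog's version counter at the time its Table_map event was last written, and a fresh map is emitted whenever the counters differ; the counter is stepped at statement/transaction boundaries. The types below are hypothetical; only the version bookkeeping is shown.

#include <cstdio>

struct table_share_sketch
{
  const char *name;
  unsigned long table_map_version;       /* version at last Table_map write */
};

struct binlog_versioning_sketch
{
  unsigned long m_table_map_version;

  binlog_versioning_sketch() : m_table_map_version(0) {}

  bool is_table_mapped(const table_share_sketch &t) const
  { return t.table_map_version == m_table_map_version; }

  void write_table_map(table_share_sketch &t)
  {
    std::printf("Table_map event for %s (version %lu)\n",
                t.name, m_table_map_version);
    t.table_map_version= m_table_map_version;   /* mapped for this version */
  }

  void write_rows(table_share_sketch &t)
  {
    if (!is_table_mapped(t))                    /* map before the first rows */
      write_table_map(t);
    std::printf("Rows event for %s\n", t.name);
  }

  /* Stepping the version forces a fresh Table_map the next time. */
  void end_of_transaction() { ++m_table_map_version; }
};

int main()
{
  binlog_versioning_sketch log;
  table_share_sketch t1= { "t1", (unsigned long) -1 };
  log.write_rows(t1);          /* Table_map + Rows */
  log.write_rows(t1);          /* Rows only, map still current */
  log.end_of_transaction();
  log.write_rows(t1);          /* Table_map again after the version step */
  return 0;
}
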
diff --git a/sql/log_event.cc b/sql/log_event.cc
index c8f8ff40700..6e256a0c295 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -21,11 +21,14 @@
#pragma implementation // gcc: Class implementation
#endif
-#include "mysql_priv.h"
+#include "mysql_priv.h"
#include "slave.h"
#include "rpl_filter.h"
#include <my_dir.h>
#endif /* MYSQL_CLIENT */
+#include <base64.h>
+#include <my_bitmap.h>
+#include <my_vle.h>
#define log_cs &my_charset_latin1
@@ -232,6 +235,7 @@ char *str_to_hex(char *to, const char *from, uint len)
commands just before it prints a query.
*/
+#ifdef MYSQL_CLIENT
static void print_set_option(FILE* file, uint32 bits_changed, uint32 option,
uint32 flags, const char* name, bool* need_comma)
{
@@ -243,6 +247,7 @@ static void print_set_option(FILE* file, uint32 bits_changed, uint32 option,
*need_comma= 1;
}
}
+#endif
/**************************************************************************
Log_event methods (= the parent class of all events)
@@ -271,6 +276,10 @@ const char* Log_event::get_type_str()
case XID_EVENT: return "Xid";
case USER_VAR_EVENT: return "User var";
case FORMAT_DESCRIPTION_EVENT: return "Format_desc";
+ case TABLE_MAP_EVENT: return "Table_map";
+ case WRITE_ROWS_EVENT: return "Write_rows";
+ case UPDATE_ROWS_EVENT: return "Update_rows";
+ case DELETE_ROWS_EVENT: return "Delete_rows";
case BEGIN_LOAD_QUERY_EVENT: return "Begin_load_query";
case EXECUTE_LOAD_QUERY_EVENT: return "Execute_load_query";
default: return "Unknown"; /* impossible */
@@ -778,6 +787,9 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
DBUG_RETURN(NULL); // general sanity check - will fail on a partial read
}
+ /* To check the integrity of the Log_event_type enumeration */
+ DBUG_ASSERT(buf[EVENT_TYPE_OFFSET] < ENUM_END_EVENT);
+
switch(buf[EVENT_TYPE_OFFSET]) {
case QUERY_EVENT:
ev = new Query_log_event(buf, event_len, description_event, QUERY_EVENT);
@@ -829,6 +841,20 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
case FORMAT_DESCRIPTION_EVENT:
ev = new Format_description_log_event(buf, event_len, description_event);
break;
+#if defined(HAVE_REPLICATION) && defined(HAVE_ROW_BASED_REPLICATION)
+ case WRITE_ROWS_EVENT:
+ ev = new Write_rows_log_event(buf, event_len, description_event);
+ break;
+ case UPDATE_ROWS_EVENT:
+ ev = new Update_rows_log_event(buf, event_len, description_event);
+ break;
+ case DELETE_ROWS_EVENT:
+ ev = new Delete_rows_log_event(buf, event_len, description_event);
+ break;
+ case TABLE_MAP_EVENT:
+ ev = new Table_map_log_event(buf, event_len, description_event);
+ break;
+#endif
case BEGIN_LOAD_QUERY_EVENT:
ev = new Begin_load_query_log_event(buf, event_len, description_event);
break;
@@ -952,6 +978,24 @@ void Log_event::print_header(FILE* file, PRINT_EVENT_INFO* print_event_info)
}
+void Log_event::print_base64(FILE* file, PRINT_EVENT_INFO* print_event_info)
+{
+ uchar *ptr= (uchar*)temp_buf;
+ my_off_t size= uint4korr(ptr + EVENT_LEN_OFFSET);
+
+ char *tmp_str=
+ (char *) my_malloc(base64_needed_encoded_length(size), MYF(MY_WME));
+ if (!tmp_str) {
+ fprintf(stderr, "\nError: Out of memory. "
+ "Could not print correct binlog event.\n");
+ return;
+ }
+ int res= base64_encode(ptr, size, tmp_str);
+ fprintf(file, "\nBINLOG '\n%s\n';\n", tmp_str);
+ my_free(tmp_str, MYF(0));
+}
+
+
/*
Log_event::print_timestamp()
*/
@@ -1714,7 +1758,7 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli, const char *query
clear_all_errors(thd, rli); /* Can ignore query */
else
{
- slave_print_error(rli,expected_error,
+ slave_print_msg(ERROR_LEVEL, rli, expected_error,
"\
Query partially completed on the master (error on master: %d) \
and was aborted. There is a chance that your master is inconsistent at this \
@@ -1743,16 +1787,16 @@ compare_errors:
!ignored_error_code(actual_error) &&
!ignored_error_code(expected_error))
{
- slave_print_error(rli, 0,
- "\
-Query caused different errors on master and slave. \
+ slave_print_msg(ERROR_LEVEL, rli, 0,
+ "\
+Query caused different errors on master and slave. \
Error on master: '%s' (%d), Error on slave: '%s' (%d). \
Default database: '%s'. Query: '%s'",
- ER_SAFE(expected_error),
- expected_error,
- actual_error ? thd->net.last_error: "no error",
- actual_error,
- print_slave_db_safe(db), query_arg);
+ ER_SAFE(expected_error),
+ expected_error,
+ actual_error ? thd->net.last_error: "no error",
+ actual_error,
+ print_slave_db_safe(db), query_arg);
thd->query_error= 1;
}
/*
@@ -1769,11 +1813,11 @@ Default database: '%s'. Query: '%s'",
*/
else if (thd->query_error || thd->is_fatal_error)
{
- slave_print_error(rli,actual_error,
- "Error '%s' on query. Default database: '%s'. Query: '%s'",
- (actual_error ? thd->net.last_error :
- "unexpected success or fatal error"),
- print_slave_db_safe(thd->db), query_arg);
+ slave_print_msg(ERROR_LEVEL, rli, actual_error,
+ "Error '%s' on query. Default database: '%s'. Query: '%s'",
+ (actual_error ? thd->net.last_error :
+ "unexpected success or fatal error"),
+ print_slave_db_safe(thd->db), query_arg);
thd->query_error= 1;
}
@@ -2055,6 +2099,25 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver)
post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN;
post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1];
post_header_len[FORMAT_DESCRIPTION_EVENT-1]= FORMAT_DESCRIPTION_HEADER_LEN;
+ post_header_len[TABLE_MAP_EVENT-1]= TABLE_MAP_HEADER_LEN;
+ post_header_len[WRITE_ROWS_EVENT-1]= ROWS_HEADER_LEN;
+ post_header_len[UPDATE_ROWS_EVENT-1]= ROWS_HEADER_LEN;
+ post_header_len[DELETE_ROWS_EVENT-1]= ROWS_HEADER_LEN;
+ /*
+ Here we have the possibility to simulate a master from before we changed
+ the table map id to be stored in 6 bytes: when it was stored in 4
+ bytes (=> post_header_len was 6). This is used to test backward
+ compatibility.
+ This code can be removed after a few months (today is Dec 21st 2005),
+ when we know that the 4-byte masters are not deployed anymore (check
+ with Tomas Ulin first!), and the accompanying test (rpl_row_4_bytes)
+ too.
+ */
+ DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
+ post_header_len[TABLE_MAP_EVENT-1]=
+ post_header_len[WRITE_ROWS_EVENT-1]=
+ post_header_len[UPDATE_ROWS_EVENT-1]=
+ post_header_len[DELETE_ROWS_EVENT-1]= 6;);
post_header_len[BEGIN_LOAD_QUERY_EVENT-1]= post_header_len[APPEND_BLOCK_EVENT-1];
post_header_len[EXECUTE_LOAD_QUERY_EVENT-1]= EXECUTE_LOAD_QUERY_HEADER_LEN;
}
@@ -2189,10 +2252,8 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli)
As a transaction NEVER spans on 2 or more binlogs:
if we have an active transaction at this point, the master died
while writing the transaction to the binary log, i.e. while
- flushing the binlog cache to the binlog. As the write was started,
- the transaction had been committed on the master, so we lack of
- information to replay this transaction on the slave; all we can do
- is stop with error.
+ flushing the binlog cache to the binlog. XA guarantees that the master has
+ rolled back. So we roll back.
Note: this event could be sent by the master to inform us of the
format of its binlog; in other words maybe it is not at its
original place when it comes to us; we'll know this by checking
@@ -2200,11 +2261,13 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli)
*/
if (!artificial_event && created && thd->transaction.all.nht)
{
- slave_print_error(rli, 0, "Rolling back unfinished transaction (no "
- "COMMIT or ROLLBACK) from relay log. A probable cause "
- "is that the master died while writing the transaction "
- "to its binary log.");
- end_trans(thd, ROLLBACK);
+ /* This is not an error (XA is safe), just information */
+ slave_print_msg(INFORMATION_LEVEL, rli, 0,
+ "Rolling back unfinished transaction (no COMMIT "
+ "or ROLLBACK in relay log). A probable cause is that "
+ "the master died while writing the transaction to "
+ "its binary log, thus rolled back too.");
+ rli->cleanup_context(thd, 1);
}
#endif
/*
@@ -2751,6 +2814,9 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
thd->query_length= 0; // Should not be needed
thd->query_error= 0;
clear_all_errors(thd, rli);
+
+ /* see Query_log_event::exec_event() and BUG#13360 */
+ DBUG_ASSERT(!rli->m_table_map.count());
/*
Usually mysql_init_query() is called by mysql_parse(), but we need it here
as the present method does not call mysql_parse().
@@ -2962,9 +3028,9 @@ error:
sql_errno=ER_UNKNOWN_ERROR;
err=ER(sql_errno);
}
- slave_print_error(rli,sql_errno,"\
+ slave_print_msg(ERROR_LEVEL, rli, sql_errno,"\
Error '%s' running LOAD DATA INFILE on table '%s'. Default database: '%s'",
- err, (char*)table_name, print_slave_db_safe(save_db));
+ err, (char*)table_name, print_slave_db_safe(save_db));
free_root(thd->mem_root,MYF(MY_KEEP_PREALLOC));
return 1;
}
@@ -2972,9 +3038,9 @@ Error '%s' running LOAD DATA INFILE on table '%s'. Default database: '%s'",
if (thd->is_fatal_error)
{
- slave_print_error(rli,ER_UNKNOWN_ERROR, "\
+ slave_print_msg(ERROR_LEVEL, rli, ER_UNKNOWN_ERROR, "\
Fatal error running LOAD DATA INFILE on table '%s'. Default database: '%s'",
- (char*)table_name, print_slave_db_safe(save_db));
+ (char*)table_name, print_slave_db_safe(save_db));
return 1;
}
@@ -3035,8 +3101,7 @@ void Rotate_log_event::print(FILE* file, PRINT_EVENT_INFO* print_event_info)
#ifndef MYSQL_CLIENT
-Rotate_log_event::Rotate_log_event(THD* thd_arg,
- const char* new_log_ident_arg,
+Rotate_log_event::Rotate_log_event(const char* new_log_ident_arg,
uint ident_len_arg, ulonglong pos_arg,
uint flags_arg)
:Log_event(), new_log_ident(new_log_ident_arg),
@@ -3045,7 +3110,7 @@ Rotate_log_event::Rotate_log_event(THD* thd_arg,
{
#ifndef DBUG_OFF
char buff[22];
- DBUG_ENTER("Rotate_log_event::Rotate_log_event(THD*,...)");
+ DBUG_ENTER("Rotate_log_event::Rotate_log_event(...,flags)");
DBUG_PRINT("enter",("new_log_ident %s pos %s flags %lu", new_log_ident_arg,
llstr(pos_arg, buff), flags));
#endif
@@ -3353,12 +3418,24 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli)
Xid_log_event methods
**************************************************************************/
+#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT)
+/*
+ This static class member could be removed when mysqltest is made to support
+ a --replace-regex command: then tests which have XIDs in their output can
+ use this command to suppress non-deterministic XID values.
+*/
+my_bool Xid_log_event::show_xid;
+#endif
+
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
void Xid_log_event::pack_info(Protocol *protocol)
{
char buf[128], *pos;
pos= strmov(buf, "COMMIT /* xid=");
- pos= longlong10_to_str(xid, pos, 10);
+#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT)
+ if (show_xid)
+#endif
+ pos= longlong10_to_str(xid, pos, 10);
pos= strmov(pos, " */");
protocol->store(buf, (uint) (pos-buf), &my_charset_bin);
}
@@ -4179,7 +4256,8 @@ int Create_file_log_event::exec_event(struct st_relay_log_info* rli)
init_io_cache(&file, fd, IO_SIZE, WRITE_CACHE, (my_off_t)0, 0,
MYF(MY_WME|MY_NABP)))
{
- slave_print_error(rli,my_errno, "Error in Create_file event: could not open file '%s'", fname_buf);
+ slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Create_file event: "
+ "could not open file '%s'", fname_buf);
goto err;
}
@@ -4190,9 +4268,9 @@ int Create_file_log_event::exec_event(struct st_relay_log_info* rli)
if (write_base(&file))
{
strmov(p, ".info"); // to have it right in the error message
- slave_print_error(rli,my_errno,
- "Error in Create_file event: could not write to file '%s'",
- fname_buf);
+ slave_print_msg(ERROR_LEVEL, rli, my_errno,
+ "Error in Create_file event: could not write to file '%s'",
+ fname_buf);
goto err;
}
end_io_cache(&file);
@@ -4204,12 +4282,14 @@ int Create_file_log_event::exec_event(struct st_relay_log_info* rli)
O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW,
MYF(MY_WME))) < 0)
{
- slave_print_error(rli,my_errno, "Error in Create_file event: could not open file '%s'", fname_buf);
+ slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Create_file event: "
+ "could not open file '%s'", fname_buf);
goto err;
}
if (my_write(fd, (byte*) block, block_len, MYF(MY_WME+MY_NABP)))
{
- slave_print_error(rli,my_errno, "Error in Create_file event: write to '%s' failed", fname_buf);
+ slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Create_file event: "
+ "write to '%s' failed", fname_buf);
goto err;
}
error=0; // Everything is ok
@@ -4348,25 +4428,25 @@ int Append_block_log_event::exec_event(struct st_relay_log_info* rli)
O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW,
MYF(MY_WME))) < 0)
{
- slave_print_error(rli, my_errno,
- "Error in %s event: could not create file '%s'",
- get_type_str(), fname);
+ slave_print_msg(ERROR_LEVEL, rli, my_errno,
+ "Error in %s event: could not create file '%s'",
+ get_type_str(), fname);
goto err;
}
}
else if ((fd = my_open(fname, O_WRONLY | O_APPEND | O_BINARY | O_NOFOLLOW,
MYF(MY_WME))) < 0)
{
- slave_print_error(rli, my_errno,
- "Error in %s event: could not open file '%s'",
- get_type_str(), fname);
+ slave_print_msg(ERROR_LEVEL, rli, my_errno,
+ "Error in %s event: could not open file '%s'",
+ get_type_str(), fname);
goto err;
}
if (my_write(fd, (byte*) block, block_len, MYF(MY_WME+MY_NABP)))
{
- slave_print_error(rli, my_errno,
- "Error in %s event: write to '%s' failed",
- get_type_str(), fname);
+ slave_print_msg(ERROR_LEVEL, rli, my_errno,
+ "Error in %s event: write to '%s' failed",
+ get_type_str(), fname);
goto err;
}
error=0;
@@ -4573,7 +4653,8 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli)
init_io_cache(&file, fd, IO_SIZE, READ_CACHE, (my_off_t)0, 0,
MYF(MY_WME|MY_NABP)))
{
- slave_print_error(rli,my_errno, "Error in Exec_load event: could not open file '%s'", fname);
+ slave_print_msg(ERROR_LEVEL, rli, my_errno, "Error in Exec_load event: "
+ "could not open file '%s'", fname);
goto err;
}
if (!(lev = (Load_log_event*)Log_event::read_log_event(&file,
@@ -4581,7 +4662,8 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli)
rli->relay_log.description_event_for_exec)) ||
lev->get_type_code() != NEW_LOAD_EVENT)
{
- slave_print_error(rli,0, "Error in Exec_load event: file '%s' appears corrupted", fname);
+ slave_print_msg(ERROR_LEVEL, rli, 0, "Error in Exec_load event: "
+ "file '%s' appears corrupted", fname);
goto err;
}
@@ -4607,10 +4689,10 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli)
char *tmp= my_strdup(rli->last_slave_error,MYF(MY_WME));
if (tmp)
{
- slave_print_error(rli,
- rli->last_slave_errno, /* ok to re-use error code */
- "%s. Failed executing load from '%s'",
- tmp, fname);
+ slave_print_msg(ERROR_LEVEL, rli,
+ rli->last_slave_errno, /* ok to re-use error code */
+ "%s. Failed executing load from '%s'",
+ tmp, fname);
my_free(tmp,MYF(0));
}
goto err;
@@ -4816,7 +4898,7 @@ Execute_load_query_log_event::exec_event(struct st_relay_log_info* rli)
if (!(buf = my_malloc(q_len + 1 - (fn_pos_end - fn_pos_start) +
(FN_REFLEN + 10) + 10 + 8 + 5, MYF(MY_WME))))
{
- slave_print_error(rli, my_errno, "Not enough memory");
+ slave_print_msg(ERROR_LEVEL, rli, my_errno, "Not enough memory");
return 1;
}
@@ -4942,3 +5024,1727 @@ char* sql_ex_info::init(char* buf,char* buf_end,bool use_new_format)
}
return buf;
}
+
+
+#ifdef HAVE_ROW_BASED_REPLICATION
+
+/**************************************************************************
+ Rows_log_event member functions
+**************************************************************************/
+
+#ifndef MYSQL_CLIENT
+Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid,
+ MY_BITMAP const *cols, bool is_transactional)
+ : Log_event(thd_arg, 0, is_transactional),
+ m_table(tbl_arg),
+ m_table_id(tid),
+ m_width(tbl_arg->s->fields),
+ m_rows_buf(my_malloc(opt_binlog_rows_event_max_size * sizeof(*m_rows_buf), MYF(MY_WME))),
+ m_rows_cur(m_rows_buf),
+ m_rows_end(m_rows_buf + opt_binlog_rows_event_max_size),
+ m_flags(0)
+{
+ DBUG_ASSERT(m_table && m_table->s);
+ DBUG_ASSERT(m_table_id != ULONG_MAX);
+
+ if (thd_arg->options & OPTION_NO_FOREIGN_KEY_CHECKS)
+ set_flags(NO_FOREIGN_KEY_CHECKS_F);
+ if (thd_arg->options & OPTION_RELAXED_UNIQUE_CHECKS)
+ set_flags(RELAXED_UNIQUE_CHECKS_F);
+  /* If bitmap_init() fails, it is caught in is_valid() */
+ if (likely(!bitmap_init(&m_cols,
+ m_width <= sizeof(m_bitbuf)*8 ? m_bitbuf : NULL,
+ (m_width + 7) & ~7UL,
+ false)))
+ memcpy(m_cols.bitmap, cols->bitmap, no_bytes_in_map(cols));
+ else
+ m_cols.bitmap= 0; // to not free it
+}
+#endif
+
+Rows_log_event::Rows_log_event(const char *buf, uint event_len,
+ Log_event_type event_type,
+ const Format_description_log_event
+ *description_event)
+ : Log_event(buf, description_event),
+ m_rows_buf(0), m_rows_cur(0), m_rows_end(0)
+{
+ DBUG_ENTER("Rows_log_event::Rows_log_event(const char*,...)");
+ uint8 const common_header_len= description_event->common_header_len;
+ uint8 const post_header_len= description_event->post_header_len[event_type-1];
+
+ DBUG_PRINT("enter",("event_len=%ld, common_header_len=%d, "
+ "post_header_len=%d",
+ event_len, common_header_len,
+ post_header_len));
+
+ const char *post_start= buf + common_header_len;
+ post_start+= RW_MAPID_OFFSET;
+ if (post_header_len == 6)
+ {
+ /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */
+ m_table_id= uint4korr(post_start);
+ post_start+= 4;
+ }
+ else
+ {
+ m_table_id= uint6korr(post_start);
+ post_start+= RW_FLAGS_OFFSET;
+ }
+
+ DBUG_ASSERT(m_table_id != ULONG_MAX);
+
+ m_flags= uint2korr(post_start);
+
+ byte const *const var_start= buf + common_header_len + post_header_len;
+ byte const *const ptr_width= var_start;
+ byte const *const ptr_after_width= my_vle_decode(&m_width, ptr_width);
+
+ const uint byte_count= (m_width + 7) / 8;
+ const char* const ptr_rows_data= var_start + byte_count + 1;
+
+ my_size_t const data_size= event_len - (ptr_rows_data - buf);
+ DBUG_PRINT("info",("m_table_id=%lu, m_flags=%d, m_width=%u, data_size=%lu",
+ m_table_id, m_flags, m_width, data_size));
+
+ m_rows_buf= my_malloc(data_size, MYF(MY_WME));
+ if (likely((bool)m_rows_buf))
+ {
+    /* If bitmap_init() fails, it is caught in is_valid() */
+ if (likely(!bitmap_init(&m_cols,
+ m_width <= sizeof(m_bitbuf)*8 ? m_bitbuf : NULL,
+ (m_width + 7) & ~7UL,
+ false)))
+ memcpy(m_cols.bitmap, ptr_after_width, byte_count);
+ m_rows_end= m_rows_buf + data_size;
+ m_rows_cur= m_rows_end;
+ memcpy(m_rows_buf, ptr_rows_data, data_size);
+ }
+ else
+ m_cols.bitmap= 0; // to not free it
+
+ DBUG_VOID_RETURN;
+}
+
+Rows_log_event::~Rows_log_event()
+{
+ if (m_cols.bitmap == m_bitbuf) // no my_malloc happened
+ m_cols.bitmap= 0; // so no my_free in bitmap_free
+ bitmap_free(&m_cols); // To pair with bitmap_init().
+ my_free(m_rows_buf, MYF(MY_ALLOW_ZERO_PTR));
+}
+
+#ifndef MYSQL_CLIENT
+int Rows_log_event::do_add_row_data(byte *const row_data,
+ my_size_t const length)
+{
+ /*
+ When the table has a primary key, we would probably want, by default, to
+ log only the primary key value instead of the entire "before image". This
+ would save binlog space. TODO
+ */
+ DBUG_ENTER("Rows_log_event::do_add_row_data(byte *data, my_size_t length)");
+ DBUG_PRINT("enter", ("row_data= %p, length= %lu", row_data, length));
+ DBUG_DUMP("row_data", row_data, min(length, 32));
+
+ DBUG_ASSERT(m_rows_buf <= m_rows_cur);
+ DBUG_ASSERT(m_rows_buf < m_rows_end);
+ DBUG_ASSERT(m_rows_cur <= m_rows_end);
+
+ /* The cast will always work since m_rows_cur <= m_rows_end */
+ if (static_cast<my_size_t>(m_rows_end - m_rows_cur) < length)
+ {
+ my_size_t const block_size= 1024;
+ my_ptrdiff_t const old_alloc= m_rows_end - m_rows_buf;
+ my_ptrdiff_t const new_alloc=
+ old_alloc + block_size * (length / block_size + block_size - 1);
+ my_ptrdiff_t const cur_size= m_rows_cur - m_rows_buf;
+
+ byte* const new_buf= my_realloc(m_rows_buf, new_alloc, MYF(MY_WME));
+ if (unlikely(!new_buf))
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+
+ /* If the memory moved, we need to move the pointers */
+ if (new_buf != m_rows_buf)
+ {
+ m_rows_buf= new_buf;
+ m_rows_cur= m_rows_buf + cur_size;
+ }
+
+ /*
+ The end pointer should always be changed to point to the end of
+ the allocated memory.
+ */
+ m_rows_end= m_rows_buf + new_alloc;
+ }
+
+ DBUG_ASSERT(m_rows_cur + length < m_rows_end);
+ memcpy(m_rows_cur, row_data, length);
+ m_rows_cur+= length;
+ DBUG_RETURN(0);
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+/*
+  Unpack a row into a record. The row is assumed to contain only the fields
+  for which the corresponding bits in 'cols' are set; the other parts of the
+  record are left alone.
+ */
+static char const *unpack_row(TABLE *table,
+ char *record, char const *row,
+ MY_BITMAP const *cols)
+{
+ DBUG_ASSERT(record && row);
+
+ MY_BITMAP *write_set= table->file->write_set;
+ my_size_t const n_null_bytes= table->s->null_bytes;
+ my_ptrdiff_t const offset= record - (byte*) table->record[0];
+
+ memcpy(record, row, n_null_bytes);
+ char const *ptr= row + n_null_bytes;
+
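+  /*
+    Assume all columns will be written, then clear the write_set bit for
+    each column that is not present in the row image, i.e. whose bit in
+    'cols' is not set.
+  */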
+ bitmap_set_all(write_set);
+ Field **const begin_ptr = table->field;
+ for (Field **field_ptr= begin_ptr ; *field_ptr ; ++field_ptr)
+ {
+ Field *const f= *field_ptr;
+
+ if (bitmap_is_set(cols, field_ptr - begin_ptr))
+ {
+ /* Field...::unpack() cannot return 0 */
+ ptr= f->unpack(f->ptr + offset, ptr);
+ }
+ else
+ bitmap_clear_bit(write_set, (field_ptr - begin_ptr) + 1);
+ }
+ return ptr;
+}
+
+int Rows_log_event::exec_event(st_relay_log_info *rli)
+{
+ DBUG_ENTER("Rows_log_event::exec_event(st_relay_log_info*)");
+ DBUG_ASSERT(m_table_id != ULONG_MAX);
+ int error= 0;
+ char const *row_start= m_rows_buf;
+ TABLE* table= rli->m_table_map.get_table(m_table_id);
+
+ /*
+ 'thd' has been set by exec_relay_log_event(), just before calling
+ exec_event(). We still check here to prevent future coding errors.
+ */
+ DBUG_ASSERT(rli->sql_thd == thd);
+
+ /*
+    lock_tables() reads the contents of thd->lex, so they must be
+    initialized. We should call lex_start(); to be even safer, we call
+    mysql_init_query(), which does a more complete set of inits.
+ */
+ mysql_init_query(thd, NULL, 0);
+
+ if (table)
+ {
+ /*
+ table == NULL means that this table should not be
+ replicated (this was set up by Table_map_log_event::exec_event() which
+ tested replicate-* rules).
+ */
+ TABLE_LIST table_list;
+ bool need_reopen;
+ uint count= 1;
+ bzero(&table_list, sizeof(table_list));
+ table_list.lock_type= TL_WRITE;
+ table_list.next_global= table_list.next_local= 0;
+ table_list.table= table;
+
+ for ( ; ; )
+ {
+ table_list.db= const_cast<char*>(table->s->db.str);
+ table_list.alias= table_list.table_name=
+ const_cast<char*>(table->s->table_name.str);
+
+ if ((error= lock_tables(thd, &table_list, count, &need_reopen)) == 0)
+ break;
+ if (!need_reopen)
+ {
+ slave_print_msg(ERROR_LEVEL, rli, error,
+ "Error in %s event: error during table %s.%s lock",
+ get_type_str(), table->s->db, table->s->table_name);
+ DBUG_RETURN(error);
+ }
+ /*
+ we need to store a local copy of the table names since the table object
+ will become invalid after close_tables_for_reopen
+ */
+ char *db= my_strdup(table->s->db.str, MYF(MY_WME));
+ char *table_name= my_strdup(table->s->table_name.str, MYF(MY_WME));
+
+ if (db == 0 || table_name == 0)
+ {
+ /*
+          Since lock_tables() failed, the tables are not locked, so
+          we do not need to unlock them.
+ */
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+ }
+
+ /*
+        We also need to flush the pending RBR event, since it keeps a
+ pointer to an open table.
+
+ ALTERNATIVE SOLUTION: Extract a pointer to the pending RBR
+        event and reset the table pointer after the tables have been
+ reopened.
+ */
+ thd->binlog_flush_pending_rows_event(false);
+
+ close_tables_for_reopen(thd, &table_list);
+
+      /* Open the table again, same as in Table_map_log_event::exec_event(). */
+ table_list.db= const_cast<char*>(db);
+ table_list.alias= table_list.table_name= const_cast<char*>(table_name);
+ table_list.updating= 1;
+ TABLE_LIST *tables= &table_list;
+ if ((error= open_tables(thd, &tables, &count, 0)) == 0)
+ {
+        /* Reset some variables for the table list. */
+ table_list.updating= 0;
+ /* retrieve the new table reference and update the table map */
+ table= table_list.table;
+ error= rli->m_table_map.set_table(m_table_id, table);
+ }
+ else /* error in open_tables */
+ {
+ if (thd->query_error || thd->is_fatal_error)
+ {
+ /*
+ Error reporting borrowed from Query_log_event with many excessive
+ simplifications (we don't honour --slave-skip-errors)
+ */
+ uint actual_error= thd->net.last_errno;
+ slave_print_msg(ERROR_LEVEL, rli, actual_error,
+ "Error '%s' on reopening table `%s`.`%s`",
+ (actual_error ? thd->net.last_error :
+ "unexpected success or fatal error"),
+ db, table_name);
+ thd->query_error= 1;
+ }
+ }
+ my_free((char*) db, MYF(MY_ALLOW_ZERO_PTR));
+ my_free((char*) table_name, MYF(MY_ALLOW_ZERO_PTR));
+
+ if (error)
+ DBUG_RETURN(error);
+ }
+
+ /*
+    It is not strictly needed to call set_time() here, but
+    1) it preserves the property that "Time" in SHOW PROCESSLIST shows how
+    much the slave is behind
+    2) it will be needed when we allow replication from a table with no
+    TIMESTAMP column to a table with one.
+    So we call set_time(), like in SBR. Presently it changes nothing.
+ */
+ thd->set_time((time_t)when);
+ /*
+ There are a few flags that are replicated with each row event.
+ Make sure to set/clear them before executing the main body of
+ the event.
+ */
+ if (get_flags(NO_FOREIGN_KEY_CHECKS_F))
+ thd->options|= OPTION_NO_FOREIGN_KEY_CHECKS;
+ else
+ thd->options&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
+
+ if (get_flags(RELAXED_UNIQUE_CHECKS_F))
+ thd->options|= OPTION_RELAXED_UNIQUE_CHECKS;
+ else
+ thd->options&= ~OPTION_RELAXED_UNIQUE_CHECKS;
+ /* A small test to verify that objects have consistent types */
+ DBUG_ASSERT(sizeof(thd->options) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS));
+
+ error= do_before_row_operations(table);
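+  /*
+    Apply the rows one by one: do_prepare_row() unpacks the next row
+    image(s) into the record buffers and do_exec_row() applies the change.
+    HA_ERR_RECORD_CHANGED and HA_ERR_KEY_NOT_FOUND are treated as
+    recoverable so that row application stays idempotent.
+  */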
+ while (error == 0 && row_start < m_rows_end) {
+ char const *row_end= do_prepare_row(thd, table, row_start);
+ DBUG_ASSERT(row_end != NULL); // cannot happen
+ DBUG_ASSERT(row_end <= m_rows_end);
+
+    /* in_use may have been set to NULL in close_tables_for_reopen */
+ THD* old_thd= table->in_use;
+ if (!table->in_use)
+ table->in_use= thd;
+ error= do_exec_row(table);
+ table->in_use = old_thd;
+ switch (error)
+ {
+ /* Some recoverable errors */
+ case HA_ERR_RECORD_CHANGED:
+ case HA_ERR_KEY_NOT_FOUND: /* Idempotency support: OK if
+ tuple does not exist */
+ error= 0;
+ case 0:
+ break;
+
+ default:
+ slave_print_msg(ERROR_LEVEL, rli, error,
+ "Error in %s event: row application failed",
+ get_type_str());
+ thd->query_error= 1;
+ break;
+ }
+
+ row_start= row_end;
+ }
+ DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event",
+ rli->abort_slave=1;);
+ error= do_after_row_operations(table, error);
+ if (!cache_stmt)
+ thd->options|= OPTION_STATUS_NO_TRANS_UPDATE;
+
+ }
+
+ if (error)
+  { /* an error has occurred during the transaction */
+ /*
+      If one day we honour --slave-skip-errors in row-based replication, and
+      the error should be skipped, then we would clear mappings, roll back and
+      close tables here, but the slave SQL thread would not stop and might then
+      assume the mapping is still available and the tables are still open...
+      So then we should clear mappings/rollback/close here only if this is a
+      STMT_END_F.
+      For now we code knowing that the error is not skippable, so the slave
+      SQL thread is certainly going to stop.
+ */
+ rli->cleanup_context(thd, 1);
+ thd->query_error= 1;
+ DBUG_RETURN(error);
+ }
+
+ if (get_flags(STMT_END_F))
+ {
+ /*
+ This is the end of a statement or transaction, so close (and
+ unlock) the tables we opened when processing the
+ Table_map_log_event starting the statement.
+
+      OBSERVE: this will clear *all* mappings, not only those that
+      are open for the table. There is no good handle for on-close
+      actions for tables.
+
+ NOTE. Even if we have no table ('table' == 0) we still need to be
+ here, so that we increase the group relay log position. If we didn't, we
+ could have a group relay log position which lags behind "forever"
+ (assume the last master's transaction is ignored by the slave because of
+ replicate-ignore rules).
+ */
+ thd->binlog_flush_pending_rows_event(true);
+ /*
+ If this event is not in a transaction, the call below will, if some
+ transactional storage engines are involved, commit the statement into
+ them and flush the pending event to binlog.
+ If this event is in a transaction, the call will do nothing, but a
+ Xid_log_event will come next which will, if some transactional engines
+ are involved, commit the transaction and flush the pending event to the
+ binlog.
+ */
+ error= ha_autocommit_or_rollback(thd, 0);
+ /*
+      Now what if this is not a transactional engine? We still need to
+ flush the pending event to the binlog; we did it with
+ thd->binlog_flush_pending_rows_event(). Note that we imitate
+ what is done for real queries: a call to
+      ha_autocommit_or_rollback() (sometimes only if it involves a
+ transactional engine), and a call to be sure to have the pending
+ event flushed.
+ */
+
+ rli->cleanup_context(thd, 0);
+ rli->transaction_end(thd);
+
+ if (error == 0)
+ {
+ /*
+ Clear any errors pushed in thd->net.last_err* if for example "no key
+ found" (as this is allowed). This is a safety measure; apparently
+ those errors (e.g. when executing a Delete_rows_log_event of a
+ non-existing row, like in rpl_row_mystery22.test,
+ thd->net.last_error = "Can't find record in 't1'" and last_errno=1032)
+ do not become visible. We still prefer to wipe them out.
+ */
+ thd->clear_error();
+ error= Log_event::exec_event(rli);
+ }
+ else
+ slave_print_msg(ERROR_LEVEL, rli, error,
+ "Error in %s event: commit of row events failed, "
+ "table `%s`.`%s`",
+ get_type_str(), table->s->db, table->s->table_name);
+ DBUG_RETURN(error);
+ }
+
+ if (table)
+ {
+ /*
+ As "table" is not NULL, we did a successful lock_tables(), without any
+ prior LOCK TABLES and are not in prelocked mode, so this assertion should
+ be true.
+ */
+ DBUG_ASSERT(thd->lock);
+ /*
+ If we are here, there are more events to come which may use our mappings
+ and our table. So don't clear mappings or close tables, just unlock
+ tables.
+      Why don't we lock the table once and for all in
+      Table_map_log_event::exec_event()? Because we could have in the binlog:
+ BEGIN;
+ Table_map t1 -> 1
+ Write_rows to id 1
+ Table_map t2 -> 2
+ Write_rows to id 2
+ Xid_log_event
+ So we cannot lock t1 when executing the first Table_map, because at that
+ moment we don't know we'll also have to lock t2, and all tables must be
+ locked at once in MySQL.
+ */
+ mysql_unlock_tables(thd, thd->lock);
+ thd->lock= 0;
+ if ((table->s->primary_key == MAX_KEY) &&
+ !cache_stmt)
+ {
+ /*
+ ------------ Temporary fix until WL#2975 is implemented ---------
+ This event is not the last one (no STMT_END_F). If we stop now (in
+ case of terminate_slave_thread()), how will we restart? We have to
+ restart from Table_map_log_event, but as this table is not
+ transactional, the rows already inserted will still be present, and
+ idempotency is not guaranteed (no PK) so we risk that repeating leads
+        to double insert. So we desperately try to continue, hoping we'll
+        eventually leave this buggy situation (by executing the final
+        Rows_log_event). If we are in a hopeless wait (reached end of last
+        relay log and nothing gets appended there), we time out after one
+        minute and notify the DBA about the problem.
+        When WL#2975 is implemented, just remove the member
+        st_relay_log_info::unsafe_to_stop_at and all its occurrences.
+ */
+ rli->unsafe_to_stop_at= time(0);
+ }
+ }
+
+ DBUG_ASSERT(error == 0);
+ thd->clear_error();
+ rli->inc_event_relay_log_pos();
+
+ DBUG_RETURN(0);
+}
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+
+#ifndef MYSQL_CLIENT
+bool Rows_log_event::write_data_header(IO_CACHE *file)
+{
+ DBUG_ASSERT(m_table_id != ULONG_MAX);
+ byte buf[ROWS_HEADER_LEN]; // No need to init the buffer
+ DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
+ {
+ int4store(buf + 0, m_table_id);
+ int2store(buf + 4, m_flags);
+ return (my_b_safe_write(file, buf, 6));
+ });
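+  /*
+    Post-header layout: 6-byte table id at RW_MAPID_OFFSET followed by a
+    2-byte flags field at RW_FLAGS_OFFSET, ROWS_HEADER_LEN bytes in total.
+  */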
+ int6store(buf + RW_MAPID_OFFSET, (ulonglong)m_table_id);
+ int2store(buf + RW_FLAGS_OFFSET, m_flags);
+ return (my_b_safe_write(file, buf, ROWS_HEADER_LEN));
+}
+
+bool Rows_log_event::write_data_body(IO_CACHE*file)
+{
+ /*
+ Note that this should be the number of *bits*, not the number of
+ bytes.
+ */
+ byte sbuf[my_vle_sizeof(m_width)];
+ my_ptrdiff_t const data_size= m_rows_cur - m_rows_buf;
+
+ char *const sbuf_end= my_vle_encode(sbuf, sizeof(sbuf), m_width);
+ DBUG_ASSERT(static_cast<my_size_t>(sbuf_end - sbuf) <= sizeof(sbuf));
+
+ return (my_b_safe_write(file, sbuf, sbuf_end - sbuf) ||
+ my_b_safe_write(file, reinterpret_cast<byte*>(m_cols.bitmap),
+ no_bytes_in_map(&m_cols)) ||
+ my_b_safe_write(file, m_rows_buf, data_size));
+}
+#endif
+
+#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) && defined(DBUG_RBR)
+void Rows_log_event::pack_info(Protocol *protocol)
+{
+ char buf[256];
+ char const *const flagstr= get_flags(STMT_END_F) ? "STMT_END_F" : "";
+ char const *const dbnam= m_table->s->db;
+ char const *const tblnam= m_table->s->table_name;
+ my_size_t bytes= snprintf(buf, sizeof(buf),
+ "%s.%s - %s", dbnam, tblnam, flagstr);
+ protocol->store(buf, bytes, &my_charset_bin);
+}
+#endif
+
+/**************************************************************************
+ Table_map_log_event member functions
+**************************************************************************/
+
+/*
+ Constructor used to build an event for writing to the binary log.
+ Mats says tbl->s lives longer than this event so it's ok to copy pointers
+ (tbl->s->db etc) and not pointer content.
+ */
+#if !defined(MYSQL_CLIENT)
+Table_map_log_event::Table_map_log_event(THD *thd, TABLE *tbl, ulong tid,
+ bool is_transactional, uint16 flags)
+ : Log_event(thd, 0, is_transactional),
+ m_table(tbl),
+ m_dbnam(tbl->s->db.str),
+ m_dblen(m_dbnam ? tbl->s->db.length : 0),
+ m_tblnam(tbl->s->table_name.str),
+ m_tbllen(tbl->s->table_name.length),
+ m_colcnt(tbl->s->fields), m_coltype(0),
+ m_table_id(tid),
+ m_flags(flags)
+{
+ DBUG_ASSERT(m_table_id != ULONG_MAX);
+ /*
+    In TABLE_SHARE, "db" and "table_name" are 0-terminated; see this comment
+    in table.cc / alloc_table_share():
+      "Use the fact the key is db/0/table_name/0"
+    As we rely on this, let's assert it.
+ */
+ DBUG_ASSERT((tbl->s->db.str == 0) ||
+ (tbl->s->db.str[tbl->s->db.length] == 0));
+ DBUG_ASSERT(tbl->s->table_name.str[tbl->s->table_name.length] == 0);
+
+
+ m_data_size= TABLE_MAP_HEADER_LEN;
+ DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", m_data_size= 6;)
+ m_data_size+= m_dblen + 2; // Include length and terminating \0
+ m_data_size+= m_tbllen + 2; // Include length and terminating \0
+ m_data_size+= 1 + m_colcnt; // COLCNT and column types
+
+  /* If malloc fails, it is caught in is_valid() */
+ if ((m_memory= my_malloc(m_colcnt, MYF(MY_WME))))
+ {
+ m_coltype= reinterpret_cast<unsigned char*>(m_memory);
+ for (unsigned int i= 0 ; i < m_table->s->fields ; ++i)
+ m_coltype[i]= m_table->field[i]->type();
+ }
+}
+#endif /* !defined(MYSQL_CLIENT) */
+
+/*
+ Constructor used by slave to read the event from the binary log.
+ */
+#if defined(HAVE_REPLICATION)
+Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
+ const Format_description_log_event
+ *description_event)
+
+ : Log_event(buf, description_event),
+#ifndef MYSQL_CLIENT
+ m_table(NULL),
+#endif
+ m_memory(NULL)
+{
+ DBUG_ENTER("Table_map_log_event::Table_map_log_event(const char*,uint,...)");
+
+ uint8 common_header_len= description_event->common_header_len;
+ uint8 post_header_len= description_event->post_header_len[TABLE_MAP_EVENT-1];
+ DBUG_PRINT("info",("event_len=%ld, common_header_len=%d, post_header_len=%d",
+ event_len, common_header_len, post_header_len));
+
+ DBUG_DUMP("event buffer", buf, event_len);
+
+ /* Read the post-header */
+ const char *post_start= buf + common_header_len;
+
+ post_start+= TM_MAPID_OFFSET;
+ if (post_header_len == 6)
+ {
+ /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */
+ m_table_id= uint4korr(post_start);
+ post_start+= 4;
+ }
+ else
+ {
+ DBUG_ASSERT(post_header_len == TABLE_MAP_HEADER_LEN);
+ m_table_id= uint6korr(post_start);
+ post_start+= TM_FLAGS_OFFSET;
+ }
+
+ DBUG_ASSERT(m_table_id != ULONG_MAX);
+
+ m_flags= uint2korr(post_start);
+
+ /* Read the variable part of the event */
+ const char *const vpart= buf + common_header_len + post_header_len;
+
+ /* Extract the length of the various parts from the buffer */
+ byte const* const ptr_dblen= vpart + 0;
+ m_dblen= *(unsigned char*) ptr_dblen;
+
+ /* Length of database name + counter + terminating null */
+ byte const* const ptr_tbllen= ptr_dblen + m_dblen + 2;
+ m_tbllen= *(unsigned char*) ptr_tbllen;
+
+ /* Length of table name + counter + terminating null */
+ byte const* const ptr_colcnt= ptr_tbllen + m_tbllen + 2;
+ byte const* const ptr_after_colcnt= my_vle_decode(&m_colcnt, ptr_colcnt);
+
+ DBUG_PRINT("info",("m_dblen=%d off=%d m_tbllen=%d off=%d m_colcnt=%d off=%d",
+ m_dblen, ptr_dblen-vpart, m_tbllen, ptr_tbllen-vpart,
+ m_colcnt, ptr_colcnt-vpart));
+
+  /* Allocate memory for all fields in one go; if it fails, is_valid() catches it */
+ m_memory= my_multi_malloc(MYF(MY_WME),
+ &m_dbnam, m_dblen + 1,
+ &m_tblnam, m_tbllen + 1,
+ &m_coltype, m_colcnt,
+ NULL);
+
+ if (m_memory)
+ {
+ /* Copy the different parts into their memory */
+ strncpy(const_cast<char*>(m_dbnam), ptr_dblen + 1, m_dblen + 1);
+ strncpy(const_cast<char*>(m_tblnam), ptr_tbllen + 1, m_tbllen + 1);
+ memcpy(m_coltype, ptr_after_colcnt, m_colcnt);
+ }
+
+ DBUG_VOID_RETURN;
+}
+#endif
+
+Table_map_log_event::~Table_map_log_event()
+{
+ my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR));
+}
+
+/*
+ Find a table based on database name and table name.
+
+ DESCRIPTION
+
+ Currently, only the first table of the 'table_list' is located. If the
+ table is found in the list of open tables for the thread, the 'table'
+ field of 'table_list' is filled in.
+
+ PARAMETERS
+
+ thd Thread structure
+ table_list List of tables to locate in the thd->open_tables list.
+ count Pointer to a variable that will be set to the number of
+ tables found. If the pointer is NULL, nothing will be stored.
+
+ RETURN VALUE
+
+ The number of tables found.
+
+ TO DO
+
+ Replace the list of table searches with a hash based on the combined
+ database and table name. The handler_tables_hash is inappropriate since
+ it hashes on the table alias. At the same time, the function can be
+ extended to handle a full list of table names, in the same spirit as
+ open_tables() and lock_tables().
+*/
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+static uint find_tables(THD *thd, TABLE_LIST *table_list, uint *count)
+{
+ uint result= 0;
+
+ /* we verify that the caller knows our limitation */
+ DBUG_ASSERT(table_list->next_global == 0);
+ for (TABLE *table= thd->open_tables; table ; table= table->next)
+ {
+ if (strcmp(table->s->db.str, table_list->db) == 0
+ && strcmp(table->s->table_name.str, table_list->table_name) == 0)
+ {
+ /* Copy the table pointer into the table list. */
+ table_list->table= table;
+ result= 1;
+ break;
+ }
+ }
+
+ if (count)
+ *count= result;
+ return result;
+}
+#endif
+
+/*
+ Return value is an error code, one of:
+
+ -1 Failure to open table [from open_tables()]
+ 0 Success
+ 1 No room for more tables [from set_table()]
+ 2 Out of memory [from set_table()]
+ 3 Wrong table definition
+ 4 Daisy-chaining RBR with SBR not possible
+ */
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Table_map_log_event::exec_event(st_relay_log_info *rli)
+{
+ DBUG_ENTER("Table_map_log_event::exec_event(st_relay_log_info*)");
+
+ DBUG_ASSERT(rli->sql_thd == thd);
+
+ /* Step the query id to mark what columns that are actually used. */
+ pthread_mutex_lock(&LOCK_thread_count);
+ thd->query_id= next_query_id();
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ TABLE_LIST table_list;
+ uint32 dummy_len;
+ bzero(&table_list, sizeof(table_list));
+ table_list.db= const_cast<char *>
+ (rpl_filter->get_rewrite_db(m_dbnam, &dummy_len));
+ table_list.alias= table_list.table_name= const_cast<char*>(m_tblnam);
+ table_list.lock_type= TL_WRITE;
+ table_list.next_global= table_list.next_local= 0;
+ table_list.updating= 1;
+
+ int error= 0;
+
+ if (rpl_filter->db_ok(table_list.db) &&
+ (!rpl_filter->is_on() || rpl_filter->tables_ok("", &table_list)))
+ {
+ /*
+ Check if the slave is set to use SBR. If so, the slave should
+ stop immediately since it is not possible to daisy-chain from
+ RBR to SBR. Once RBR is used, the rest of the chain has to use
+ RBR.
+ */
+ if (mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG) &&
+ !binlog_row_based)
+ {
+ slave_print_msg(ERROR_LEVEL, rli, ER_BINLOG_ROW_RBR_TO_SBR,
+ "It is not possible to use statement-based binlogging "
+ "on a slave that replicates row-based. Please use "
+ "--binrow-format=row on slave if you want to use "
+ "--log-slave-updates and read row-based binlog events.");
+ DBUG_RETURN(ERR_RBR_TO_SBR);
+ }
+
+ /*
+ Open the table if it is not already open and add the table to table map.
+ If the table should not be replicated, we don't bother to do anything.
+ The table map will return NULL and the row-level event will effectively
+ be a no-op.
+ */
+ uint count;
+ if (find_tables(thd, &table_list, &count) == 0)
+ {
+ /*
+        open_tables() reads the contents of thd->lex, so they must be
+        initialized. We should call lex_start(); to be even safer, we call
+        mysql_init_query(), which does a more complete set of inits.
+ */
+ mysql_init_query(thd, NULL, 0);
+ TABLE_LIST *tables= &table_list;
+ if ((error= open_tables(thd, &tables, &count, 0)))
+ {
+ if (thd->query_error || thd->is_fatal_error)
+ {
+ /*
+ Error reporting borrowed from Query_log_event with many excessive
+ simplifications (we don't honour --slave-skip-errors)
+ */
+ uint actual_error= thd->net.last_errno;
+ slave_print_msg(ERROR_LEVEL, rli, actual_error,
+ "Error '%s' on opening table `%s`.`%s`",
+ (actual_error ? thd->net.last_error :
+ "unexpected success or fatal error"),
+ table_list.db, table_list.table_name);
+ thd->query_error= 1;
+ }
+ DBUG_RETURN(error);
+ }
+ }
+
+ m_table= table_list.table;
+
+ /*
+      This will fail later otherwise: the 'in_use' field should be
+      set to the current thread.
+ */
+ DBUG_ASSERT(m_table->in_use);
+
+ /*
+ Check that the number of columns and the field types in the
+ event match the number of columns and field types in the opened
+ table.
+ */
+ uint col= m_table->s->fields;
+
+ if (col == m_colcnt)
+ {
+ while (col-- > 0)
+ if (m_table->field[col]->type() != m_coltype[col])
+ break;
+ }
+
+ TABLE_SHARE const *const tsh= m_table->s;
+
+ /*
+ Check the following termination conditions:
+
+ (col == m_table->s->fields)
+ ==> (m_table->s->fields != m_colcnt)
+ (0 <= col < m_table->s->fields)
+ ==> (m_table->field[col]->type() != m_coltype[col])
+
+ Logically, A ==> B is equivalent to !A || B
+
+      Since col is unsigned, it suffices to check that col <=
+ tsh->fields. If col wrapped (by decreasing col when it is 0),
+ the number will be UINT_MAX, which is greater than tsh->fields.
+ */
+ DBUG_ASSERT(!(col == tsh->fields) || tsh->fields != m_colcnt);
+ DBUG_ASSERT(!(col < tsh->fields) ||
+ (m_table->field[col]->type() != m_coltype[col]));
+
+ if (col <= tsh->fields)
+ {
+ /*
+ If we get here, the number of columns in the event didn't
+ match the number of columns in the table on the slave, *or*
+        there was a column in the table on the slave that did not
+ have the same type as given in the event.
+
+ If 'col' has the value that was assigned to it, it was a
+ mismatch between the number of columns on the master and the
+ slave.
+ */
+ if (col == tsh->fields)
+ {
+ DBUG_ASSERT(tsh->db.str && tsh->table_name.str);
+ slave_print_msg(ERROR_LEVEL, rli, ER_BINLOG_ROW_WRONG_TABLE_DEF,
+ "Table width mismatch - "
+ "received %u columns, %s.%s has %u columns",
+ m_colcnt, tsh->db.str, tsh->table_name.str, tsh->fields);
+ }
+ else
+ {
+ DBUG_ASSERT(col < m_colcnt && col < tsh->fields);
+ DBUG_ASSERT(tsh->db.str && tsh->table_name.str);
+ slave_print_msg(ERROR_LEVEL, rli, ER_BINLOG_ROW_WRONG_TABLE_DEF,
+ "Column %d type mismatch - "
+ "received type %d, %s.%s has type %d",
+ col, m_coltype[col], tsh->db.str, tsh->table_name.str,
+ m_table->field[col]->type());
+ }
+
+ thd->query_error= 1;
+ DBUG_RETURN(ERR_BAD_TABLE_DEF);
+ }
+
+ /*
+ We record in the slave's information that the number m_table_id is
+ mapped to the m_table object
+ */
+ if (!error)
+ error= rli->m_table_map.set_table(m_table_id, m_table);
+
+ /*
+ Tell the RLI that we are touching a table.
+
+ TODO: Maybe we can combine this with the previous operation?
+ */
+ if (!error)
+ rli->touching_table(m_dbnam, m_tblnam, m_table_id);
+ }
+
+ /*
+ We explicitly do not call Log_event::exec_event() here since we do not
+ want the relay log position to be flushed to disk. The flushing will be
+ done by the last Rows_log_event that either ends a statement (outside a
+ transaction) or a transaction.
+
+ A table map event can *never* end a transaction or a statement, so we
+ just step the relay log position.
+ */
+
+ if (likely(!error))
+ rli->inc_event_relay_log_pos();
+
+ DBUG_RETURN(error);
+}
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+
+#ifndef MYSQL_CLIENT
+bool Table_map_log_event::write_data_header(IO_CACHE *file)
+{
+ DBUG_ASSERT(m_table_id != ULONG_MAX);
+ byte buf[TABLE_MAP_HEADER_LEN];
+ DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
+ {
+ int4store(buf + 0, m_table_id);
+ int2store(buf + 4, m_flags);
+ return (my_b_safe_write(file, buf, 6));
+ });
+ int6store(buf + TM_MAPID_OFFSET, (ulonglong)m_table_id);
+ int2store(buf + TM_FLAGS_OFFSET, m_flags);
+ return (my_b_safe_write(file, buf, TABLE_MAP_HEADER_LEN));
+}
+
+bool Table_map_log_event::write_data_body(IO_CACHE *file)
+{
+ DBUG_ASSERT(m_dbnam != NULL);
+ DBUG_ASSERT(m_tblnam != NULL);
+ /* We use only one byte per length for storage in event: */
+ DBUG_ASSERT(m_dblen < 128);
+ DBUG_ASSERT(m_tbllen < 128);
+
+ byte const dbuf[]= { m_dblen };
+ byte const tbuf[]= { m_tbllen };
+
+ byte cbuf[my_vle_sizeof(m_colcnt)];
+ byte *const cbuf_end= my_vle_encode(cbuf, sizeof(cbuf), m_colcnt);
+ DBUG_ASSERT(static_cast<my_size_t>(cbuf_end - cbuf) <= sizeof(cbuf));
+
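+  /*
+    Body layout: one length byte plus the database name and its terminating
+    null, one length byte plus the table name and its terminating null, the
+    column count as a length-coded integer, and one type byte per column.
+  */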
+ return (my_b_safe_write(file, dbuf, sizeof(dbuf)) ||
+ my_b_safe_write(file, m_dbnam, m_dblen+1) ||
+ my_b_safe_write(file, tbuf, sizeof(tbuf)) ||
+ my_b_safe_write(file, m_tblnam, m_tbllen+1) ||
+ my_b_safe_write(file, cbuf, cbuf_end - cbuf) ||
+ my_b_safe_write(file, reinterpret_cast<char*>(m_coltype), m_colcnt));
+ }
+#endif
+
+#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
+
+/*
+ Print some useful information for the SHOW BINARY LOG information
+ field.
+ */
+
+void Table_map_log_event::pack_info(Protocol *protocol)
+{
+ char buf[256];
+ my_size_t bytes= snprintf(buf, sizeof(buf), "%s.%s", m_dbnam, m_tblnam);
+ protocol->store(buf, bytes, &my_charset_bin);
+}
+
+#endif
+
+
+#ifdef MYSQL_CLIENT
+void Table_map_log_event::print(FILE *file, PRINT_EVENT_INFO *print_event_info)
+{
+ if (!print_event_info->short_form)
+ {
+ print_header(file, print_event_info);
+ fprintf(file, "\tTable_map: `%s`.`%s` mapped to number %lu\n",
+ m_dbnam, m_tblnam, m_table_id);
+ print_base64(file, print_event_info);
+ }
+}
+#endif
+
+/**************************************************************************
+ Write_rows_log_event member functions
+**************************************************************************/
+
+/*
+ Constructor used to build an event for writing to the binary log.
+ */
+#if !defined(MYSQL_CLIENT)
+Write_rows_log_event::Write_rows_log_event(THD *thd_arg, TABLE *tbl_arg,
+ ulong tid_arg,
+ MY_BITMAP const *cols,
+ bool is_transactional)
+ : Rows_log_event(thd_arg, tbl_arg, tid_arg, cols, is_transactional)
+{
+}
+#endif
+
+/*
+ Constructor used by slave to read the event from the binary log.
+ */
+#ifdef HAVE_REPLICATION
+Write_rows_log_event::Write_rows_log_event(const char *buf, uint event_len,
+ const Format_description_log_event
+ *description_event)
+: Rows_log_event(buf, event_len, WRITE_ROWS_EVENT, description_event)
+{
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Write_rows_log_event::do_before_row_operations(TABLE *table)
+{
+ int error= 0;
+
+ /*
+ We are using REPLACE semantics and not INSERT IGNORE semantics
+ when writing rows, that is: new rows replace old rows. We need to
+ inform the storage engine that it should use this behaviour.
+ */
+
+ /* Tell the storage engine that we are using REPLACE semantics. */
+ thd->lex->duplicates= DUP_REPLACE;
+
+ /*
+ Pretend we're executing a REPLACE command: this is needed for
+ InnoDB and NDB Cluster since they are not (properly) checking the
+ lex->duplicates flag.
+ */
+ thd->lex->sql_command= SQLCOM_REPLACE;
+
+ table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); // needed for ndbcluster
+ /*
+ TODO: the cluster team (Tomas?) says that it's better if the engine knows
+ how many rows are going to be inserted, then it can allocate needed memory
+ from the start.
+ */
+ table->file->start_bulk_insert(0);
+ /*
+    We need TIMESTAMP_NO_AUTO_SET, otherwise ha_write_row() will not fill
+ any TIMESTAMP column with data from the row but instead will use
+ the event's current time.
+ As we replicate from TIMESTAMP to TIMESTAMP and slave has no extra
+ columns, we know that all TIMESTAMP columns on slave will receive explicit
+ data from the row, so TIMESTAMP_NO_AUTO_SET is ok.
+ When we allow a table without TIMESTAMP to be replicated to a table having
+ more columns including a TIMESTAMP column, or when we allow a TIMESTAMP
+ column to be replicated into a BIGINT column and the slave's table has a
+ TIMESTAMP column, then the slave's TIMESTAMP column will take its value
+ from set_time() which we called earlier (consistent with SBR). And then in
+ some cases we won't want TIMESTAMP_NO_AUTO_SET (will require some code to
+ analyze if explicit data is provided for slave's TIMESTAMP columns).
+ */
+ table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
+ return error;
+}
+
+int Write_rows_log_event::do_after_row_operations(TABLE *table, int error)
+{
+ if (error == 0)
+ error= table->file->end_bulk_insert();
+ return error;
+}
+
+char const *Write_rows_log_event::do_prepare_row(THD *thd, TABLE *table,
+ char const *row_start)
+{
+ char const *ptr= row_start;
+ DBUG_ASSERT(table != NULL);
+ /*
+    This assertion actually checks that there are at least as many
+ columns on the slave as on the master.
+ */
+ DBUG_ASSERT(table->s->fields >= m_width);
+ DBUG_ASSERT(ptr);
+ ptr= unpack_row(table, table->record[0], ptr, &m_cols);
+ return ptr;
+}
+
+/*
+ Check if there are more UNIQUE keys after the given key.
+*/
+static int
+last_uniq_key(TABLE *table, uint keyno)
+{
+ while (++keyno < table->s->keys)
+ if (table->key_info[keyno].flags & HA_NOSAME)
+ return 0;
+ return 1;
+}
+
+/* Anonymous namespace for template functions/classes */
+namespace {
+
+ /*
+ Smart pointer that will automatically call my_afree (a macro) when
+ the pointer goes out of scope. This is used so that I do not have
+ to remember to call my_afree() before each return. There is no
+ overhead associated with this, since all functions are inline.
+
+ I (Matz) would prefer to use the free function as a template
+ parameter, but that is not possible when the "function" is a
+ macro.
+ */
+ template <class Obj>
+ class auto_afree_ptr
+ {
+ Obj* m_ptr;
+ public:
+ auto_afree_ptr(Obj* ptr) : m_ptr(ptr) { }
+ ~auto_afree_ptr() { if (m_ptr) my_afree(m_ptr); }
+ void assign(Obj* ptr) {
+ /* Only to be called if it hasn't been given a value before. */
+ DBUG_ASSERT(m_ptr == NULL);
+ m_ptr= ptr;
+ }
+ Obj* get() { return m_ptr; }
+ };
+
+}
+
+
+/*
+ Replace the provided record in the database.
+
+ Similar to how it is done in <code>mysql_insert()</code>, we first
+  try to do a <code>ha_write_row()</code> and if that fails due to
+ duplicated keys (or indices), we do an <code>ha_update_row()</code>
+ or a <code>ha_delete_row()</code> instead.
+
+ @param thd Thread context for writing the record.
+ @param table Table to which record should be written.
+
+ @return Error code on failure, 0 on success.
+ */
+static int
+replace_record(THD *thd, TABLE *table)
+{
+ DBUG_ASSERT(table != NULL && thd != NULL);
+
+ int error;
+ int keynum;
+ auto_afree_ptr<char> key(NULL);
+
+ while ((error= table->file->ha_write_row(table->record[0])))
+ {
+ if ((keynum= table->file->get_dup_key(error)) < 0)
+ {
+ /* We failed to retrieve the duplicate key */
+ return HA_ERR_FOUND_DUPP_KEY;
+ }
+
+ /*
+ We need to retrieve the old row into record[1] to be able to
+ either update or delete the offending record. We either:
+
+ - use rnd_pos() with a row-id (available as dupp_row) to the
+ offending row, if that is possible (MyISAM and Blackhole), or else
+
+ - use index_read_idx() with the key that is duplicated, to
+ retrieve the offending row.
+ */
+ if (table->file->table_flags() & HA_DUPP_POS)
+ {
+ error= table->file->rnd_pos(table->record[1], table->file->dupp_ref);
+ if (error)
+ return error;
+ }
+ else
+ {
+ if (table->file->extra(HA_EXTRA_FLUSH_CACHE))
+ {
+ return my_errno;
+ }
+
+ if (key.get() == NULL)
+ {
+ key.assign(static_cast<char*>(my_alloca(table->s->max_unique_length)));
+ if (key.get() == NULL)
+ return ENOMEM;
+ }
+
+ key_copy(key.get(), table->record[0], table->key_info + keynum, 0);
+ error= table->file->index_read_idx(table->record[1], keynum, key.get(),
+ table->key_info[keynum].key_length,
+ HA_READ_KEY_EXACT);
+ if (error)
+ return error;
+ }
+
+ /*
+ Now, table->record[1] should contain the offending row. That
+ will enable us to update it or, alternatively, delete it (so
+ that we can insert the new row afterwards).
+
+ REPLACE is defined as either INSERT or DELETE + INSERT. If
+ possible, we can replace it with an UPDATE, but that will not
+ work on InnoDB if FOREIGN KEY checks are necessary.
+
+ I (Matz) am not sure of the reason for the last_uniq_key()
+      check, but I'm guessing that it's something along the
+ following lines.
+
+ Suppose that we got the duplicate key to be a key that is not
+ the last unique key for the table and we perform an update:
+ then there might be another key for which the unique check will
+ fail, so we're better off just deleting the row and inserting
+ the correct row.
+ */
+ if (last_uniq_key(table, keynum) &&
+ !table->file->referenced_by_foreign_key())
+ {
+ error=table->file->ha_update_row(table->record[1],
+ table->record[0]);
+ return error;
+ }
+ else
+ {
+ if ((error= table->file->ha_delete_row(table->record[1])))
+ return error;
+ /* Will retry ha_write_row() with the offending row removed. */
+ }
+ }
+ return error;
+}
+
+int Write_rows_log_event::do_exec_row(TABLE *table)
+{
+ DBUG_ASSERT(table != NULL);
+ int error= replace_record(thd, table);
+ return error;
+}
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+
+#ifdef MYSQL_CLIENT
+void Write_rows_log_event::print(FILE *file, PRINT_EVENT_INFO* print_event_info)
+{
+ if (!print_event_info->short_form)
+ {
+ print_header(file, print_event_info);
+ fprintf(file, "\tWrite_rows: table id %lu", m_table_id);
+ print_base64(file, print_event_info);
+ }
+}
+#endif
+
+/**************************************************************************
+ Delete_rows_log_event member functions
+**************************************************************************/
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
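+/*
+  Compare two record images field by field using each field's binary
+  comparator; returns 0 when all fields are equal, otherwise the first
+  non-zero comparison result.
+*/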
+static int record_compare(TABLE *table, byte const *a, byte const *b)
+{
+ for (my_size_t i= 0 ; i < table->s->fields ; ++i)
+ {
+ uint const off= table->field[i]->offset();
+ uint const res= table->field[i]->cmp_binary(a + off, b + off);
+ if (res != 0) {
+ return res;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ Find the row given by 'key', if the table has keys, or else use a table scan
+ to find (and fetch) the row. If the engine allows random access of the
+ records, a combination of position() and rnd_pos() will be used.
+
+ The 'record_buf' will be used as buffer for records while locating the
+ correct row.
+ */
+static int find_and_fetch_row(TABLE *table, byte *key, byte *record_buf)
+{
+ DBUG_ENTER("find_and_fetch_row(TABLE *table, byte *key, byte *record)");
+ DBUG_PRINT("enter", ("table=%p, key=%p, record=%p",
+ table, key, record_buf));
+
+ DBUG_ASSERT(table->in_use != NULL);
+
+ if ((table->file->table_flags() & HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS)
+ && table->s->primary_key < MAX_KEY)
+ {
+ /*
+ Use a more efficient method to fetch the record given by
+ table->record[0] if the engine allows it. We first compute a
+ row reference using the position() member function (it will be
+      stored in table->file->ref) and then use rnd_pos() to position
+ the "cursor" at the correct row.
+ */
+ table->file->position(table->record[0]);
+ DBUG_RETURN(table->file->rnd_pos(table->record[0], table->file->ref));
+ }
+
+ DBUG_ASSERT(record_buf);
+
+ if (table->s->keys > 0)
+ {
+ int error;
+ if ((error= table->file->index_read_idx(record_buf, 0, key,
+ table->key_info->key_length,
+ HA_READ_KEY_EXACT)))
+ {
+ table->file->print_error(error, MYF(0));
+ DBUG_RETURN(error);
+ }
+
+ /*
+ Below is a minor "optimization". If the key (i.e., key number
+ 0) has the HA_NOSAME flag set, we know that we have found the
+ correct record (since there can be no duplicates); otherwise, we
+ have to compare the record with the one found to see if it is
+ the correct one.
+
+ CAVEAT! This behaviour is essential for the replication of,
+ e.g., the mysql.proc table since the correct record *shall* be
+ found using the primary key *only*. There shall be no
+ comparison of non-PK columns to decide if the correct record is
+ found. I can see no scenario where it would be incorrect to
+      choose the row to change using only a PK or a UNIQUE index.
+ */
+ if (table->key_info->flags & HA_NOSAME)
+ DBUG_RETURN(0);
+
+ while (record_compare(table, table->record[0], record_buf) != 0)
+ {
+ int error;
+ if ((error= table->file->index_next(record_buf)))
+ {
+ table->file->print_error(error, MYF(0));
+ DBUG_RETURN(error);
+ }
+ }
+ }
+ else
+ {
+ /* Continue until we find the right record or have made a full loop */
+ int restart_count= 0; // Number of times scanning has restarted from top
+ int error= 0;
+ do
+ {
+ error= table->file->rnd_next(record_buf);
+ switch (error)
+ {
+ case 0:
+ case HA_ERR_RECORD_DELETED:
+ break;
+
+ case HA_ERR_END_OF_FILE:
+ if (++restart_count < 2)
+ table->file->ha_rnd_init(1);
+ break;
+
+ default:
+ table->file->print_error(error, MYF(0));
+ DBUG_RETURN(error);
+ }
+ }
+ while (restart_count < 2 &&
+ record_compare(table, table->record[0], record_buf) != 0);
+
+ DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == 0);
+ DBUG_RETURN(error);
+ }
+
+ DBUG_RETURN(0);
+}
+#endif
+
+/*
+ Constructor used to build an event for writing to the binary log.
+ */
+
+#ifndef MYSQL_CLIENT
+Delete_rows_log_event::Delete_rows_log_event(THD *thd_arg, TABLE *tbl_arg,
+ ulong tid, MY_BITMAP const *cols,
+ bool is_transactional)
+ : Rows_log_event(thd_arg, tbl_arg, tid, cols, is_transactional)
+#ifdef HAVE_REPLICATION
+ ,m_memory(NULL), m_key(NULL), m_search_record(NULL)
+#endif
+{
+}
+#endif /* #if !defined(MYSQL_CLIENT) */
+
+/*
+ Constructor used by slave to read the event from the binary log.
+ */
+#ifdef HAVE_REPLICATION
+Delete_rows_log_event::Delete_rows_log_event(const char *buf, uint event_len,
+ const Format_description_log_event
+ *description_event)
+#if defined(MYSQL_CLIENT)
+ : Rows_log_event(buf, event_len, DELETE_ROWS_EVENT, description_event)
+#else
+ : Rows_log_event(buf, event_len, DELETE_ROWS_EVENT, description_event),
+ m_memory(NULL), m_key(NULL), m_search_record(NULL)
+#endif
+{
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Delete_rows_log_event::do_before_row_operations(TABLE *table)
+{
+ DBUG_ASSERT(m_memory == NULL);
+
+ if ((table->file->table_flags() & HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS) &&
+ table->s->primary_key < MAX_KEY)
+ {
+ /*
+ We don't need to allocate any memory for m_search_record and
+ m_key since they are not used.
+ */
+ return 0;
+ }
+
+ int error= 0;
+
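+  /*
+    Allocate the scratch record buffer and, when the table has keys, the
+    key buffer in one allocation so that both can be freed with a single
+    my_free() in do_after_row_operations().
+  */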
+ if (table->s->keys > 0)
+ {
+ m_memory=
+ my_multi_malloc(MYF(MY_WME),
+ &m_search_record, table->s->reclength,
+ &m_key, table->key_info->key_length,
+ NULL);
+ }
+ else
+ {
+ m_memory= m_search_record= my_malloc(table->s->reclength, MYF(MY_WME));
+ m_key= NULL;
+ }
+ if (!m_memory)
+ return HA_ERR_OUT_OF_MEM;
+
+ if (table->s->keys > 0)
+ {
+ /* We have a key: search the table using the index */
+ if (!table->file->inited)
+ error= table->file->ha_index_init(0, FALSE);
+ }
+ else
+ {
+    /* We don't have a key: search the table using rnd_next() */
+ error= table->file->ha_rnd_init(1);
+ }
+
+ return error;
+}
+
+int Delete_rows_log_event::do_after_row_operations(TABLE *table, int error)
+{
+  /* error= TODO: find out what this should really be; this triggers close_scan in ndb, returning an error? */
+ table->file->ha_index_or_rnd_end();
+ my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR)); // Free for multi_malloc
+ m_memory= m_search_record= m_key= NULL;
+
+ return error;
+}
+
+char const *Delete_rows_log_event::do_prepare_row(THD *thd, TABLE *table,
+ char const *row_start)
+{
+ char const *ptr= row_start;
+ DBUG_ASSERT(ptr);
+ /*
+    This assertion actually checks that there are at least as many
+ columns on the slave as on the master.
+ */
+ DBUG_ASSERT(table->s->fields >= m_width);
+
+ DBUG_ASSERT(ptr != NULL);
+ ptr= unpack_row(table, table->record[0], ptr, &m_cols);
+
+ /*
+ If we will access rows using the random access method, m_key will
+ be set to NULL, so we do not need to make a key copy in that case.
+ */
+ if (m_key)
+ {
+ KEY *const key_info= table->key_info;
+
+ key_copy(m_key, table->record[0], key_info, 0);
+ }
+
+ return ptr;
+}
+
+int Delete_rows_log_event::do_exec_row(TABLE *table)
+{
+ DBUG_ASSERT(table != NULL);
+
+ int error= find_and_fetch_row(table, m_key, m_search_record);
+ if (error)
+ return error;
+
+ /*
+ Now we should have the right row to delete. We are using
+ record[0] since it is guaranteed to point to a record with the
+ correct value.
+ */
+ error= table->file->ha_delete_row(table->record[0]);
+
+ return error;
+}
+
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+
+#ifdef MYSQL_CLIENT
+void Delete_rows_log_event::print(FILE *file,
+ PRINT_EVENT_INFO* print_event_info)
+{
+ if (!print_event_info->short_form)
+ {
+ print_header(file, print_event_info);
+ fprintf(file, "\tDelete_rows: table id %lu", m_table_id);
+ print_base64(file, print_event_info);
+ }
+}
+#endif
+
+
+/**************************************************************************
+ Update_rows_log_event member functions
+**************************************************************************/
+
+/*
+ Constructor used to build an event for writing to the binary log.
+ */
+#if !defined(MYSQL_CLIENT)
+Update_rows_log_event::Update_rows_log_event(THD *thd_arg, TABLE *tbl_arg,
+ ulong tid, MY_BITMAP const *cols,
+ bool is_transactional)
+: Rows_log_event(thd_arg, tbl_arg, tid, cols, is_transactional)
+#ifdef HAVE_REPLICATION
+ , m_memory(NULL), m_key(NULL)
+#endif
+{
+}
+#endif /* !defined(MYSQL_CLIENT) */
+
+/*
+ Constructor used by slave to read the event from the binary log.
+ */
+#ifdef HAVE_REPLICATION
+Update_rows_log_event::Update_rows_log_event(const char *buf, uint event_len,
+ const
+ Format_description_log_event
+ *description_event)
+#if defined(MYSQL_CLIENT)
+ : Rows_log_event(buf, event_len, UPDATE_ROWS_EVENT, description_event)
+#else
+ : Rows_log_event(buf, event_len, UPDATE_ROWS_EVENT, description_event),
+ m_memory(NULL), m_key(NULL)
+#endif
+{
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Update_rows_log_event::do_before_row_operations(TABLE *table)
+{
+ DBUG_ASSERT(m_memory == NULL);
+
+ if ((table->file->table_flags() & HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS) &&
+ table->s->primary_key < MAX_KEY)
+ {
+ /*
+ We don't need to allocate any memory for m_search_record and
+ m_key since they are not used.
+ */
+ return 0;
+ }
+
+ int error= 0;
+
+ if (table->s->keys > 0)
+ {
+ m_memory=
+ my_multi_malloc(MYF(MY_WME),
+ &m_search_record, table->s->reclength,
+ &m_key, table->key_info->key_length,
+ NULL);
+ }
+ else
+ {
+ m_memory= m_search_record= my_malloc(table->s->reclength, MYF(MY_WME));
+ m_key= NULL;
+ }
+ if (!m_memory)
+ return HA_ERR_OUT_OF_MEM;
+
+ if (table->s->keys > 0)
+ {
+ /* We have a key: search the table using the index */
+ if (!table->file->inited)
+ error= table->file->ha_index_init(0, FALSE);
+ }
+ else
+ {
+    /* We don't have a key: search the table using rnd_next() */
+ error= table->file->ha_rnd_init(1);
+ }
+ table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
+
+ return error;
+}
+
+int Update_rows_log_event::do_after_row_operations(TABLE *table, int error)
+{
+  /* error= TODO: find out what this should really be; this triggers close_scan in ndb, returning an error? */
+ table->file->ha_index_or_rnd_end();
+ my_free(m_memory, MYF(MY_ALLOW_ZERO_PTR));
+ m_memory= m_search_record= m_key= NULL;
+
+ return error;
+}
+
+char const *Update_rows_log_event::do_prepare_row(THD *thd, TABLE *table,
+ char const *row_start)
+{
+ char const *ptr= row_start;
+ DBUG_ASSERT(ptr);
+ /*
+    This assertion actually checks that there are at least as many
+ columns on the slave as on the master.
+ */
+ DBUG_ASSERT(table->s->fields >= m_width);
+
+ /* record[0] is the before image for the update */
+ ptr= unpack_row(table, table->record[0], ptr, &m_cols);
+ DBUG_ASSERT(ptr != NULL);
+ /* record[1] is the after image for the update */
+ ptr= unpack_row(table, table->record[1], ptr, &m_cols);
+
+ /*
+ If we will access rows using the random access method, m_key will
+ be set to NULL, so we do not need to make a key copy in that case.
+ */
+ if (m_key)
+ {
+ KEY *const key_info= table->key_info;
+
+ key_copy(m_key, table->record[0], key_info, 0);
+ }
+
+ return ptr;
+}
+
+int Update_rows_log_event::do_exec_row(TABLE *table)
+{
+ DBUG_ASSERT(table != NULL);
+
+ int error= find_and_fetch_row(table, m_key, m_search_record);
+ if (error)
+ return error;
+
+ /*
+ Now we should have the right row to update. The record that has
+ been fetched is guaranteed to be in record[0], so we use that.
+ */
+ error= table->file->ha_update_row(table->record[0], table->record[1]);
+
+ return error;
+}
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+
+#ifdef MYSQL_CLIENT
+void Update_rows_log_event::print(FILE *file,
+ PRINT_EVENT_INFO* print_event_info)
+{
+ if (!print_event_info->short_form)
+ {
+ print_header(file, print_event_info);
+ fprintf(file, "\tUpdate_rows: table id %lu", m_table_id);
+ print_base64(file, print_event_info);
+ }
+}
+#endif
+
+#endif /* defined(HAVE_ROW_BASED_REPLICATION) */
diff --git a/sql/log_event.h b/sql/log_event.h
index 7783a97f03f..5d58a204ec9 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -26,6 +26,16 @@
#pragma interface /* gcc class implementation */
#endif
+#include <my_bitmap.h>
+
+#if !defined(MYSQL_CLIENT)
+#ifdef HAVE_ROW_BASED_REPLICATION
+extern my_bool binlog_row_based;
+#else
+extern const my_bool binlog_row_based;
+#endif
+#endif
+
#define LOG_READ_EOF -1
#define LOG_READ_BOGUS -2
#define LOG_READ_IO -3
@@ -196,6 +206,8 @@ struct sql_ex_info
#define EXEC_LOAD_HEADER_LEN 4
#define DELETE_FILE_HEADER_LEN 4
#define FORMAT_DESCRIPTION_HEADER_LEN (START_V3_HEADER_LEN+1+LOG_EVENT_TYPES)
+#define ROWS_HEADER_LEN 8
+#define TABLE_MAP_HEADER_LEN 8
#define EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN (4 + 4 + 4 + 1)
#define EXECUTE_LOAD_QUERY_HEADER_LEN (QUERY_HEADER_LEN + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN)
@@ -302,6 +314,14 @@ struct sql_ex_info
/* DF = "Delete File" */
#define DF_FILE_ID_OFFSET 0
+/* TM = "Table Map" */
+#define TM_MAPID_OFFSET 0
+#define TM_FLAGS_OFFSET 6
+
+/* RW = "RoWs" */
+#define RW_MAPID_OFFSET 0
+#define RW_FLAGS_OFFSET 6
+
/* ELQ = "Execute Load Query" */
#define ELQ_FILE_ID_OFFSET QUERY_HEADER_LEN
#define ELQ_FN_POS_START_OFFSET ELQ_FILE_ID_OFFSET + 4
@@ -373,6 +393,12 @@ struct sql_ex_info
#define LOG_EVENT_SUPPRESS_USE_F 0x8
/*
+ The table map version internal to the log should be increased after
+ the event has been written to the binary log.
+ */
+#define LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F 0x10
+
+/*
OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be
written to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written
into the Format_description_log_event, so that if later we don't want
@@ -427,6 +453,10 @@ enum Log_event_type
XID_EVENT= 16,
BEGIN_LOAD_QUERY_EVENT= 17,
EXECUTE_LOAD_QUERY_EVENT= 18,
+ TABLE_MAP_EVENT = 19,
+ WRITE_ROWS_EVENT = 20,
+ UPDATE_ROWS_EVENT = 21,
+ DELETE_ROWS_EVENT = 22,
/*
Add new events here - right above this comment!
@@ -504,6 +534,7 @@ typedef struct st_print_event_info
/* Settings on how to print the events */
bool short_form;
+ bool base64_output;
my_off_t hexdump_from;
uint8 common_header_len;
@@ -616,9 +647,10 @@ public:
static Log_event* read_log_event(IO_CACHE* file,
const Format_description_log_event *description_event);
/* print*() functions are used by mysqlbinlog */
- virtual void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0) = 0;
+ virtual void print(FILE* file, PRINT_EVENT_INFO* print_event_info) = 0;
void print_timestamp(FILE* file, time_t *ts = 0);
- void print_header(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print_header(FILE* file, PRINT_EVENT_INFO* print_event_info);
+ void print_base64(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
static void *operator new(size_t size)
@@ -646,7 +678,7 @@ public:
virtual Log_event_type get_type_code() = 0;
virtual bool is_valid() const = 0;
virtual bool is_artificial_event() { return 0; }
- inline bool get_cache_stmt() { return cache_stmt; }
+ inline bool get_cache_stmt() const { return cache_stmt; }
Log_event(const char* buf, const Format_description_log_event* description_event);
virtual ~Log_event() { free_temp_buf();}
void register_temp_buf(char* buf) { temp_buf = buf; }
@@ -778,8 +810,8 @@ public:
uint32 q_len_arg);
#endif /* HAVE_REPLICATION */
#else
- void print_query_header(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print_query_header(FILE* file, PRINT_EVENT_INFO* print_event_info);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Query_log_event(const char* buf, uint event_len,
@@ -833,7 +865,7 @@ public:
void pack_info(Protocol* protocol);
int exec_event(struct st_relay_log_info* rli);
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Slave_log_event(const char* buf, uint event_len);
@@ -921,7 +953,7 @@ public:
bool use_rli_only_for_errors);
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info = 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
void print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool commented);
#endif
@@ -1011,7 +1043,7 @@ public:
#endif /* HAVE_REPLICATION */
#else
Start_log_event_v3() {}
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Start_log_event_v3(const char* buf,
@@ -1106,7 +1138,7 @@ public:
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Intvar_log_event(const char* buf, const Format_description_log_event* description_event);
@@ -1147,7 +1179,7 @@ class Rand_log_event: public Log_event
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Rand_log_event(const char* buf, const Format_description_log_event* description_event);
@@ -1184,7 +1216,7 @@ class Xid_log_event: public Log_event
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Xid_log_event(const char* buf, const Format_description_log_event* description_event);
@@ -1195,6 +1227,9 @@ class Xid_log_event: public Log_event
bool write(IO_CACHE* file);
#endif
bool is_valid() const { return 1; }
+#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT)
+ static my_bool show_xid;
+#endif
};
/*****************************************************************************
@@ -1226,7 +1261,7 @@ public:
void pack_info(Protocol* protocol);
int exec_event(struct st_relay_log_info* rli);
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
User_var_log_event(const char* buf, const Format_description_log_event* description_event);
@@ -1252,7 +1287,7 @@ public:
{}
int exec_event(struct st_relay_log_info* rli);
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Stop_log_event(const char* buf, const Format_description_log_event* description_event):
@@ -1282,7 +1317,7 @@ public:
uint ident_len;
uint flags;
#ifndef MYSQL_CLIENT
- Rotate_log_event(THD* thd_arg, const char* new_log_ident_arg,
+ Rotate_log_event(const char* new_log_ident_arg,
uint ident_len_arg,
ulonglong pos_arg, uint flags);
#ifdef HAVE_REPLICATION
@@ -1290,7 +1325,7 @@ public:
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Rotate_log_event(const char* buf, uint event_len,
@@ -1343,7 +1378,7 @@ public:
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
void print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool enable_local);
#endif
@@ -1411,7 +1446,7 @@ public:
virtual int get_create_or_append() const;
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Append_block_log_event(const char* buf, uint event_len,
@@ -1422,8 +1457,8 @@ public:
bool is_valid() const { return block != 0; }
#ifndef MYSQL_CLIENT
bool write(IO_CACHE* file);
-#endif
const char* get_db() { return db; }
+#endif
};
@@ -1446,7 +1481,7 @@ public:
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
void print(FILE* file, PRINT_EVENT_INFO* print_event_info, bool enable_local);
#endif
@@ -1458,8 +1493,8 @@ public:
bool is_valid() const { return file_id != 0; }
#ifndef MYSQL_CLIENT
bool write(IO_CACHE* file);
-#endif
const char* get_db() { return db; }
+#endif
};
@@ -1482,7 +1517,7 @@ public:
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
#endif
Execute_load_log_event(const char* buf, uint event_len,
@@ -1493,8 +1528,8 @@ public:
bool is_valid() const { return file_id != 0; }
#ifndef MYSQL_CLIENT
bool write(IO_CACHE* file);
-#endif
const char* get_db() { return db; }
+#endif
};
@@ -1567,7 +1602,7 @@ public:
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
/* Prints the query as LOAD DATA LOCAL and with rewritten filename */
void print(FILE* file, PRINT_EVENT_INFO* print_event_info,
const char *local_fname);
@@ -1599,10 +1634,523 @@ public:
Log_event(buf, description_event)
{}
~Unknown_log_event() {}
- void print(FILE* file, PRINT_EVENT_INFO* print_event_info= 0);
+ void print(FILE* file, PRINT_EVENT_INFO* print_event_info);
Log_event_type get_type_code() { return UNKNOWN_EVENT;}
bool is_valid() const { return 1; }
};
#endif
char *str_to_hex(char *to, const char *from, uint len);
+
+/*****************************************************************************
+
+ Table map log event class
+
+  Create a mapping from a (database name, table name) pair to a table
+  identifier (an integer number).
+
+ ****************************************************************************/
+
+class Table_map_log_event : public Log_event
+{
+public:
+ /* Constants */
+ enum
+ {
+ TYPE_CODE = TABLE_MAP_EVENT
+ };
+
+ enum enum_error
+ {
+ ERR_OPEN_FAILURE = -1, /* Failure to open table */
+ ERR_OK = 0, /* No error */
+ ERR_TABLE_LIMIT_EXCEEDED = 1, /* No more room for tables */
+ ERR_OUT_OF_MEM = 2, /* Out of memory */
+ ERR_BAD_TABLE_DEF = 3, /* Table definition does not match */
+    ERR_RBR_TO_SBR = 4            /* daisy-chaining RBR to SBR not allowed */
+ };
+
+ enum enum_flag
+ {
+ /*
+ Nothing here right now, but the flags support is there in
+ preparation for changes that are coming.
+ */
+ };
+
+ typedef uint16 flag_set;
+
+ /* Special constants representing sets of flags */
+ enum
+ {
+ NO_FLAGS = 0U
+ };
+
+ void set_flags(flag_set flag) { m_flags |= flag; }
+ void clear_flags(flag_set flag) { m_flags &= ~flag; }
+ flag_set get_flags(flag_set flag) const { return m_flags & flag; }
+
+#ifndef MYSQL_CLIENT
+ Table_map_log_event(THD *thd, TABLE *tbl, ulong tid,
+ bool is_transactional, uint16 flags);
+#endif
+#ifdef HAVE_REPLICATION
+ Table_map_log_event(const char *buf, uint event_len,
+ const Format_description_log_event *description_event);
+#endif
+
+ ~Table_map_log_event();
+
+ virtual Log_event_type get_type_code() { return TABLE_MAP_EVENT; }
+ virtual bool is_valid() const { return m_memory; /* we check malloc */ }
+
+ virtual int get_data_size() { return m_data_size; }
+#ifndef MYSQL_CLIENT
+ virtual bool write_data_header(IO_CACHE *file);
+ virtual bool write_data_body(IO_CACHE *file);
+ virtual const char *get_db() { return m_dbnam; }
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+ virtual int exec_event(struct st_relay_log_info *rli);
+ virtual void pack_info(Protocol *protocol);
+#endif
+
+#ifdef MYSQL_CLIENT
+ virtual void print(FILE *file, PRINT_EVENT_INFO *print_event_info);
+#endif
+
+
+private:
+#ifndef MYSQL_CLIENT
+ TABLE *m_table;
+#endif
+ char const *m_dbnam;
+ my_size_t m_dblen;
+ char const *m_tblnam;
+ my_size_t m_tbllen;
+ ulong m_colcnt;
+ unsigned char *m_coltype;
+
+ gptr m_memory;
+ ulong m_table_id;
+ flag_set m_flags;
+
+ my_size_t m_data_size;
+};
+
+
+/*****************************************************************************
+
+ Row level log event class.
+
+ Common base class for all row-level log events.
+
+ RESPONSIBILITIES
+
+ Encode the common parts of all events containing rows, which are:
+ - Write data header and data body to an IO_CACHE.
+ - Provide an interface for adding an individual row to the event.
+
+ ****************************************************************************/
+
+class Rows_log_event : public Log_event
+{
+public:
+ /*
+ These definitions allow you to combine the flags into an
+ appropriate flag set using the normal bitwise operators. The
+    implicit conversion from an enum constant to an integer is
+    accepted by the compiler, and the resulting value is used to set
+    the actual set of flags.
+ */
+
+ enum enum_flag
+ {
+ /* Last event of a statement */
+ STMT_END_F = (1U << 0),
+
+ /* Value of the OPTION_NO_FOREIGN_KEY_CHECKS flag in thd->options */
+ NO_FOREIGN_KEY_CHECKS_F = (1U << 1),
+
+ /* Value of the OPTION_RELAXED_UNIQUE_CHECKS flag in thd->options */
+ RELAXED_UNIQUE_CHECKS_F = (1U << 2)
+ };
+
+ typedef uint16 flag_set;
+
+ /* Special constants representing sets of flags */
+ enum
+ {
+ NO_FLAGS = 0U
+ };
+
+ virtual ~Rows_log_event();
+
+ void set_flags(flag_set flags) { m_flags |= flags; }
+ void clear_flags(flag_set flags) { m_flags &= ~flags; }
+ flag_set get_flags(flag_set flags) const { return m_flags & flags; }
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+ virtual int exec_event(struct st_relay_log_info *rli);
+#ifdef DBUG_RBR
+ virtual void pack_info(Protocol *protocol);
+#endif
+#endif
+
+#ifdef MYSQL_CLIENT
+ /* not for direct call, each derived has its own ::print() */
+ virtual void print(FILE *file, PRINT_EVENT_INFO *print_event_info)= 0;
+#endif
+
+#ifndef MYSQL_CLIENT
+ int add_row_data(byte *data, my_size_t length)
+ {
+ return do_add_row_data(data,length);
+ }
+#endif
+
+ /* Member functions to implement superclass interface */
+ virtual int get_data_size()
+ {
+ DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
+ return 6 + 1 + no_bytes_in_map(&m_cols) +
+ (m_rows_cur - m_rows_buf););
+ return ROWS_HEADER_LEN + 1 + no_bytes_in_map(&m_cols) +
+ (m_rows_cur - m_rows_buf);
+ }
+
+ MY_BITMAP const *get_cols() const { return &m_cols; }
+ my_size_t get_width() const { return m_width; }
+ ulong get_table_id() const { return m_table_id; }
+
+#ifndef MYSQL_CLIENT
+ virtual bool write_data_header(IO_CACHE *file);
+ virtual bool write_data_body(IO_CACHE *file);
+ virtual const char *get_db() { return m_table->s->db.str; }
+#endif
+ virtual bool is_valid() const
+ {
+ /* that's how we check malloc() succeeded */
+ return m_rows_buf && m_cols.bitmap;
+ }
+
+ /*
+ If there is no table map active for the event, write one to the
+ binary log.
+
+    LOCK_log has to be acquired before calling this function.
+
+ PARAMETERS
+ thd - Thread to use when writing the table map
+
+ RETURN VALUE
+ Error code, or zero if write succeeded.
+ */
+#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION)
+ int maybe_write_table_map(THD *thd, IO_CACHE *file, MYSQL_LOG *log) const
+ {
+ /*
+ N.B., get_cache_stmt() returns the value of 'using_trans' that
+ was provided to the constructor, i.e., get_cache_stmt() == true
+ if and only if the table is transactional.
+ */
+
+ int result= 0;
+ if (!log->is_table_mapped(m_table))
+ result= log->write_table_map(thd, file, m_table, get_cache_stmt());
+ return result;
+ }
+#endif
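[Editor's note] A minimal, hedged usage sketch of the public interface declared above, for readers of this patch. The helper name and its arguments are invented for illustration; this is not the server's actual write path, and it assumes a server-side build with row-based replication enabled:

    // Illustrative sketch only (hypothetical helper, not part of this patch).
    // Assumes 'ev' is a rows event already constructed for the table that
    // 'row' belongs to, that 'row' points to 'len' bytes of packed row data
    // produced elsewhere, and that LOCK_log is held as required by
    // maybe_write_table_map().
    int log_one_row(THD *thd, IO_CACHE *file, MYSQL_LOG *log,
                    Rows_log_event *ev, byte *row, my_size_t len)
    {
      int error;
      /* Write a Table_map_log_event first if the table is not yet mapped. */
      if ((error= ev->maybe_write_table_map(thd, file, log)))
        return error;
      /* Append the packed row to the pending rows event. */
      if ((error= ev->add_row_data(row, len)))
        return error;
      /* Mark the event as the last one of the statement before it is flushed. */
      ev->set_flags(Rows_log_event::STMT_END_F);
      return 0;
    }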
+
+protected:
+ /*
+ The constructors are protected since you're supposed to inherit
+ this class, not create instances of this class.
+ */
+#ifndef MYSQL_CLIENT
+ Rows_log_event(THD*, TABLE*, ulong table_id,
+ MY_BITMAP const *cols, bool is_transactional);
+#endif
+ Rows_log_event(const char *row_data, uint event_len,
+ Log_event_type event_type,
+ const Format_description_log_event *description_event);
+
+#ifndef MYSQL_CLIENT
+ virtual int do_add_row_data(byte *data, my_size_t length);
+#endif
+
+#ifndef MYSQL_CLIENT
+ TABLE *m_table; /* The table the rows belong to */
+#endif
+ ulong m_table_id; /* Table ID */
+ MY_BITMAP m_cols; /* Bitmap denoting columns available */
+ ulong m_width; /* The width of the columns bitmap */
+
+ /* Bit buffer in the same memory as the class */
+ uint32 m_bitbuf[128/(sizeof(uint32)*8)];
+
+ byte *m_rows_buf; /* The rows in packed format */
+ byte *m_rows_cur; /* One-after the end of the data */
+ byte *m_rows_end; /* One-after the end of the allocated space */
+
+ flag_set m_flags; /* Flags for row-level events */
+
+private:
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+ /*
+ Primitive to prepare for a sequence of row executions.
+
+ DESCRIPTION
+
+ Before doing a sequence of do_prepare_row() and do_exec_row()
+ calls, this member function should be called to prepare for the
+ entire sequence. Typically, this member function will allocate
+ space for any buffers that are needed for the two member
+ functions mentioned above.
+
+ RETURN VALUE
+
+ The member function will return 0 if all went OK, or a non-zero
+ error code otherwise.
+ */
+ virtual int do_before_row_operations(TABLE *table) = 0;
+
+ /*
+ Primitive to clean up after a sequence of row executions.
+
+ DESCRIPTION
+
+ After doing a sequence of do_prepare_row() and do_exec_row(),
+ this member function should be called to clean up and release
+ any allocated buffers.
+ */
+ virtual int do_after_row_operations(TABLE *table, int error) = 0;
+
+ /*
+ Primitive to prepare for handling one row in a row-level event.
+
+ DESCRIPTION
+
+ The member function prepares for execution of operations needed for one
+    row in a row-level event by reading data from the buffer containing
+    the row. No specific interpretation of the data is normally done here,
+    since SQL-thread-specific data is not available: that data is made
+    available to the do_exec_row() function.
+
+ RETURN VALUE
+ A pointer to the start of the next row, or NULL if the preparation
+ failed. Currently, preparation cannot fail, but don't rely on this
+ behavior.
+ */
+ virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start) = 0;
+
+ /*
+ Primitive to do the actual execution necessary for a row.
+
+ DESCRIPTION
+ The member function will do the actual execution needed to handle a row.
+
+ RETURN VALUE
+ 0 if execution succeeded, 1 if execution failed.
+
+ */
+ virtual int do_exec_row(TABLE *table) = 0;
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+};
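[Editor's note] The four private primitives above form a Template Method contract: the base class drives the per-row loop, and the Write/Update/Delete subclasses below supply the per-row behaviour. A self-contained toy sketch of that shape (all names invented; none of this is MySQL code) could look like:

    // Toy model of the pattern used by Rows_log_event and its subclasses.
    struct toy_rows_event
    {
      virtual ~toy_rows_event() {}
      /* The base class owns the row loop, cf. Rows_log_event::exec_event(). */
      int apply(const char *row, const char *rows_end)
      {
        int error= before();                 /* cf. do_before_row_operations() */
        while (!error && row && row < rows_end)
        {
          row= prepare(row);                 /* cf. do_prepare_row() */
          error= row ? exec() : 1;           /* cf. do_exec_row() */
        }
        return after(error);                 /* cf. do_after_row_operations() */
      }
    private:
      virtual int before()= 0;
      virtual int after(int error)= 0;
      virtual const char *prepare(const char *row_start)= 0;
      virtual int exec()= 0;
    };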
+
+
+/*****************************************************************************
+
+ Write row log event class
+
+  Log row insertions and updates. The event contains several
+ insert/update rows for a table. Note that each event contains only
+ rows for one table.
+
+ ****************************************************************************/
+class Write_rows_log_event : public Rows_log_event
+{
+public:
+ enum
+ {
+ /* Support interface to THD::binlog_prepare_pending_rows_event */
+ TYPE_CODE = WRITE_ROWS_EVENT
+ };
+
+#if !defined(MYSQL_CLIENT)
+ Write_rows_log_event(THD*, TABLE*, ulong table_id,
+ MY_BITMAP const *cols, bool is_transactional);
+#endif
+#ifdef HAVE_REPLICATION
+ Write_rows_log_event(const char *buf, uint event_len,
+ const Format_description_log_event *description_event);
+#endif
+#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION)
+ static bool binlog_row_logging_function(THD *thd, TABLE *table,
+ bool is_transactional,
+ MY_BITMAP *cols,
+ uint fields,
+ const byte *before_record
+ __attribute__((unused)),
+ const byte *after_record)
+ {
+ return thd->binlog_write_row(table, is_transactional,
+ cols, fields, after_record);
+ }
+#endif
+
+private:
+ virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; }
+
+#ifdef MYSQL_CLIENT
+ void print(FILE *file, PRINT_EVENT_INFO *print_event_info);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+ gptr m_memory;
+ byte *m_search_record;
+
+ virtual int do_before_row_operations(TABLE *table);
+ virtual int do_after_row_operations(TABLE *table, int error);
+ virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start);
+ virtual int do_exec_row(TABLE *table);
+#endif
+};
+
+
+/*****************************************************************************
+
+ Update rows log event class
+
+  Log row updates with a before image. The event contains several
+ update rows for a table. Note that each event contains only rows for
+ one table.
+
+ Also note that the row data consists of pairs of row data: one row
+ for the old data and one row for the new data.
+
+ ****************************************************************************/
+class Update_rows_log_event : public Rows_log_event
+{
+public:
+ enum
+ {
+ /* Support interface to THD::binlog_prepare_pending_rows_event */
+ TYPE_CODE = UPDATE_ROWS_EVENT
+ };
+
+#ifndef MYSQL_CLIENT
+ Update_rows_log_event(THD*, TABLE*, ulong table_id,
+ MY_BITMAP const *cols, bool is_transactional);
+#endif
+
+#ifdef HAVE_REPLICATION
+ Update_rows_log_event(const char *buf, uint event_len,
+ const Format_description_log_event *description_event);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION)
+ static bool binlog_row_logging_function(THD *thd, TABLE *table,
+ bool is_transactional,
+ MY_BITMAP *cols,
+ uint fields,
+ const byte *before_record,
+ const byte *after_record)
+ {
+ return thd->binlog_update_row(table, is_transactional,
+ cols, fields, before_record, after_record);
+ }
+#endif
+
+private:
+ virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; }
+
+#ifdef MYSQL_CLIENT
+ void print(FILE *file, PRINT_EVENT_INFO *print_event_info);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+ gptr m_memory;
+ byte *m_key;
+ byte *m_search_record;
+
+ virtual int do_before_row_operations(TABLE *table);
+ virtual int do_after_row_operations(TABLE *table, int error);
+ virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start);
+ virtual int do_exec_row(TABLE *table);
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+};
+
+/*****************************************************************************
+
+ Delete rows log event class.
+
+  Log row deletions. The event contains several delete rows for a
+ table. Note that each event contains only rows for one table.
+
+ RESPONSIBILITIES
+
+  - Act as a container for rows that have been deleted on the master
+ and should be deleted on the slave.
+
+ COLLABORATION
+
+ Row_writer
+ Create the event and add rows to the event.
+ Row_reader
+ Extract the rows from the event.
+
+ ****************************************************************************/
+class Delete_rows_log_event : public Rows_log_event
+{
+public:
+ enum
+ {
+ /* Support interface to THD::binlog_prepare_pending_rows_event */
+ TYPE_CODE = DELETE_ROWS_EVENT
+ };
+
+#ifndef MYSQL_CLIENT
+ Delete_rows_log_event(THD*, TABLE*, ulong,
+ MY_BITMAP const *cols, bool is_transactional);
+#endif
+#ifdef HAVE_REPLICATION
+ Delete_rows_log_event(const char *buf, uint event_len,
+ const Format_description_log_event *description_event);
+#endif
+#if !defined(MYSQL_CLIENT) && defined(HAVE_ROW_BASED_REPLICATION)
+ static bool binlog_row_logging_function(THD *thd, TABLE *table,
+ bool is_transactional,
+ MY_BITMAP *cols,
+ uint fields,
+ const byte *before_record,
+ const byte *after_record
+ __attribute__((unused)))
+ {
+ return thd->binlog_delete_row(table, is_transactional,
+ cols, fields, before_record);
+ }
+#endif
+
+private:
+ virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; }
+
+#ifdef MYSQL_CLIENT
+ void print(FILE *file, PRINT_EVENT_INFO *print_event_info);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+ gptr m_memory;
+ byte *m_key;
+ byte *m_search_record;
+
+ virtual int do_before_row_operations(TABLE *table);
+ virtual int do_after_row_operations(TABLE *table, int error);
+ virtual char const *do_prepare_row(THD*, TABLE*, char const *row_start);
+ virtual int do_exec_row(TABLE *table);
+#endif
+};
+
+
#endif /* _log_event_h */
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index f18447de71c..48d1ea8e798 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -248,50 +248,50 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
TODO: separate three contexts above, move them to separate bitfields.
*/
-#define SELECT_DISTINCT (1L << 0) // SELECT, user
-#define SELECT_STRAIGHT_JOIN (1L << 1) // SELECT, user
-#define SELECT_DESCRIBE (1L << 2) // SELECT, user
-#define SELECT_SMALL_RESULT (1L << 3) // SELECT, user
-#define SELECT_BIG_RESULT (1L << 4) // SELECT, user
-#define OPTION_FOUND_ROWS (1L << 5) // SELECT, user
-#define OPTION_TO_QUERY_CACHE (1L << 6) // SELECT, user
-#define SELECT_NO_JOIN_CACHE (1L << 7) // intern
-#define OPTION_BIG_TABLES (1L << 8) // THD, user
-#define OPTION_BIG_SELECTS (1L << 9) // THD, user
-#define OPTION_LOG_OFF (1L << 10) // THD, user
-#define OPTION_UPDATE_LOG (1L << 11) // THD, user, unused
-#define TMP_TABLE_ALL_COLUMNS (1L << 12) // SELECT, intern
-#define OPTION_WARNINGS (1L << 13) // THD, user
-#define OPTION_AUTO_IS_NULL (1L << 14) // THD, user, binlog
-#define OPTION_FOUND_COMMENT (1L << 15) // SELECT, intern, parser
-#define OPTION_SAFE_UPDATES (1L << 16) // THD, user
-#define OPTION_BUFFER_RESULT (1L << 17) // SELECT, user
-#define OPTION_BIN_LOG (1L << 18) // THD, user
-#define OPTION_NOT_AUTOCOMMIT (1L << 19) // THD, user
-#define OPTION_BEGIN (1L << 20) // THD, intern
-#define OPTION_TABLE_LOCK (1L << 21) // THD, intern
-#define OPTION_QUICK (1L << 22) // SELECT (for DELETE)
-#define OPTION_QUOTE_SHOW_CREATE (1L << 23) // THD, user
+#define SELECT_DISTINCT (LL(1) << 0) // SELECT, user
+#define SELECT_STRAIGHT_JOIN (LL(1) << 1) // SELECT, user
+#define SELECT_DESCRIBE (LL(1) << 2) // SELECT, user
+#define SELECT_SMALL_RESULT (LL(1) << 3) // SELECT, user
+#define SELECT_BIG_RESULT (LL(1) << 4) // SELECT, user
+#define OPTION_FOUND_ROWS (LL(1) << 5) // SELECT, user
+#define OPTION_TO_QUERY_CACHE (LL(1) << 6) // SELECT, user
+#define SELECT_NO_JOIN_CACHE (LL(1) << 7) // intern
+#define OPTION_BIG_TABLES (LL(1) << 8) // THD, user
+#define OPTION_BIG_SELECTS (LL(1) << 9) // THD, user
+#define OPTION_LOG_OFF (LL(1) << 10) // THD, user
+#define OPTION_UPDATE_LOG (LL(1) << 11) // THD, user, unused
+#define TMP_TABLE_ALL_COLUMNS (LL(1) << 12) // SELECT, intern
+#define OPTION_WARNINGS (LL(1) << 13) // THD, user
+#define OPTION_AUTO_IS_NULL (LL(1) << 14) // THD, user, binlog
+#define OPTION_FOUND_COMMENT (LL(1) << 15) // SELECT, intern, parser
+#define OPTION_SAFE_UPDATES (LL(1) << 16) // THD, user
+#define OPTION_BUFFER_RESULT (LL(1) << 17) // SELECT, user
+#define OPTION_BIN_LOG (LL(1) << 18) // THD, user
+#define OPTION_NOT_AUTOCOMMIT (LL(1) << 19) // THD, user
+#define OPTION_BEGIN (LL(1) << 20) // THD, intern
+#define OPTION_TABLE_LOCK (LL(1) << 21) // THD, intern
+#define OPTION_QUICK (LL(1) << 22) // SELECT (for DELETE)
+#define OPTION_QUOTE_SHOW_CREATE (LL(1) << 23) // THD, user
/* The following is used to detect a conflict with DISTINCT
   that the user query has requested */
-#define SELECT_ALL (1L << 24) // SELECT, user, parser
+#define SELECT_ALL (LL(1) << 24) // SELECT, user, parser
/* Set if we are updating a non-transaction safe table */
-#define OPTION_STATUS_NO_TRANS_UPDATE (1L << 25) // THD, intern
+#define OPTION_STATUS_NO_TRANS_UPDATE (LL(1) << 25) // THD, intern
/* The following can be set when importing tables in a 'wrong order'
to suppress foreign key checks */
-#define OPTION_NO_FOREIGN_KEY_CHECKS (1L << 26) // THD, user, binlog
+#define OPTION_NO_FOREIGN_KEY_CHECKS (LL(1) << 26) // THD, user, binlog
/* The following speeds up inserts to InnoDB tables by suppressing unique
key checks in some cases */
-#define OPTION_RELAXED_UNIQUE_CHECKS (1L << 27) // THD, user, binlog
-#define SELECT_NO_UNLOCK (1L << 28) // SELECT, intern
-#define OPTION_SCHEMA_TABLE (1L << 29) // SELECT, intern
+#define OPTION_RELAXED_UNIQUE_CHECKS (LL(1) << 27) // THD, user, binlog
+#define SELECT_NO_UNLOCK (LL(1) << 28) // SELECT, intern
+#define OPTION_SCHEMA_TABLE (LL(1) << 29) // SELECT, intern
/* Flag set if setup_tables already done */
-#define OPTION_SETUP_TABLES_DONE (1L << 30) // intern
+#define OPTION_SETUP_TABLES_DONE (LL(1) << 30) // intern
/* If not set then the thread will ignore all warnings with level notes. */
-#define OPTION_SQL_NOTES (1UL << 31) // THD, user
+#define OPTION_SQL_NOTES (LL(1) << 31) // THD, user
/*
Force the used temporary table to be a MyISAM table (because we will use
fulltext functions when reading from it.
@@ -600,6 +600,7 @@ bool mysql_create_db(THD *thd, char *db, HA_CREATE_INFO *create, bool silent);
bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create);
bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent);
void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, ushort flags);
+void mysql_client_binlog_statement(THD *thd);
bool mysql_rm_table(THD *thd,TABLE_LIST *tables, my_bool if_exists,
my_bool drop_temporary);
int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists,
@@ -1198,6 +1199,13 @@ extern ulong what_to_log,flush_time;
extern ulong query_buff_size, thread_stack;
extern ulong binlog_cache_size, max_binlog_cache_size, open_files_limit;
extern ulong max_binlog_size, max_relay_log_size;
+extern const char *opt_binlog_format;
+#ifdef HAVE_ROW_BASED_REPLICATION
+extern my_bool binlog_row_based;
+extern ulong opt_binlog_rows_event_max_size;
+#else
+extern const my_bool binlog_row_based;
+#endif
extern ulong rpl_recovery_rank, thread_cache_size;
extern ulong back_log;
extern ulong specialflag, current_pid;
@@ -1338,6 +1346,7 @@ extern handlerton myisammrg_hton;
extern handlerton heap_hton;
extern SHOW_COMP_OPTION have_isam;
+extern SHOW_COMP_OPTION have_row_based_replication;
extern SHOW_COMP_OPTION have_raid, have_openssl, have_symlink;
extern SHOW_COMP_OPTION have_query_cache;
extern SHOW_COMP_OPTION have_geometry, have_rtree_keys;
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 20d09ae0228..2a51afbee8d 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -441,6 +441,33 @@ volatile bool mqh_used = 0;
my_bool opt_noacl;
my_bool sp_automatic_privileges= 1;
+#ifdef HAVE_ROW_BASED_REPLICATION
+/*
+  The variable below serves as an optimization for the test (opt_binlog_format ==
+  BF_ROW), which we need to do for every row. Statement-based is the default.
+*/
+my_bool binlog_row_based= FALSE;
+ulong opt_binlog_rows_event_max_size;
+const char *binlog_format_names[]= {"STATEMENT", "ROW", NullS};
+/*
+ Note that BF_UNSPECIFIED is last, after the end of binlog_format_names: it
+  has no corresponding cell in this array. We use this value to know whether
+  the user has explicitly specified a binlog format (in which case we also
+  require --log-bin) or not (in which case we fall back to statement-based).
+*/
+enum binlog_format { BF_STMT= 0, BF_ROW= 1, BF_UNSPECIFIED= 2 };
+#else
+const my_bool binlog_row_based= FALSE;
+const char *binlog_format_names[]= {"STATEMENT", NullS};
+enum binlog_format { BF_STMT= 0, BF_UNSPECIFIED= 2 };
+#endif
+
+TYPELIB binlog_format_typelib=
+ { array_elements(binlog_format_names)-1,"",
+ binlog_format_names, NULL };
+const char *opt_binlog_format= 0;
+enum binlog_format opt_binlog_format_id= BF_UNSPECIFIED;
+
#ifdef HAVE_INITGROUPS
static bool calling_initgroups= FALSE; /* Used in SIGSEGV handler. */
#endif
@@ -528,6 +555,7 @@ MY_BITMAP temp_pool;
CHARSET_INFO *system_charset_info, *files_charset_info ;
CHARSET_INFO *national_charset_info, *table_alias_charset;
+SHOW_COMP_OPTION have_row_based_replication;
SHOW_COMP_OPTION have_raid, have_openssl, have_symlink, have_query_cache;
SHOW_COMP_OPTION have_geometry, have_rtree_keys;
SHOW_COMP_OPTION have_crypt, have_compress;
@@ -3032,8 +3060,44 @@ with --log-bin instead.");
{
sql_print_warning("You need to use --log-bin to make "
"--log-slave-updates work.");
- unireg_abort(1);
+ unireg_abort(1);
+ }
+
+ if (!opt_bin_log && (opt_binlog_format_id != BF_UNSPECIFIED))
+ {
+ sql_print_warning("You need to use --log-bin to make "
+ "--binlog-format work.");
+ unireg_abort(1);
}
+ if (opt_binlog_format_id == BF_UNSPECIFIED)
+ {
+ /*
+ We use statement-based by default, but could change this to be row-based
+ if this is a cluster build (i.e. have_ndbcluster is true)...
+ */
+ opt_binlog_format_id= BF_STMT;
+ }
+#ifdef HAVE_ROW_BASED_REPLICATION
+ if (opt_binlog_format_id == BF_ROW)
+ {
+ binlog_row_based= TRUE;
+ /*
+      Row-based binlogging turns on InnoDB unsafe locking, because those locks
+      are not needed when using row-based binlogging. In fact,
+      innodb-locks-unsafe-for-binlog is unsafe only for statement-based
+      binlogging; it is safe for row-based binlogging.
+ */
+#ifdef HAVE_INNOBASE_DB
+ innobase_locks_unsafe_for_binlog= TRUE;
+#endif
+ /* Trust stored function creators because they can do no harm */
+ trust_function_creators= 1;
+ }
+#endif
+  /* Check that we have not left the format unspecified at this point */
+ DBUG_ASSERT((uint)opt_binlog_format_id <=
+ array_elements(binlog_format_names)-1);
+ opt_binlog_format= binlog_format_names[opt_binlog_format_id];
if (opt_slow_log)
mysql_slow_log.open_slow_log(opt_slow_logname);
@@ -4504,6 +4568,13 @@ enum options_mysqld
OPT_SQL_BIN_UPDATE_SAME, OPT_REPLICATE_DO_DB,
OPT_REPLICATE_IGNORE_DB, OPT_LOG_SLAVE_UPDATES,
OPT_BINLOG_DO_DB, OPT_BINLOG_IGNORE_DB,
+ OPT_BINLOG_FORMAT,
+#ifndef DBUG_OFF
+ OPT_BINLOG_SHOW_XID,
+#endif
+#ifdef HAVE_ROW_BASED_REPLICATION
+ OPT_BINLOG_ROWS_EVENT_MAX_SIZE,
+#endif
OPT_WANT_CORE, OPT_CONCURRENT_INSERT,
OPT_MEMLOCK, OPT_MYISAM_RECOVER,
OPT_REPLICATE_REWRITE_DB, OPT_SERVER_ID,
@@ -4732,12 +4803,46 @@ Disable with --skip-bdb (will save memory).",
{"bind-address", OPT_BIND_ADDRESS, "IP address to bind to.",
(gptr*) &my_bind_addr_str, (gptr*) &my_bind_addr_str, 0, GET_STR,
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"binlog-format", OPT_BINLOG_FORMAT,
+#ifdef HAVE_ROW_BASED_REPLICATION
+ "Tell the master the form of binary logging to use: either 'row' for "
+ "row-based binary logging (which automatically turns on "
+ "innodb_locks_unsafe_for_binlog as it is safe in this case), or "
+ "'statement' for statement-based logging. ",
+#else
+ "Tell the master the form of binary logging to use: this release build "
+ "supports only statement-based binary logging, so only 'statement' is "
+ "a legal value; MySQL-Max release builds support row-based binary logging "
+ "in addition.",
+#endif
+ 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
{"binlog-do-db", OPT_BINLOG_DO_DB,
"Tells the master it should log updates for the specified database, and exclude all others not explicitly mentioned.",
0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"binlog-ignore-db", OPT_BINLOG_IGNORE_DB,
"Tells the master that updates to the given database should not be logged tothe binary log.",
0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+#if !defined(DBUG_OFF) && !defined(MYSQL_CLIENT)
+ {"binlog-show-xid", OPT_BINLOG_SHOW_XID,
+ "Option used by mysql-test for debugging and testing: "
+ "do not display the XID in SHOW BINLOG EVENTS; "
+ "may be removed in future versions",
+ (gptr*) &Xid_log_event::show_xid, (gptr*) &Xid_log_event::show_xid,
+ 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
+#endif
+#ifdef HAVE_ROW_BASED_REPLICATION
+ {"binlog-row-event-max-size", OPT_BINLOG_ROWS_EVENT_MAX_SIZE,
+ "The maximum size of a row-based binary log event in bytes. Rows will be "
+ "grouped into events smaller than this size if possible. "
+ "The value has to be a multiple of 256.",
+ (gptr*) &opt_binlog_rows_event_max_size,
+ (gptr*) &opt_binlog_rows_event_max_size, 0,
+ GET_ULONG, REQUIRED_ARG,
+ /* def_value */ 1024, /* min_value */ 256, /* max_value */ ULONG_MAX,
+ /* sub_size */ 0, /* block_size */ 256,
+ /* app_type */ 0
+ },
+#endif
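[Editor's note] Taken together with the startup checks added earlier in this patch (--binlog-format requires --log-bin; the row event size has a default of 1024 and must be a multiple of 256), a server built with row-based replication could, for example, be started as follows; the log basename and server id are placeholders:

    mysqld --log-bin=master-bin --server-id=1 \
           --binlog-format=row --binlog-row-event-max-size=2048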
{"bootstrap", OPT_BOOTSTRAP, "Used by mysql installation scripts.", 0, 0, 0,
GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"character-set-client-handshake", OPT_CHARACTER_SET_CLIENT_HANDSHAKE,
@@ -4905,7 +5010,9 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite,
(gptr*) &innobase_unix_file_flush_method, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
0, 0, 0},
{"innodb_locks_unsafe_for_binlog", OPT_INNODB_LOCKS_UNSAFE_FOR_BINLOG,
- "Force InnoDB not to use next-key locking. Instead use only row-level locking",
+ "Force InnoDB not to use next-key locking, to use only row-level locking."
+ " This is unsafe if you are using statement-based binary logging, and safe"
+ " if you are using row-based binary logging.",
(gptr*) &innobase_locks_unsafe_for_binlog,
(gptr*) &innobase_locks_unsafe_for_binlog, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"innodb_log_arch_dir", OPT_INNODB_LOG_ARCH_DIR,
@@ -4984,8 +5091,12 @@ Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite,
{"log-bin-trust-function-creators", OPT_LOG_BIN_TRUST_FUNCTION_CREATORS,
"If equal to 0 (the default), then when --log-bin is used, creation of "
"a function is allowed only to users having the SUPER privilege and only "
- "if this function may not break binary logging.",
- (gptr*) &trust_function_creators, (gptr*) &trust_function_creators, 0,
+ "if this function may not break binary logging."
+#ifdef HAVE_ROW_BASED_REPLICATION
+ " If using --binlog-format=row, the security issues do not exist and the "
+ "binary logging cannot break so this option is automatically set to 1."
+#endif
+ ,(gptr*) &trust_function_creators, (gptr*) &trust_function_creators, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"log-error", OPT_ERROR_LOG_FILE, "Error log file.",
(gptr*) &log_error_file_ptr, (gptr*) &log_error_file_ptr, 0, GET_STR,
@@ -6459,6 +6570,11 @@ static void mysql_init_variables(void)
"d:t:i:o,/tmp/mysqld.trace");
#endif
opt_error_log= IF_WIN(1,0);
+#ifdef HAVE_ROW_BASED_REPLICATION
+ have_row_based_replication= SHOW_OPTION_YES;
+#else
+ have_row_based_replication= SHOW_OPTION_NO;
+#endif
#ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
have_ndbcluster=SHOW_OPTION_DISABLED;
global_system_variables.ndb_index_stat_enable=TRUE;
@@ -6682,6 +6798,28 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
binlog_filter->add_ignore_db(argument);
break;
}
+ case OPT_BINLOG_FORMAT:
+ {
+ int id;
+ if ((id= find_type(argument, &binlog_format_typelib, 2)) <= 0)
+ {
+#ifdef HAVE_ROW_BASED_REPLICATION
+ fprintf(stderr,
+ "Unknown binary log format: '%s' "
+ "(should be '%s' or '%s')\n",
+ argument,
+ binlog_format_names[BF_STMT],
+ binlog_format_names[BF_ROW]);
+#else
+ fprintf(stderr,
+ "Unknown binary log format: '%s' (only legal value is '%s')\n",
+ argument, binlog_format_names[BF_STMT]);
+#endif
+ exit(1);
+ }
+ opt_binlog_format_id= (enum binlog_format)(id-1);
+ break;
+ }
case (int)OPT_BINLOG_DO_DB:
{
binlog_filter->add_do_db(argument);
@@ -7229,6 +7367,7 @@ static void get_options(int argc,char **argv)
init_global_datetime_format(MYSQL_TIMESTAMP_DATETIME,
&global_system_variables.datetime_format))
exit(1);
+
}
diff --git a/sql/rpl_filter.h b/sql/rpl_filter.h
index 5a766424d19..58d2b97c9c6 100644
--- a/sql/rpl_filter.h
+++ b/sql/rpl_filter.h
@@ -18,7 +18,6 @@
#define RPL_FILTER_H
#include "mysql.h"
-#include "my_list.h"
typedef struct st_table_rule_ent
{
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
new file mode 100644
index 00000000000..5500fdf1f64
--- /dev/null
+++ b/sql/rpl_rli.h
@@ -0,0 +1,312 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef RPL_RLI_H
+#define RPL_RLI_H
+
+#define MAX_SLAVE_ERRMSG 1024
+
+#include "rpl_tblmap.h"
+
+/****************************************************************************
+
+ Replication SQL Thread
+
+ st_relay_log_info contains:
+ - the current relay log
+ - the current relay log offset
+ - master log name
+ - master log sequence corresponding to the last update
+ - misc information specific to the SQL thread
+
+  st_relay_log_info is initialized from the slave.info file if one exists.
+  Otherwise, data members are initialized with defaults. The initialization is
+  done with the init_relay_log_info() call.
+
+ The format of slave.info file:
+
+ relay_log_name
+ relay_log_pos
+ master_log_name
+ master_log_pos
+
+ To clean up, call end_relay_log_info()
+
+*****************************************************************************/
+
+typedef struct st_relay_log_info
+{
+ /*
+ If flag set, then rli does not store its state in any info file.
+ This is the case only when we execute BINLOG SQL commands inside
+ a client, non-replication thread.
+ */
+ bool no_storage;
+
+  /*** The following variables can only be read when protected by the data lock ****/
+
+ /*
+ info_fd - file descriptor of the info file. set only during
+ initialization or clean up - safe to read anytime
+ cur_log_fd - file descriptor of the current read relay log
+ */
+ File info_fd,cur_log_fd;
+
+ /*
+ Protected with internal locks.
+ Must get data_lock when resetting the logs.
+ */
+ MYSQL_LOG relay_log;
+ LOG_INFO linfo;
+ IO_CACHE cache_buf,*cur_log;
+
+ /* The following variables are safe to read any time */
+
+ /* IO_CACHE of the info file - set only during init or end */
+ IO_CACHE info_file;
+
+ /*
+    When we restart the slave thread we need to have access to the previously
+    created temporary tables. Modified only on init/end and by the SQL
+    thread, read only by the SQL thread.
+ */
+ TABLE *save_temporary_tables;
+
+ /*
+    standard lock acquisition order to avoid deadlocks:
+ run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index
+ */
+ pthread_mutex_t data_lock,run_lock;
+
+ /*
+ start_cond is broadcast when SQL thread is started
+ stop_cond - when stopped
+ data_cond - when data protected by data_lock changes
+ */
+ pthread_cond_t start_cond, stop_cond, data_cond;
+
+ /* parent master info structure */
+ struct st_master_info *mi;
+
+ /*
+ Needed to deal properly with cur_log getting closed and re-opened with
+ a different log under our feet
+ */
+ uint32 cur_log_old_open_count;
+
+ /*
+    Let's call a group (of events):
+    - a transaction, or
+    - an autocommitting query + its associated events (INSERT_ID,
+      TIMESTAMP...)
+    We need these rli coordinates:
+    - relay log name and position of the beginning of the group we are
+      currently executing. Needed to know where we have to restart when
+      replication has stopped in the middle of a group (which has been
+      rolled back by the slave).
+    - relay log name and position just after the event we have just
+      executed. This event is part of the current group.
+    Formerly we only had the coordinates immediately above, plus a 'pending'
+    variable, but this handled incorrectly the case of a transaction starting
+    in one relay log and finishing (committing) in another relay log, which
+    can happen when, for example, the relay log gets rotated because of
+    max_binlog_size.
+ */
+ char group_relay_log_name[FN_REFLEN];
+ ulonglong group_relay_log_pos;
+ char event_relay_log_name[FN_REFLEN];
+ ulonglong event_relay_log_pos;
+ ulonglong future_event_relay_log_pos;
+
+ /*
+ Original log name and position of the group we're currently executing
+ (whose coordinates are group_relay_log_name/pos in the relay log)
+ in the master's binlog. These concern the *group*, because in the master's
+ binlog the log_pos that comes with each event is the position of the
+ beginning of the group.
+ */
+ char group_master_log_name[FN_REFLEN];
+ volatile my_off_t group_master_log_pos;
+
+ /*
+ Handling of the relay_log_space_limit optional constraint.
+    ignore_log_space_limit is used to resolve a deadlock between the I/O and
+    SQL threads: the SQL thread sets it to unblock the I/O thread and make it
+    temporarily forget about the constraint.
+ */
+ ulonglong log_space_limit,log_space_total;
+ bool ignore_log_space_limit;
+
+ /*
+ When it commits, InnoDB internally stores the master log position it has
+ processed so far; the position to store is the one of the end of the
+ committing event (the COMMIT query event, or the event if in autocommit
+ mode).
+ */
+#if MYSQL_VERSION_ID < 40100
+ ulonglong future_master_log_pos;
+#else
+ ulonglong future_group_master_log_pos;
+#endif
+
+ time_t last_master_timestamp;
+
+ /*
+    Needed for problems when the slave stops and we want to restart it,
+    skipping one or more events in the master log that have caused
+    errors and have already been manually applied by the DBA.
+ */
+ volatile uint32 slave_skip_counter;
+ volatile ulong abort_pos_wait; /* Incremented on change master */
+ volatile ulong slave_run_id; /* Incremented on slave start */
+ pthread_mutex_t log_space_lock;
+ pthread_cond_t log_space_cond;
+ THD * sql_thd;
+ int last_slave_errno;
+#ifndef DBUG_OFF
+ int events_till_abort;
+#endif
+ char last_slave_error[MAX_SLAVE_ERRMSG];
+
+ /* if not set, the value of other members of the structure are undefined */
+ bool inited;
+ volatile bool abort_slave;
+ volatile uint slave_running;
+
+ /*
+ Condition and its parameters from START SLAVE UNTIL clause.
+
+    The UNTIL condition is tested with the is_until_satisfied() method, which
+    is called by exec_relay_log_event(). is_until_satisfied() caches the result
+    of the comparison of log names because log names don't change very often;
+    this cache is invalidated by the parts of the code which change log names
+    with the notify_*_log_name_update() methods. (They need to be called only
+    if the SQL thread is running.)
+ */
+
+ enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS} until_condition;
+ char until_log_name[FN_REFLEN];
+ ulonglong until_log_pos;
+ /* extension extracted from log_name and converted to int */
+ ulong until_log_name_extension;
+ /*
+    Cached result of the comparison of until_log_name and the current log
+    name: -2 means uninitialized; -1, 0, 1 are comparison results.
+ */
+ enum
+ {
+ UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1,
+ UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1
+ } until_log_names_cmp_result;
+
+ char cached_charset[6];
+ /*
+    trans_retries varies between 0 and slave_transaction_retries and counts how
+ many times the slave has retried the present transaction; gets reset to 0
+ when the transaction finally succeeds. retried_trans is a cumulative
+ counter: how many times the slave has retried a transaction (any) since
+ slave started.
+ */
+ ulong trans_retries, retried_trans;
+
+ /*
+ If the end of the hot relay log is made of master's events ignored by the
+ slave I/O thread, these two keep track of the coords (in the master's
+ binlog) of the last of these events seen by the slave I/O thread. If not,
+ ign_master_log_name_end[0] == 0.
+ As they are like a Rotate event read/written from/to the relay log, they
+ are both protected by rli->relay_log.LOCK_log.
+ */
+ char ign_master_log_name_end[FN_REFLEN];
+ ulonglong ign_master_log_pos_end;
+
+ st_relay_log_info();
+ ~st_relay_log_info();
+
+ /*
+ Invalidate cached until_log_name and group_relay_log_name comparison
+    result. Should be called after any update of group_relay_log_name if
+    there is a chance that the SQL thread is running.
+ */
+ inline void notify_group_relay_log_name_update()
+ {
+ if (until_condition==UNTIL_RELAY_POS)
+ until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
+ }
+
+ /*
+ The same as previous but for group_master_log_name.
+ */
+ inline void notify_group_master_log_name_update()
+ {
+ if (until_condition==UNTIL_MASTER_POS)
+ until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
+ }
+
+ inline void inc_event_relay_log_pos()
+ {
+ event_relay_log_pos= future_event_relay_log_pos;
+ }
+
+ void inc_group_relay_log_pos(ulonglong log_pos,
+ bool skip_lock=0);
+
+ int wait_for_pos(THD* thd, String* log_name, longlong log_pos,
+ longlong timeout);
+ void close_temporary_tables();
+
+ /* Check if UNTIL condition is satisfied. See slave.cc for more. */
+ bool is_until_satisfied();
+ inline ulonglong until_pos()
+ {
+ return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos :
+ group_relay_log_pos);
+ }
+
+ table_mapping m_table_map;
+
+ /*
+    The last charset (6 bytes) seen by the slave SQL thread is cached here; it
+    helps the thread save 3 get_charset() calls per Query_log_event if the
+    charset is not changing from event to event (a common situation).
+    When the 6 bytes are all 0, it means "the cache is invalidated".
+ */
+ void cached_charset_invalidate();
+ bool cached_charset_compare(char *charset);
+
+ /*
+    To reload special tables when they are changed, we introduce a set
+    of functions that will mark whenever special functions need to be
+    called after modifying tables. Right now, the tables are either
+    ACL tables or grant tables.
+ */
+ enum enum_reload_flag
+ {
+ RELOAD_NONE_F = 0UL,
+ RELOAD_GRANT_F = (1UL << 0),
+ RELOAD_ACCESS_F = (1UL << 1)
+ };
+
+ ulong m_reload_flags;
+
+ void touching_table(char const* db, char const* table, ulong table_id);
+ void transaction_end(THD*);
+
+ void cleanup_context(THD *, bool);
+ time_t unsafe_to_stop_at;
+} RELAY_LOG_INFO;
+
+#endif /* RPL_RLI_H */
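[Editor's note] A hedged sketch of how a caller could consult the UNTIL machinery declared above; the helper name is invented, and the real logic is driven from the slave SQL thread:

    // Illustrative only; assumes 'rli' is an initialized RELAY_LOG_INFO.
    bool should_stop_for_until(RELAY_LOG_INFO *rli)
    {
      if (rli->until_condition == st_relay_log_info::UNTIL_NONE)
        return false;
      /*
        is_until_satisfied() caches the log-name comparison internally; the
        notify_*_log_name_update() methods invalidate that cache whenever the
        corresponding log name changes.
      */
      return rli->is_until_satisfied();
    }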
diff --git a/sql/rpl_tblmap.cc b/sql/rpl_tblmap.cc
new file mode 100644
index 00000000000..a0272b23ee8
--- /dev/null
+++ b/sql/rpl_tblmap.cc
@@ -0,0 +1,151 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "mysql_priv.h"
+
+#ifdef HAVE_REPLICATION
+
+#include "rpl_tblmap.h"
+
+#define MAYBE_TABLE_NAME(T) ((T) ? (T)->s->table_name.str : "<>")
+#define TABLE_ID_HASH_SIZE 32
+#define TABLE_ID_CHUNK 256
+
+table_mapping::table_mapping()
+ : m_free(0)
+{
+ /*
+ No "free_element" function for entries passed here, as the entries are
+ allocated in a MEM_ROOT (freed as a whole in the destructor), they cannot
+ be freed one by one.
+ Note that below we don't test if hash_init() succeeded. This constructor
+ is called at startup only.
+ */
+ (void) hash_init(&m_table_ids,&my_charset_bin,TABLE_ID_HASH_SIZE,
+ offsetof(entry,table_id),sizeof(ulong),
+ 0,0,0);
+ /* We don't preallocate any block, this is consistent with m_free=0 above */
+ init_alloc_root(&m_mem_root, TABLE_ID_HASH_SIZE*sizeof(entry), 0);
+}
+
+table_mapping::~table_mapping()
+{
+ hash_free(&m_table_ids);
+ free_root(&m_mem_root, MYF(0));
+}
+
+st_table* table_mapping::get_table(ulong table_id)
+{
+ DBUG_ENTER("table_mapping::get_table(ulong)");
+ DBUG_PRINT("enter", ("table_id=%d", table_id));
+ entry *e= find_entry(table_id);
+ if (e)
+ {
+ DBUG_PRINT("info", ("tid %d -> table %p (%s)",
+ table_id, e->table,
+ MAYBE_TABLE_NAME(e->table)));
+ DBUG_RETURN(e->table);
+ }
+
+ DBUG_PRINT("info", ("tid %d is not mapped!", table_id));
+ DBUG_RETURN(NULL);
+}
+
+/*
+ Called when we are out of table id entries. Creates TABLE_ID_CHUNK
+  new entries, chains them and attaches them at the head of the list of free
+ (free for use) entries.
+*/
+int table_mapping::expand()
+{
+ /*
+ If we wanted to use "tmp= new (&m_mem_root) entry[TABLE_ID_CHUNK]",
+ we would have to make "entry" derive from Sql_alloc but then it would not
+ be a POD anymore and we want it to be (see rpl_tblmap.h). So we allocate
+ in C.
+ */
+ entry *tmp= (entry *)alloc_root(&m_mem_root, TABLE_ID_CHUNK*sizeof(entry));
+ if (tmp == NULL)
+ return ERR_MEMORY_ALLOCATION; // Memory allocation failed
+
+ /* Find the end of this fresh new array of free entries */
+ entry *e_end= tmp+TABLE_ID_CHUNK-1;
+ for (entry *e= tmp; e < e_end; e++)
+ e->next= e+1;
+ e_end->next= m_free;
+ m_free= tmp;
+ return 0;
+}
+
+int table_mapping::set_table(ulong table_id, TABLE* table)
+{
+ DBUG_ENTER("table_mapping::set_table(ulong,TABLE*)");
+ DBUG_PRINT("enter", ("table_id=%d, table=%p (%s)",
+ table_id,
+ table, MAYBE_TABLE_NAME(table)));
+ entry *e= find_entry(table_id);
+ if (e == 0)
+ {
+ if (m_free == 0 && expand())
+ DBUG_RETURN(ERR_MEMORY_ALLOCATION); // Memory allocation failed
+ e= m_free;
+ m_free= m_free->next;
+ }
+ else
+ hash_delete(&m_table_ids,(byte *)e);
+
+ e->table_id= table_id;
+ e->table= table;
+ my_hash_insert(&m_table_ids,(byte *)e);
+
+ DBUG_PRINT("info", ("tid %d -> table %p (%s)",
+ table_id, e->table,
+ MAYBE_TABLE_NAME(e->table)));
+ DBUG_RETURN(0); // All OK
+}
+
+int table_mapping::remove_table(ulong table_id)
+{
+ entry *e= find_entry(table_id);
+ if (e)
+ {
+ hash_delete(&m_table_ids,(byte *)e);
+ /* we add this entry to the chain of free (free for use) entries */
+ e->next= m_free;
+ m_free= e;
+ return 0; // All OK
+ }
+ return 1; // No table to remove
+}
+
+/*
+ Puts all entries into the list of free-for-use entries (does not free any
+ memory), and empties the hash.
+*/
+void table_mapping::clear_tables()
+{
+ DBUG_ENTER("table_mapping::clear_tables()");
+ for (uint i= 0; i < m_table_ids.records; i++)
+ {
+ entry *e= (entry *)hash_element(&m_table_ids, i);
+ e->next= m_free;
+ m_free= e;
+ }
+ my_hash_reset(&m_table_ids);
+ DBUG_VOID_RETURN;
+}
+
+#endif
diff --git a/sql/rpl_tblmap.h b/sql/rpl_tblmap.h
new file mode 100644
index 00000000000..cfc2d7c2c6c
--- /dev/null
+++ b/sql/rpl_tblmap.h
@@ -0,0 +1,109 @@
+/* Copyright (C) 2005 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef TABLE_MAPPING_H
+#define TABLE_MAPPING_H
+
+/* Forward declarations */
+struct st_table;
+typedef st_table TABLE;
+
+/*
+ CLASS table_mapping
+
+ RESPONSIBILITIES
+    The table mapping is used to map table ids to table pointers
+
+  COLLABORATION
+    RELAY_LOG For mapping table ids to tables when receiving events.
+ */
+
+/*
+ Guilhem to Mats:
+ in the table_mapping class, the memory is allocated and never freed (until
+ destruction). So this is a good candidate for allocating inside a MEM_ROOT:
+  it gives efficient allocation in chunks (like in expand()). So I have
+ introduced a MEM_ROOT.
+
+ Note that inheriting from Sql_alloc had no effect: it has effects only when
+ "ptr= new table_mapping" is called, and this is never called. And it would
+  then allocate from thd->mem_root, which is a highly volatile object (reset,
+  for example, after executing each query; see dispatch_command(), which has a
+  free_root() at the end); as the table_mapping object is supposed to live longer
+ than a query, it was dangerous.
+ A dedicated MEM_ROOT needs to be used, see below.
+*/
+
+class table_mapping {
+
+private:
+ MEM_ROOT m_mem_root;
+
+public:
+
+ enum {
+ NO_TABLE = ULONG_MAX
+ };
+
+ enum enum_error {
+ ERR_NO_ERROR = 0,
+ ERR_LIMIT_EXCEEDED,
+ ERR_MEMORY_ALLOCATION
+ };
+
+ table_mapping();
+ ~table_mapping();
+
+ TABLE* get_table(ulong table_id);
+
+ int set_table(ulong table_id, TABLE* table);
+ int remove_table(ulong table_id);
+ void clear_tables();
+ ulong count() const { return m_table_ids.records; }
+
+private:
+ /*
+ This is a POD (Plain Old Data). Keep it that way (we apply offsetof() to
+ it, which only works for PODs)
+ */
+ struct entry {
+ ulong table_id;
+ union {
+ TABLE *table;
+ entry *next;
+ };
+ };
+
+ entry *find_entry(ulong table_id)
+ {
+ return (entry *)hash_search(&m_table_ids,
+ (byte*)&table_id,
+ sizeof(table_id));
+ }
+ int expand();
+
+ /*
+ Head of the list of free entries; "free" in the sense that it's an
+ allocated entry free for use, NOT in the sense that it's freed
+ memory.
+ */
+ entry *m_free;
+
+  /* Correspondence between an id (a number) and a TABLE object */
+ HASH m_table_ids;
+};
+
+#endif
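[Editor's note] A minimal usage sketch of the class above, with an invented helper and placeholder arguments; the real callers are the replication table-map handling in the server:

    // Illustrative only.
    int remember_and_lookup(table_mapping *map, ulong table_id, TABLE *tbl)
    {
      /* set_table() replaces any existing entry for this id; it returns 0 on
         success or ERR_MEMORY_ALLOCATION if it cannot allocate an entry. */
      if (map->set_table(table_id, tbl))
        return 1;
      /* A later rows event carrying the same id can look the table up. */
      TABLE *found= map->get_table(table_id);   /* NULL if the id is unknown */
      /* When the mappings are no longer needed, drop them all at once. */
      map->clear_tables();
      return found == tbl ? 0 : 1;
    }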
diff --git a/sql/set_var.cc b/sql/set_var.cc
index 990d8047009..01ff30045c4 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -619,6 +619,7 @@ sys_var_have_variable sys_have_query_cache("have_query_cache",
sys_var_have_variable sys_have_raid("have_raid", &have_raid);
sys_var_have_variable sys_have_rtree_keys("have_rtree_keys", &have_rtree_keys);
sys_var_have_variable sys_have_symlink("have_symlink", &have_symlink);
+sys_var_have_variable sys_have_row_based_replication("have_row_based_replication",&have_row_based_replication);
/* Global read-only variable describing server license */
sys_var_const_str sys_license("license", STRINGIFY_ARG(LICENSE));
@@ -643,6 +644,7 @@ struct show_var_st init_vars[]= {
{"bdb_shared_data", (char*) &berkeley_shared_data, SHOW_BOOL},
{"bdb_tmpdir", (char*) &berkeley_tmpdir, SHOW_CHAR_PTR},
{sys_binlog_cache_size.name,(char*) &sys_binlog_cache_size, SHOW_SYS},
+ {"binlog_format", (char*) &opt_binlog_format, SHOW_CHAR_PTR},
{sys_bulk_insert_buff_size.name,(char*) &sys_bulk_insert_buff_size,SHOW_SYS},
{sys_character_set_client.name,(char*) &sys_character_set_client, SHOW_SYS},
{sys_character_set_connection.name,(char*) &sys_character_set_connection,SHOW_SYS},
@@ -695,6 +697,7 @@ struct show_var_st init_vars[]= {
{sys_have_raid.name, (char*) &have_raid, SHOW_HAVE},
{sys_have_rtree_keys.name, (char*) &have_rtree_keys, SHOW_HAVE},
{sys_have_symlink.name, (char*) &have_symlink, SHOW_HAVE},
+ {sys_have_row_based_replication.name, (char*) &have_row_based_replication, SHOW_HAVE},
{"init_connect", (char*) &sys_init_connect, SHOW_SYS},
{"init_file", (char*) &opt_init_file, SHOW_CHAR_PTR},
{"init_slave", (char*) &sys_init_slave, SHOW_SYS},
diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt
index aa5607c9f6a..ab9663db475 100644
--- a/sql/share/errmsg.txt
+++ b/sql/share/errmsg.txt
@@ -5727,3 +5727,9 @@ ER_WRONG_VALUE
eng "Incorrect %-.32s value: '%-.128s'"
ER_NO_PARTITION_FOR_GIVEN_VALUE
eng "Table has no partition for value %ld"
+ER_BINLOG_ROW_LOGGING_FAILED
+ eng "Writing one row to the row-based binary log failed"
+ER_BINLOG_ROW_WRONG_TABLE_DEF
+ eng "Table definition on master and slave does not match"
+ER_BINLOG_ROW_RBR_TO_SBR
+ eng "Slave running with --log-slave-updates must use row-based binary logging to be able to replicate row-based binary log events"
diff --git a/sql/slave.cc b/sql/slave.cc
index 6c9cfc250c5..99bddb7b9b0 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -16,10 +16,9 @@
#include "mysql_priv.h"
-#ifdef HAVE_REPLICATION
-
#include <mysql.h>
#include <myisam.h>
+#include "rpl_rli.h"
#include "slave.h"
#include "sql_repl.h"
#include "rpl_filter.h"
@@ -28,6 +27,10 @@
#include <my_dir.h>
#include <sql_common.h>
+#ifdef HAVE_REPLICATION
+
+#include "rpl_tblmap.h"
+
#define MAX_SLAVE_RETRY_PAUSE 5
bool use_slave_mask = 0;
MY_BITMAP slave_error_mask;
@@ -48,8 +51,6 @@ ulonglong relay_log_space_limit = 0;
*/
int disconnect_slave_event_count = 0, abort_slave_event_count = 0;
-int events_till_abort = -1;
-static int events_till_disconnect = -1;
typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE;
@@ -860,19 +861,48 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli)
{
DBUG_ASSERT(rli->sql_thd == thd);
DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
- return rli->abort_slave || abort_loop || thd->killed;
+ if (abort_loop || thd->killed || rli->abort_slave)
+ {
+ /*
+ If we are in an unsafe situation (stopping could corrupt replication),
+ we give the slave SQL thread one minute of grace before really
+ terminating, in the hope that it will be able to read more events and
+ leave the unsafe situation soon. Note that this one minute starts
+ from the last time anything happened in the slave SQL thread, so it is
+ really one minute of idleness; we don't time out if the slave SQL
+ thread is actively working.
+ */
+ if (!rli->unsafe_to_stop_at)
+ return 1;
+ DBUG_PRINT("info", ("Slave SQL thread is in an unsafe situation, giving "
+ "it some grace period"));
+ if (difftime(time(0), rli->unsafe_to_stop_at) > 60)
+ {
+ slave_print_msg(ERROR_LEVEL, rli, 0,
+ "SQL thread had to stop in an unsafe situation, in "
+ "the middle of applying updates to a "
+ "non-transactional table without any primary key. "
+ "There is a risk of duplicate updates when the slave "
+ "SQL thread is restarted. Please check your tables' "
+ "contents after restart.");
+ return 1;
+ }
+ }
+ return 0;
}
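The grace-period logic above reduces to a small decision rule. The sketch below is illustrative only; the names should_stop, unsafe_since and idle_grace_seconds are invented for the example and are not identifiers from this patch:

    #include <ctime>

    /*
      Minimal sketch of the stop decision: honour a stop request immediately
      when the thread is at a safe point, but when stopping would be unsafe,
      allow a grace period measured from the last activity.
    */
    static bool should_stop(bool stop_requested, time_t unsafe_since,
                            double idle_grace_seconds= 60.0)
    {
      if (!stop_requested)
        return false;                  /* nobody asked us to stop */
      if (unsafe_since == 0)
        return true;                   /* safe point: stop right away */
      /* unsafe: only stop after enough idle time has passed */
      return difftime(time(0), unsafe_since) > idle_grace_seconds;
    }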
/*
- Writes an error message to rli->last_slave_error and rli->last_slave_errno
- (which will be displayed by SHOW SLAVE STATUS), and prints it to stderr.
+ Writes a message to stderr, and if it's an error message, to
+ rli->last_slave_error and rli->last_slave_errno (which will be displayed by
+ SHOW SLAVE STATUS).
SYNOPSIS
- slave_print_error()
- rli
+ slave_print_msg()
+ level The severity level
+ rli
err_code The error code
- msg The error message (usually related to the error code, but can
+ msg The message (usually related to the error code, but can
contain more information).
... (this is printf-like format, with % symbols in msg)
@@ -880,22 +910,47 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli)
void
*/
-void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...)
+void slave_print_msg(enum loglevel level, RELAY_LOG_INFO* rli,
+ int err_code, const char* msg, ...)
{
+ void (*report_function)(const char *, ...);
+ char buff[MAX_SLAVE_ERRMSG], *pbuff= buff;
+ uint pbuffsize= sizeof(buff);
va_list args;
va_start(args,msg);
- my_vsnprintf(rli->last_slave_error,
- sizeof(rli->last_slave_error), msg, args);
- rli->last_slave_errno = err_code;
- /* If the error string ends with '.', do not add a ',' it would be ugly */
- if (rli->last_slave_error[0] &&
- (*(strend(rli->last_slave_error)-1) == '.'))
- sql_print_error("Slave: %s Error_code: %d", rli->last_slave_error,
- err_code);
+ switch (level)
+ {
+ case ERROR_LEVEL:
+ /*
+ This my_error call only has effect in client threads.
+ Slave threads do nothing in my_error().
+ */
+ my_error(ER_UNKNOWN_ERROR, MYF(0), msg);
+ /*
+ It's an error, it must be reported in Last_error and Last_errno in SHOW
+ SLAVE STATUS.
+ */
+ pbuff= rli->last_slave_error;
+ pbuffsize= sizeof(rli->last_slave_error);
+ rli->last_slave_errno = err_code;
+ report_function= sql_print_error;
+ break;
+ case WARNING_LEVEL:
+ report_function= sql_print_warning;
+ break;
+ case INFORMATION_LEVEL:
+ report_function= sql_print_information;
+ break;
+ default:
+ DBUG_ASSERT(0); // should not come here
+ return; // don't crash production builds, just do nothing
+ }
+ my_vsnprintf(pbuff, pbuffsize, msg, args);
+ /* If the msg string ends with '.', do not add a ','; that would be ugly */
+ if (pbuff[0] && (*(strend(pbuff)-1) == '.'))
+ (*report_function)("Slave: %s Error_code: %d", pbuff, err_code);
else
- sql_print_error("Slave: %s, Error_code: %d", rli->last_slave_error,
- err_code);
-
+ (*report_function)("Slave: %s, Error_code: %d", pbuff, err_code);
}
/*
@@ -919,7 +974,6 @@ bool net_request_file(NET* net, const char* fname)
DBUG_RETURN(net_write_command(net, 251, fname, strlen(fname), "", 0));
}
-
/*
From other comments and tests in code, it looks like
sometimes Query_log_event and Load_log_event can have db == 0
@@ -932,7 +986,6 @@ const char *print_slave_db_safe(const char* db)
return (db ? db : "");
}
-
static int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
const char *default_val)
{
@@ -1379,6 +1432,7 @@ static int init_relay_log_info(RELAY_LOG_INFO* rli,
const char* msg = 0;
int error = 0;
DBUG_ENTER("init_relay_log_info");
+ DBUG_ASSERT(!rli->no_storage); // Don't init if there is no storage
if (rli->inited) // Set if this function called
DBUG_RETURN(0);
@@ -1674,7 +1728,7 @@ static void write_ignored_events_info_to_relay_log(THD *thd, MASTER_INFO *mi)
if (rli->ign_master_log_name_end[0])
{
DBUG_PRINT("info",("writing a Rotate event to track down ignored events"));
- Rotate_log_event *ev= new Rotate_log_event(thd, rli->ign_master_log_name_end,
+ Rotate_log_event *ev= new Rotate_log_event(rli->ign_master_log_name_end,
0, rli->ign_master_log_pos_end,
Rotate_log_event::DUP_NAME);
rli->ign_master_log_name_end[0]= 0;
@@ -2241,17 +2295,17 @@ bool flush_master_info(MASTER_INFO* mi, bool flush_relay_log_cache)
st_relay_log_info::st_relay_log_info()
- :info_fd(-1), cur_log_fd(-1), save_temporary_tables(0),
+ :no_storage(FALSE), info_fd(-1), cur_log_fd(-1), save_temporary_tables(0),
cur_log_old_open_count(0), group_master_log_pos(0), log_space_total(0),
ignore_log_space_limit(0), last_master_timestamp(0), slave_skip_counter(0),
abort_pos_wait(0), slave_run_id(0), sql_thd(0), last_slave_errno(0),
inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE),
- until_log_pos(0), retried_trans(0)
+ until_log_pos(0), retried_trans(0), m_reload_flags(RELOAD_NONE_F),
+ unsafe_to_stop_at(0)
{
group_relay_log_name[0]= event_relay_log_name[0]=
group_master_log_name[0]= 0;
last_slave_error[0]= until_log_name[0]= ign_master_log_name_end[0]= 0;
-
bzero((char*) &info_file, sizeof(info_file));
bzero((char*) &cache_buf, sizeof(cache_buf));
cached_charset_invalidate();
@@ -2671,11 +2725,9 @@ static ulong read_event(MYSQL* mysql, MASTER_INFO *mi, bool* suppress_warnings)
/*
my_real_read() will time us out
We check if we were told to die, and if not, try reading again
-
- TODO: Move 'events_till_disconnect' to the MASTER_INFO structure
*/
#ifndef DBUG_OFF
- if (disconnect_slave_event_count && !(events_till_disconnect--))
+ if (disconnect_slave_event_count && !(mi->events_till_disconnect--))
return packet_error;
#endif
@@ -2950,7 +3002,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
thd->lex->current_select= 0;
if (!ev->when)
ev->when = time(NULL);
- ev->thd = thd;
+ ev->thd = thd; // because up to this point, ev->thd == 0
exec_res = ev->exec_event(rli);
DBUG_ASSERT(rli->sql_thd==thd);
/*
@@ -3022,7 +3074,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
else
{
pthread_mutex_unlock(&rli->data_lock);
- slave_print_error(rli, 0, "\
+ slave_print_msg(ERROR_LEVEL, rli, 0, "\
Could not parse relay log event entry. The possible reasons are: the master's \
binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
binary log), the slave's relay log is corrupted (you can check this by running \
@@ -3051,9 +3103,6 @@ pthread_handler_t handle_slave_io(void *arg)
my_thread_init();
DBUG_ENTER("handle_slave_io");
-#ifndef DBUG_OFF
-slave_begin:
-#endif
DBUG_ASSERT(mi->inited);
mysql= NULL ;
retry_count= 0;
@@ -3063,7 +3112,7 @@ slave_begin:
mi->slave_run_id++;
#ifndef DBUG_OFF
- mi->events_till_abort = abort_slave_event_count;
+ mi->events_till_disconnect = disconnect_slave_event_count;
#endif
thd= new THD; // note that contructor of THD uses DBUG_ !
@@ -3301,14 +3350,6 @@ ignore_log_space_limit=%d",
log space");
goto err;
}
- // TODO: check debugging abort code
-#ifndef DBUG_OFF
- if (abort_slave_event_count && !--events_till_abort)
- {
- sql_print_error("Slave I/O thread: debugging abort");
- goto err;
- }
-#endif
}
}
@@ -3347,10 +3388,6 @@ err:
pthread_mutex_unlock(&LOCK_thread_count);
pthread_cond_broadcast(&mi->stop_cond); // tell the world we are done
pthread_mutex_unlock(&mi->run_lock);
-#ifndef DBUG_OFF
- if (abort_slave_event_count && !events_till_abort)
- goto slave_begin;
-#endif
my_thread_end();
pthread_exit(0);
DBUG_RETURN(0); // Can't return anything here
@@ -3370,10 +3407,6 @@ pthread_handler_t handle_slave_sql(void *arg)
my_thread_init();
DBUG_ENTER("handle_slave_sql");
-#ifndef DBUG_OFF
-slave_begin:
-#endif
-
DBUG_ASSERT(rli->inited);
pthread_mutex_lock(&rli->run_lock);
DBUG_ASSERT(!rli->slave_running);
@@ -3520,6 +3553,14 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
RPL_LOG_NAME, llstr(rli->group_master_log_pos,llbuff));
err:
+
+ /*
+ Some events set up state ("playgrounds") that will not be cleared merely
+ because the thread stops. Stopping of this thread may not be known to these
+ events (the "stop" request is detected only by the present function, not by
+ the events), so we must "proactively" clear those playgrounds:
+ */
+ rli->cleanup_context(thd, 1);
VOID(pthread_mutex_lock(&LOCK_thread_count));
/*
Some extra safety, which should not been needed (normally, event deletion
@@ -3565,10 +3606,6 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
pthread_cond_broadcast(&rli->stop_cond);
// tell the world we are done
pthread_mutex_unlock(&rli->run_lock);
-#ifndef DBUG_OFF // TODO: reconsider the code below
- if (abort_slave_event_count && !rli->events_till_abort)
- goto slave_begin;
-#endif
my_thread_end();
pthread_exit(0);
DBUG_RETURN(0); // Can't return anything here
@@ -3721,7 +3758,7 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev)
rotate event forever, so we need to not disconnect after one.
*/
if (disconnect_slave_event_count)
- events_till_disconnect++;
+ mi->events_till_disconnect++;
#endif
/*
@@ -4177,7 +4214,7 @@ static int connect_to_master(THD* thd, MYSQL* mysql, MASTER_INFO* mi,
DBUG_ENTER("connect_to_master");
#ifndef DBUG_OFF
- events_till_disconnect = disconnect_slave_event_count;
+ mi->events_till_disconnect = disconnect_slave_event_count;
#endif
ulong client_flag= CLIENT_REMEMBER_OPTIONS;
if (opt_slave_compressed_protocol)
@@ -4311,6 +4348,10 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, MASTER_INFO* mi,
bool flush_relay_log_info(RELAY_LOG_INFO* rli)
{
bool error=0;
+
+ if (unlikely(rli->no_storage))
+ return 0;
+
IO_CACHE *file = &rli->info_file;
char buff[FN_REFLEN*2+22*2+4], *pos;
@@ -4327,6 +4368,7 @@ bool flush_relay_log_info(RELAY_LOG_INFO* rli)
error=1;
if (flush_io_cache(file))
error=1;
+
/* Flushing the relay log is done by the slave I/O thread */
return error;
}
@@ -4357,7 +4399,7 @@ static IO_CACHE *reopen_relay_log(RELAY_LOG_INFO *rli, const char **errmsg)
}
-Log_event* next_event(RELAY_LOG_INFO* rli)
+static Log_event* next_event(RELAY_LOG_INFO* rli)
{
Log_event* ev;
IO_CACHE* cur_log = rli->cur_log;
@@ -4368,6 +4410,11 @@ Log_event* next_event(RELAY_LOG_INFO* rli)
DBUG_ENTER("next_event");
DBUG_ASSERT(thd != 0);
+#ifndef DBUG_OFF
+ if (abort_slave_event_count && !rli->events_till_abort--)
+ DBUG_RETURN(0);
+#endif
+
/*
For most operations we need to protect rli members with data_lock,
so we assume calling function acquired this mutex for us and we will
@@ -4489,7 +4536,7 @@ Log_event* next_event(RELAY_LOG_INFO* rli)
{
/* We generate and return a Rotate, to make our positions advance */
DBUG_PRINT("info",("seeing an ignored end segment"));
- ev= new Rotate_log_event(thd, rli->ign_master_log_name_end,
+ ev= new Rotate_log_event(rli->ign_master_log_name_end,
0, rli->ign_master_log_pos_end,
Rotate_log_event::DUP_NAME);
rli->ign_master_log_name_end[0]= 0;
@@ -4737,11 +4784,114 @@ end:
DBUG_VOID_RETURN;
}
+/*
+ Some system tables need to be re-read by the MySQL server after it has
+ updated them. In statement-based replication, the GRANT and other commands
+ are sent verbatim to the slave, which then reloads; in row-based replication,
+ changes to these tables are done through ordinary Rows binlog events, so the
+ master must set a flag for the slave to know it has to reload the tables.
+*/
+struct st_reload_entry
+{
+ char const *table;
+ st_relay_log_info::enum_reload_flag flag;
+};
+
+/*
+ Sorted array of table names; please keep it sorted, since bsearch()
+ below relies on the order.
+ */
+static st_reload_entry s_mysql_tables[] =
+{
+ { "columns_priv", st_relay_log_info::RELOAD_GRANT_F },
+ { "db", st_relay_log_info::RELOAD_ACCESS_F },
+ { "host", st_relay_log_info::RELOAD_ACCESS_F },
+ { "procs_priv", st_relay_log_info::RELOAD_GRANT_F },
+ { "tables_priv", st_relay_log_info::RELOAD_GRANT_F },
+ { "user", st_relay_log_info::RELOAD_ACCESS_F }
+};
+
+static const my_size_t s_mysql_tables_size =
+ sizeof(s_mysql_tables)/sizeof(*s_mysql_tables);
+
+static int reload_entry_compare(const void *lhs, const void *rhs)
+{
+ const char *lstr = static_cast<const char *>(lhs);
+ const char *rstr = static_cast<const st_reload_entry*>(rhs)->table;
+ return strcmp(lstr, rstr);
+}
+
+void st_relay_log_info::touching_table(char const* db, char const* table,
+ ulong table_id)
+{
+ if (strcmp(db,"mysql") == 0)
+ {
+#if defined(HAVE_BSEARCH) && defined(HAVE_SIZE_T)
+ void *const ptr= bsearch(table, s_mysql_tables,
+ s_mysql_tables_size,
+ sizeof(*s_mysql_tables), reload_entry_compare);
+ st_reload_entry const *const entry= static_cast<st_reload_entry*>(ptr);
+#else
+ /*
+ Fall back to a full scan; there are few rows anyway and updating the
+ "mysql" database is rare.
+ */
+ st_reload_entry const *entry= s_mysql_tables;
+ for ( ; entry < s_mysql_tables + s_mysql_tables_size ; entry++)
+ if (reload_entry_compare(table, entry) == 0)
+ break;
+ if (entry == s_mysql_tables + s_mysql_tables_size)
+ entry= NULL; /* not found: same result as bsearch() returning NULL */
+#endif
+ if (entry)
+ m_reload_flags|= entry->flag;
+ }
+}
+
+void st_relay_log_info::transaction_end(THD* thd)
+{
+ if (m_reload_flags != RELOAD_NONE_F)
+ {
+ if (m_reload_flags & RELOAD_ACCESS_F)
+ acl_reload(thd);
+
+ if (m_reload_flags & RELOAD_GRANT_F)
+ grant_reload(thd);
+
+ m_reload_flags= RELOAD_NONE_F;
+ }
+}
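As a reading aid, touching_table()/transaction_end() above follow an "accumulate flags, act once per transaction" pattern. The following stand-alone sketch illustrates it under assumed names (reload_entry, cmp, reload_flags); it is not the server code:

    #include <string.h>
    #include <stdlib.h>

    enum { RELOAD_NONE= 0, RELOAD_GRANT= 1, RELOAD_ACCESS= 2 };

    struct reload_entry { const char *table; int flag; };

    /* Must stay sorted: bsearch() below relies on the order. */
    static const reload_entry tables[]= {
      { "columns_priv", RELOAD_GRANT },
      { "db",           RELOAD_ACCESS },
      { "user",         RELOAD_ACCESS }
    };

    static int cmp(const void *key, const void *elem)
    {
      return strcmp(static_cast<const char*>(key),
                    static_cast<const reload_entry*>(elem)->table);
    }

    static int reload_flags= RELOAD_NONE;

    void touching_table(const char *table)
    {
      /* Remember that a privilege table was modified by a rows event. */
      void *hit= bsearch(table, tables, sizeof(tables)/sizeof(*tables),
                         sizeof(*tables), cmp);
      if (hit)
        reload_flags|= static_cast<const reload_entry*>(hit)->flag;
    }

    void transaction_end()
    {
      /* Reload the affected caches once, when the transaction ends. */
      if (reload_flags & RELOAD_ACCESS) { /* reload ACL caches here */ }
      if (reload_flags & RELOAD_GRANT)  { /* reload grant tables here */ }
      reload_flags= RELOAD_NONE;
    }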
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+void st_relay_log_info::cleanup_context(THD *thd, bool error)
+{
+ DBUG_ASSERT(sql_thd == thd);
+ /*
+ 1) Instances of Table_map_log_event, if ::exec_event() was called on them,
+ may have opened tables, which we cannot be sure have been closed (because
+ maybe the Rows_log_event has not been found or will not be, because the
+ slave SQL thread is stopping, or the relay log has a missing tail, etc.).
+ So we close all of the thread's tables, and hence the table mappings have
+ to be cancelled.
+ 2) Rows_log_event::exec_event() may even have started statements or
+ transactions on them, which we need to roll back in case of error.
+ 3) If we find a Format_description_log_event after a BEGIN, we also need
+ to roll back before continuing with the next events.
+ 4) Hence we need this "context cleanup" function.
+ */
+ if (error)
+ {
+ ha_autocommit_or_rollback(thd, 1); // if a "statement transaction"
+ end_trans(thd, ROLLBACK); // if a "real transaction"
+ }
+ m_table_map.clear_tables();
+ close_thread_tables(thd);
+ unsafe_to_stop_at= 0;
+}
+#endif
+
#ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
template class I_List_iterator<i_string>;
template class I_List_iterator<i_string_pair>;
#endif
-
#endif /* HAVE_REPLICATION */
+
diff --git a/sql/slave.h b/sql/slave.h
index c994bfb2d34..6870aaca752 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -14,17 +14,19 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-#ifdef HAVE_REPLICATION
-
#ifndef SLAVE_H
#define SLAVE_H
-#include "mysql.h"
+#ifdef HAVE_REPLICATION
+
+#include "log.h"
#include "my_list.h"
#include "rpl_filter.h"
+#include "rpl_tblmap.h"
+#include "rpl_rli.h"
#define SLAVE_NET_TIMEOUT 3600
-#define MAX_SLAVE_ERRMSG 1024
+
#define MAX_SLAVE_ERROR 2000
/*****************************************************************************
@@ -110,265 +112,7 @@ struct st_master_info;
#define MYSQL_SLAVE_RUN_NOT_CONNECT 1
#define MYSQL_SLAVE_RUN_CONNECT 2
-/****************************************************************************
-
- Replication SQL Thread
-
- st_relay_log_info contains:
- - the current relay log
- - the current relay log offset
- - master log name
- - master log sequence corresponding to the last update
- - misc information specific to the SQL thread
-
- st_relay_log_info is initialized from the slave.info file if such exists.
- Otherwise, data members are intialized with defaults. The initialization is
- done with init_relay_log_info() call.
-
- The format of slave.info file:
-
- relay_log_name
- relay_log_pos
- master_log_name
- master_log_pos
-
- To clean up, call end_relay_log_info()
-
-*****************************************************************************/
-
-typedef struct st_relay_log_info
-{
- /*** The following variables can only be read when protect by data lock ****/
-
- /*
- info_fd - file descriptor of the info file. set only during
- initialization or clean up - safe to read anytime
- cur_log_fd - file descriptor of the current read relay log
- */
- File info_fd,cur_log_fd;
-
- /*
- Protected with internal locks.
- Must get data_lock when resetting the logs.
- */
- MYSQL_LOG relay_log;
- LOG_INFO linfo;
- IO_CACHE cache_buf,*cur_log;
-
- /* The following variables are safe to read any time */
-
- /* IO_CACHE of the info file - set only during init or end */
- IO_CACHE info_file;
-
- /*
- When we restart slave thread we need to have access to the previously
- created temporary tables. Modified only on init/end and by the SQL
- thread, read only by SQL thread.
- */
- TABLE *save_temporary_tables;
-
- /*
- standard lock acquistion order to avoid deadlocks:
- run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index
- */
- pthread_mutex_t data_lock,run_lock;
-
- /*
- start_cond is broadcast when SQL thread is started
- stop_cond - when stopped
- data_cond - when data protected by data_lock changes
- */
- pthread_cond_t start_cond, stop_cond, data_cond;
-
- /* parent master info structure */
- struct st_master_info *mi;
-
- /*
- Needed to deal properly with cur_log getting closed and re-opened with
- a different log under our feet
- */
- uint32 cur_log_old_open_count;
-
- /*
- Let's call a group (of events) :
- - a transaction
- or
- - an autocommiting query + its associated events (INSERT_ID,
- TIMESTAMP...)
- We need these rli coordinates :
- - relay log name and position of the beginning of the group we currently are
- executing. Needed to know where we have to restart when replication has
- stopped in the middle of a group (which has been rolled back by the slave).
- - relay log name and position just after the event we have just
- executed. This event is part of the current group.
- Formerly we only had the immediately above coordinates, plus a 'pending'
- variable, but this dealt wrong with the case of a transaction starting on a
- relay log and finishing (commiting) on another relay log. Case which can
- happen when, for example, the relay log gets rotated because of
- max_binlog_size.
- */
- char group_relay_log_name[FN_REFLEN];
- ulonglong group_relay_log_pos;
- char event_relay_log_name[FN_REFLEN];
- ulonglong event_relay_log_pos;
- ulonglong future_event_relay_log_pos;
-
- /*
- Original log name and position of the group we're currently executing
- (whose coordinates are group_relay_log_name/pos in the relay log)
- in the master's binlog. These concern the *group*, because in the master's
- binlog the log_pos that comes with each event is the position of the
- beginning of the group.
- */
- char group_master_log_name[FN_REFLEN];
- volatile my_off_t group_master_log_pos;
-
- /*
- Handling of the relay_log_space_limit optional constraint.
- ignore_log_space_limit is used to resolve a deadlock between I/O and SQL
- threads, the SQL thread sets it to unblock the I/O thread and make it
- temporarily forget about the constraint.
- */
- ulonglong log_space_limit,log_space_total;
- bool ignore_log_space_limit;
-
- /*
- When it commits, InnoDB internally stores the master log position it has
- processed so far; the position to store is the one of the end of the
- committing event (the COMMIT query event, or the event if in autocommit
- mode).
- */
-#if MYSQL_VERSION_ID < 40100
- ulonglong future_master_log_pos;
-#else
- ulonglong future_group_master_log_pos;
-#endif
-
- time_t last_master_timestamp;
-
- /*
- Needed for problems when slave stops and we want to restart it
- skipping one or more events in the master log that have caused
- errors, and have been manually applied by DBA already.
- */
- volatile uint32 slave_skip_counter;
- volatile ulong abort_pos_wait; /* Incremented on change master */
- volatile ulong slave_run_id; /* Incremented on slave start */
- pthread_mutex_t log_space_lock;
- pthread_cond_t log_space_cond;
- THD * sql_thd;
- int last_slave_errno;
-#ifndef DBUG_OFF
- int events_till_abort;
-#endif
- char last_slave_error[MAX_SLAVE_ERRMSG];
-
- /* if not set, the value of other members of the structure are undefined */
- bool inited;
- volatile bool abort_slave;
- volatile uint slave_running;
-
- /*
- Condition and its parameters from START SLAVE UNTIL clause.
-
- UNTIL condition is tested with is_until_satisfied() method that is
- called by exec_relay_log_event(). is_until_satisfied() caches the result
- of the comparison of log names because log names don't change very often;
- this cache is invalidated by parts of code which change log names with
- notify_*_log_name_updated() methods. (They need to be called only if SQL
- thread is running).
- */
-
- enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS} until_condition;
- char until_log_name[FN_REFLEN];
- ulonglong until_log_pos;
- /* extension extracted from log_name and converted to int */
- ulong until_log_name_extension;
- /*
- Cached result of comparison of until_log_name and current log name
- -2 means unitialised, -1,0,1 are comarison results
- */
- enum
- {
- UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1,
- UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1
- } until_log_names_cmp_result;
-
- char cached_charset[6];
- /*
- trans_retries varies between 0 to slave_transaction_retries and counts how
- many times the slave has retried the present transaction; gets reset to 0
- when the transaction finally succeeds. retried_trans is a cumulative
- counter: how many times the slave has retried a transaction (any) since
- slave started.
- */
- ulong trans_retries, retried_trans;
-
- /*
- If the end of the hot relay log is made of master's events ignored by the
- slave I/O thread, these two keep track of the coords (in the master's
- binlog) of the last of these events seen by the slave I/O thread. If not,
- ign_master_log_name_end[0] == 0.
- As they are like a Rotate event read/written from/to the relay log, they
- are both protected by rli->relay_log.LOCK_log.
- */
- char ign_master_log_name_end[FN_REFLEN];
- ulonglong ign_master_log_pos_end;
-
- st_relay_log_info();
- ~st_relay_log_info();
-
- /*
- Invalidate cached until_log_name and group_relay_log_name comparison
- result. Should be called after any update of group_realy_log_name if
- there chances that sql_thread is running.
- */
- inline void notify_group_relay_log_name_update()
- {
- if (until_condition==UNTIL_RELAY_POS)
- until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
- }
-
- /*
- The same as previous but for group_master_log_name.
- */
- inline void notify_group_master_log_name_update()
- {
- if (until_condition==UNTIL_MASTER_POS)
- until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
- }
-
- inline void inc_event_relay_log_pos()
- {
- event_relay_log_pos= future_event_relay_log_pos;
- }
-
- void inc_group_relay_log_pos(ulonglong log_pos,
- bool skip_lock=0);
-
- int wait_for_pos(THD* thd, String* log_name, longlong log_pos,
- longlong timeout);
- void close_temporary_tables();
-
- /* Check if UNTIL condition is satisfied. See slave.cc for more. */
- bool is_until_satisfied();
- inline ulonglong until_pos()
- {
- return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos :
- group_relay_log_pos);
- }
- /*
- Last charset (6 bytes) seen by slave SQL thread is cached here; it helps
- the thread save 3 get_charset() per Query_log_event if the charset is not
- changing from event to event (common situation).
- When the 6 bytes are equal to 0 is used to mean "cache is invalidated".
- */
- void cached_charset_invalidate();
- bool cached_charset_compare(char *charset);
-} RELAY_LOG_INFO;
-
-
-Log_event* next_event(RELAY_LOG_INFO* rli);
+static Log_event* next_event(RELAY_LOG_INFO* rli);
/*****************************************************************************
@@ -427,7 +171,7 @@ typedef struct st_master_info
uint port;
uint connect_retry;
#ifndef DBUG_OFF
- int events_till_abort;
+ int events_till_disconnect;
#endif
bool inited;
volatile bool abort_slave;
@@ -474,17 +218,11 @@ typedef struct st_master_info
int queue_event(MASTER_INFO* mi,const char* buf,ulong event_len);
-#define MAX_SLAVE_ERRMSG 1024
-
#define RPL_LOG_NAME (rli->group_master_log_name[0] ? rli->group_master_log_name :\
"FIRST")
#define IO_RPL_LOG_NAME (mi->master_log_name[0] ? mi->master_log_name :\
"FIRST")
-/* masks for start/stop operations on io and sql slave threads */
-#define SLAVE_IO 1
-#define SLAVE_SQL 2
-
/*
If the following is set, if first gives an error, second will be
tried. Otherwise, if first fails, we fail.
@@ -533,7 +271,8 @@ bool show_binlog_info(THD* thd);
const char *print_slave_db_safe(const char *db);
int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code);
void skip_load_data_infile(NET* net);
-void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...);
+void slave_print_msg(enum loglevel level, RELAY_LOG_INFO* rli,
+ int err_code, const char* msg, ...);
void end_slave(); /* clean up */
void init_master_info_with_options(MASTER_INFO* mi);
@@ -579,8 +318,12 @@ extern my_string master_ssl_ca, master_ssl_capath, master_ssl_cert,
extern I_List<THD> threads;
-#endif
-#else
+#endif /* HAVE_REPLICATION */
+
+/* masks for start/stop operations on io and sql slave threads */
#define SLAVE_IO 1
#define SLAVE_SQL 2
-#endif /* HAVE_REPLICATION */
+
+#endif
+
+
diff --git a/sql/sp.cc b/sql/sp.cc
index 81164131910..c85c1f2afef 100644
--- a/sql/sp.cc
+++ b/sql/sp.cc
@@ -585,14 +585,14 @@ db_create_routine(THD *thd, int type, sp_head *sp)
}
ret= SP_OK;
- if (table->file->write_row(table->record[0]))
+ if (table->file->ha_write_row(table->record[0]))
ret= SP_WRITE_ROW_FAILED;
else if (mysql_bin_log.is_open())
{
thd->clear_error();
/* Such a statement can always go directly to binlog, no trans cache */
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
}
@@ -618,7 +618,7 @@ db_drop_routine(THD *thd, int type, sp_name *name)
DBUG_RETURN(SP_OPEN_TABLE_FAILED);
if ((ret= db_find_routine_aux(thd, type, name, table)) == SP_OK)
{
- if (table->file->delete_row(table->record[0]))
+ if (table->file->ha_delete_row(table->record[0]))
ret= SP_DELETE_ROW_FAILED;
}
close_thread_tables(thd);
@@ -653,7 +653,7 @@ db_update_routine(THD *thd, int type, sp_name *name, st_sp_chistics *chistics)
table->field[MYSQL_PROC_FIELD_COMMENT]->store(chistics->comment.str,
chistics->comment.length,
system_charset_info);
- if ((table->file->update_row(table->record[1],table->record[0])))
+ if ((table->file->ha_update_row(table->record[1],table->record[0])))
ret= SP_WRITE_ROW_FAILED;
}
close_thread_tables(thd);
@@ -873,7 +873,7 @@ sp_drop_db_routines(THD *thd, char *db)
do
{
- if (! table->file->delete_row(table->record[0]))
+ if (! table->file->ha_delete_row(table->record[0]))
deleted= TRUE; /* We deleted something */
else
{
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
index 4a252fc4d86..63d1388473e 100644
--- a/sql/sp_head.cc
+++ b/sql/sp_head.cc
@@ -695,6 +695,9 @@ int cmp_splocal_locations(Item_splocal * const *a, Item_splocal * const *b)
/*
StoredRoutinesBinlogging
+ This paragraph applies only to statement-based binlogging. Row-based
+ binlogging does not need anything special like this.
+
Top-down overview:
1. Statements
@@ -1258,56 +1261,62 @@ sp_head::execute_function(THD *thd, Item **argp, uint argcount,
thd->spcont= nctx;
- binlog_save_options= thd->options;
- need_binlog_call= mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG);
+ /*
+ With row-based binlogging we don't need to binlog the function call itself;
+ let each substatement be binlogged its own way.
+ */
+ need_binlog_call= mysql_bin_log.is_open() &&
+ (thd->options & OPTION_BIN_LOG) && !binlog_row_based;
if (need_binlog_call)
{
reset_dynamic(&thd->user_var_events);
mysql_bin_log.start_union_events(thd);
+ binlog_save_options= thd->options;
+ thd->options&= ~OPTION_BIN_LOG;
}
-
- thd->options&= ~OPTION_BIN_LOG;
+
err_status= execute(thd);
- thd->options= binlog_save_options;
-
- if (need_binlog_call)
- mysql_bin_log.stop_union_events(thd);
- if (need_binlog_call && thd->binlog_evt_union.unioned_events)
+ if (need_binlog_call)
{
- char buf[256];
- String bufstr(buf, sizeof(buf), &my_charset_bin);
- bufstr.length(0);
- bufstr.append(STRING_WITH_LEN("DO "));
- append_identifier(thd, &bufstr, m_name.str, m_name.length);
- bufstr.append('(');
- for (uint i=0; i < argcount; i++)
- {
- String str_value_holder;
- String *str_value;
-
- if (i)
- bufstr.append(',');
-
- str_value= sp_get_item_value(param_values[i], &str_value_holder);
-
- if (str_value)
- bufstr.append(*str_value);
- else
- bufstr.append(STRING_WITH_LEN("NULL"));
- }
- bufstr.append(')');
-
- Query_log_event qinfo(thd, bufstr.ptr(), bufstr.length(),
- thd->binlog_evt_union.unioned_events_trans, FALSE);
- if (mysql_bin_log.write(&qinfo) &&
- thd->binlog_evt_union.unioned_events_trans)
+ mysql_bin_log.stop_union_events(thd);
+ thd->options= binlog_save_options;
+ if (thd->binlog_evt_union.unioned_events)
{
- push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
- "Invoked ROUTINE modified a transactional table but MySQL "
- "failed to reflect this change in the binary log");
+ char buf[256];
+ String bufstr(buf, sizeof(buf), &my_charset_bin);
+ bufstr.length(0);
+ bufstr.append(STRING_WITH_LEN("DO "));
+ append_identifier(thd, &bufstr, m_name.str, m_name.length);
+ bufstr.append('(');
+ for (uint i=0; i < argcount; i++)
+ {
+ String str_value_holder;
+ String *str_value;
+
+ if (i)
+ bufstr.append(',');
+
+ str_value= sp_get_item_value(param_values[i], &str_value_holder);
+
+ if (str_value)
+ bufstr.append(*str_value);
+ else
+ bufstr.append(STRING_WITH_LEN("NULL"));
+ }
+ bufstr.append(')');
+
+ Query_log_event qinfo(thd, bufstr.ptr(), bufstr.length(),
+ thd->binlog_evt_union.unioned_events_trans, FALSE);
+ if (mysql_bin_log.write(&qinfo) &&
+ thd->binlog_evt_union.unioned_events_trans)
+ {
+ push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
+ "Invoked ROUTINE modified a transactional table but MySQL "
+ "failed to reflect this change in the binary log");
+ }
+ reset_dynamic(&thd->user_var_events);
}
- reset_dynamic(&thd->user_var_events);
}
if (m_type == TYPE_ENUM_FUNCTION && !err_status)
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index 5ee7bf8fd58..42e99bbd1bc 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -1468,8 +1468,7 @@ bool change_password(THD *thd, const char *host, const char *user,
acl_user->host.hostname ? acl_user->host.hostname : "",
new_password));
thd->clear_error();
- Query_log_event qinfo(thd, buff, query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE, buff, query_length, FALSE, FALSE);
}
end:
close_thread_tables(thd);
@@ -1654,7 +1653,7 @@ static bool update_user_table(THD *thd, TABLE *table,
}
store_record(table,record[1]);
table->field[2]->store(new_password, new_password_len, system_charset_info);
- if ((error=table->file->update_row(table->record[1],table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],table->record[0])))
{
table->file->print_error(error,MYF(0)); /* purecov: deadcode */
DBUG_RETURN(1);
@@ -1871,14 +1870,14 @@ static int replace_user_table(THD *thd, TABLE *table, const LEX_USER &combo,
*/
table->file->ha_retrieve_all_cols();
if (cmp_record(table,record[1]) &&
- (error=table->file->update_row(table->record[1],table->record[0])))
+ (error=table->file->ha_update_row(table->record[1],table->record[0])))
{ // This should never happen
table->file->print_error(error,MYF(0)); /* purecov: deadcode */
error= -1; /* purecov: deadcode */
goto end; /* purecov: deadcode */
}
}
- else if ((error=table->file->write_row(table->record[0]))) // insert
+ else if ((error=table->file->ha_write_row(table->record[0]))) // insert
{ // This should never happen
if (error && error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE) /* purecov: inspected */
@@ -1988,16 +1987,17 @@ static int replace_db_table(TABLE *table, const char *db,
if (rights)
{
table->file->ha_retrieve_all_cols();
- if ((error=table->file->update_row(table->record[1],table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],
+ table->record[0])))
goto table_error; /* purecov: deadcode */
}
else /* must have been a revoke of all privileges */
{
- if ((error = table->file->delete_row(table->record[1])))
+ if ((error = table->file->ha_delete_row(table->record[1])))
goto table_error; /* purecov: deadcode */
}
}
- else if (rights && (error=table->file->write_row(table->record[0])))
+ else if (rights && (error=table->file->ha_write_row(table->record[0])))
{
if (error && error != HA_ERR_FOUND_DUPP_KEY) /* purecov: inspected */
goto table_error; /* purecov: deadcode */
@@ -2365,9 +2365,9 @@ static int replace_column_table(GRANT_TABLE *g_t,
{
GRANT_COLUMN *grant_column;
if (privileges)
- error=table->file->update_row(table->record[1],table->record[0]);
+ error=table->file->ha_update_row(table->record[1],table->record[0]);
else
- error=table->file->delete_row(table->record[1]);
+ error=table->file->ha_delete_row(table->record[1]);
if (error)
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
@@ -2382,7 +2382,7 @@ static int replace_column_table(GRANT_TABLE *g_t,
else // new grant
{
GRANT_COLUMN *grant_column;
- if ((error=table->file->write_row(table->record[0])))
+ if ((error=table->file->ha_write_row(table->record[0])))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
result= -1; /* purecov: inspected */
@@ -2434,8 +2434,8 @@ static int replace_column_table(GRANT_TABLE *g_t,
if (privileges)
{
int tmp_error;
- if ((tmp_error=table->file->update_row(table->record[1],
- table->record[0])))
+ if ((tmp_error=table->file->ha_update_row(table->record[1],
+ table->record[0])))
{ /* purecov: deadcode */
table->file->print_error(tmp_error,MYF(0)); /* purecov: deadcode */
result= -1; /* purecov: deadcode */
@@ -2447,7 +2447,7 @@ static int replace_column_table(GRANT_TABLE *g_t,
else
{
int tmp_error;
- if ((tmp_error = table->file->delete_row(table->record[1])))
+ if ((tmp_error = table->file->ha_delete_row(table->record[1])))
{ /* purecov: deadcode */
table->file->print_error(tmp_error,MYF(0)); /* purecov: deadcode */
result= -1; /* purecov: deadcode */
@@ -2555,15 +2555,15 @@ static int replace_table_table(THD *thd, GRANT_TABLE *grant_table,
{
if (store_table_rights || store_col_rights)
{
- if ((error=table->file->update_row(table->record[1],table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],table->record[0])))
goto table_error; /* purecov: deadcode */
}
- else if ((error = table->file->delete_row(table->record[1])))
+ else if ((error = table->file->ha_delete_row(table->record[1])))
goto table_error; /* purecov: deadcode */
}
else
{
- error=table->file->write_row(table->record[0]);
+ error=table->file->ha_write_row(table->record[0]);
if (error && error != HA_ERR_FOUND_DUPP_KEY)
goto table_error; /* purecov: deadcode */
}
@@ -2672,15 +2672,15 @@ static int replace_routine_table(THD *thd, GRANT_NAME *grant_name,
{
if (store_proc_rights)
{
- if ((error=table->file->update_row(table->record[1],table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],table->record[0])))
goto table_error;
}
- else if ((error= table->file->delete_row(table->record[1])))
+ else if ((error= table->file->ha_delete_row(table->record[1])))
goto table_error;
}
else
{
- error=table->file->write_row(table->record[0]);
+ error=table->file->ha_write_row(table->record[0]);
if (error && error != HA_ERR_FOUND_DUPP_KEY)
goto table_error;
}
@@ -3119,6 +3119,16 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc,
}
grant_option=TRUE;
thd->mem_root= old_root;
+ /*
+ This flush is here only because there is code that writes rows to
+ system tables after executing a binlog_query().
+
+ TODO: Ensure that no writes are executed after a binlog_query() by
+ moving the writes to before calling binlog_query(). Then remove
+ this line (and add an assert inside send_ok() that checks that
+ everything is in a consistent state).
+ */
+ thd->binlog_flush_pending_rows_event(true);
rw_unlock(&LOCK_grant);
if (!result && !no_error)
send_ok(thd);
@@ -4670,13 +4680,13 @@ static int modify_grant_table(TABLE *table, Field *host_field,
system_charset_info);
user_field->store(user_to->user.str, user_to->user.length,
system_charset_info);
- if ((error= table->file->update_row(table->record[1], table->record[0])))
+ if ((error= table->file->ha_update_row(table->record[1], table->record[0])))
table->file->print_error(error, MYF(0));
}
else
{
/* delete */
- if ((error=table->file->delete_row(table->record[0])))
+ if ((error=table->file->ha_delete_row(table->record[0])))
table->file->print_error(error, MYF(0));
}
@@ -5683,7 +5693,7 @@ void update_schema_privilege(TABLE *table, char *buff, const char* db,
table->field[i++]->store(column, col_length, cs);
table->field[i++]->store(priv, priv_length, cs);
table->field[i]->store(is_grantable, strlen(is_grantable), cs);
- table->file->write_row(table->record[0]);
+ table->file->ha_write_row(table->record[0]);
}
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 8f0f0d779e4..b9cd1afcd26 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -1029,6 +1029,19 @@ void close_thread_tables(THD *thd, bool lock_in_use, bool skip_derived)
/* Fallthrough */
}
+ /*
+ For RBR: before calling close_thread_tables(), storage engines
+ should autocommit. Hence if there is a pending event, it belongs
+ to a non-transactional engine, which writes directly to the table,
+ and should therefore be flushed before unlocking and closing the
+ tables. The test above for locked tables will not be triggered
+ since RBR locks and unlocks tables on a per-event basis.
+
+ TODO (WL#3023): Change the semantics so that RBR does not lock and
+ unlock tables on a per-event basis.
+ */
+ thd->binlog_flush_pending_rows_event(true);
+
if (thd->lock)
{
mysql_unlock_tables(thd, thd->lock);
@@ -1171,7 +1184,8 @@ void close_temporary_tables(THD *thd)
next=table->next;
close_temporary(table, 1, 1);
}
- if (query && found_user_tables && mysql_bin_log.is_open())
+ if (query && found_user_tables && mysql_bin_log.is_open() &&
+ !binlog_row_based) // CREATE TEMP TABLE not binlogged if row-based
{
/* The -1 is to remove last ',' */
thd->clear_error();
@@ -2038,6 +2052,8 @@ static bool reopen_table(TABLE *table)
tmp.keys_in_use_for_query= tmp.s->keys_in_use;
tmp.used_keys= tmp.s->keys_for_keyread;
+ tmp.s->table_map_id= table->s->table_map_id;
+
/* Get state */
tmp.in_use= thd;
tmp.reginfo.lock_type=table->reginfo.lock_type;
@@ -2343,6 +2359,48 @@ void abort_locked_tables(THD *thd,const char *db, const char *table_name)
/*
+ Function to assign a new table map id to a table.
+
+ PARAMETERS
+
+ table - Pointer to table structure
+
+ PRE-CONDITION(S)
+
+ table is non-NULL
+ The LOCK_open mutex is locked
+
+ POST-CONDITION(S)
+
+ table->s->table_map_id is given a value that with a high certainty
+ is not used by any other table.
+
+ table->s->table_map_id is not ULONG_MAX.
+ */
+static void assign_new_table_id(TABLE *table)
+{
+ static ulong last_table_id= ULONG_MAX;
+
+ DBUG_ENTER("assign_new_table_id(TABLE*)");
+
+ /* Preconditions */
+ DBUG_ASSERT(table != NULL);
+ safe_mutex_assert_owner(&LOCK_open);
+
+ ulong tid= ++last_table_id; /* get next id */
+ /* There is one reserved number that cannot be used. */
+ if (unlikely(tid == ULONG_MAX))
+ tid= ++last_table_id;
+ table->s->table_map_id= tid;
+ DBUG_PRINT("info", ("table_id=%lu", tid));
+
+ /* Post conditions */
+ DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
+
+ DBUG_VOID_RETURN;
+}
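The id-allocation pattern used by assign_new_table_id() can be shown in isolation. This is a sketch under the assumption that the caller serializes access (the patch uses LOCK_open for that); next_table_id is an invented name:

    #include <limits.h>

    static unsigned long next_table_id()
    {
      /*
        Starts at ULONG_MAX so the first increment wraps around to 0;
        ULONG_MAX itself is reserved to mean "no table map id" and is
        therefore never handed out. The caller must hold a mutex, exactly
        as LOCK_open is held in the function above.
      */
      static unsigned long last_id= ULONG_MAX;
      unsigned long id= ++last_id;
      if (id == ULONG_MAX)             /* skip the reserved sentinel */
        id= ++last_id;
      return id;
    }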
+
+/*
Load a table definition from file and open unireg table
SYNOPSIS
@@ -2490,7 +2548,21 @@ retry:
goto err;
break;
}
-
+
+ /*
+ We assign a new table id under the protection of the LOCK_open
+ mutex. We assign a new table id here instead of inside openfrm()
+ since that function can be used without acquiring any lock (e.g.,
+ inside ha_create_table()). Instead of creating a new mutex and
+ using it for the sole purpose of serializing accesses to a static
+ variable, we assign the table id here.
+
+ CAVEAT. This means that the table cannot be used for
+ binlogging/replication purposes, unless open_table() has been called
+ directly or indirectly.
+ */
+ assign_new_table_id(entry);
+
if (Table_triggers_list::check_n_load(thd, share->db.str,
share->table_name.str, entry, 0))
{
@@ -2511,10 +2583,11 @@ retry:
uint query_buf_size= 20 + share->db.length + share->table_name.length +1;
if ((query= (char*) my_malloc(query_buf_size,MYF(MY_WME))))
{
+ /* this DELETE FROM is needed even with row-based binlogging */
end = strxmov(strmov(query, "DELETE FROM `"),
share->db.str,"`.`",share->table_name.str,"`", NullS);
- Query_log_event qinfo(thd, query, (ulong)(end-query), 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::STMT_QUERY_TYPE,
+ query, (ulong)(end-query), FALSE, FALSE);
my_free(query, MYF(0));
}
else
diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc
new file mode 100644
index 00000000000..cc0e9714d85
--- /dev/null
+++ b/sql/sql_binlog.cc
@@ -0,0 +1,135 @@
+/* Copyright (C) 2005 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "mysql_priv.h"
+#include "base64.h"
+
+/*
+ Execute a BINLOG statement
+
+ TODO: This currently assumes a MySQL 5.x binlog.
+ When we eventually have a binlog with a different format, to execute the
+ BINLOG command properly the server will need to know which format
+ the BINLOG command's event is in. mysqlbinlog should then send
+ the Format_description_log_event of the binlog it reads and the
+ server thread should cache this format into
+ rli->description_event_for_exec.
+*/
+
+void mysql_client_binlog_statement(THD* thd)
+{
+ DBUG_PRINT("info",("binlog base64: '%*s'",
+ (thd->lex->comment.length < 2048 ?
+ thd->lex->comment.length : 2048),
+ thd->lex->comment.str));
+
+ /*
+ Temporarily turn off send_ok, since different events handle this
+ differently
+ */
+ my_bool nsok= thd->net.no_send_ok;
+ thd->net.no_send_ok= TRUE;
+
+ const my_size_t coded_len= thd->lex->comment.length + 1;
+ const my_size_t event_len= base64_needed_decoded_length(coded_len);
+ DBUG_ASSERT(coded_len > 0);
+
+ /*
+ Allocation
+ */
+ if (!thd->rli_fake)
+ thd->rli_fake= new RELAY_LOG_INFO;
+
+ const Format_description_log_event *desc=
+ new Format_description_log_event(4);
+
+ const char *error= 0;
+ char *buf= (char *) my_malloc(event_len, MYF(MY_WME));
+ Log_event *ev= 0; /* may reach 'end:' before being assigned */
+ int res;
+
+ /*
+ Out of memory check
+ */
+ if (!(thd->rli_fake && desc && buf))
+ {
+ my_error(ER_OUTOFMEMORY, MYF(0), 1); /* needed 1 byte */
+ goto end;
+ }
+
+ thd->rli_fake->sql_thd= thd;
+ thd->rli_fake->no_storage= TRUE;
+
+ res= base64_decode(thd->lex->comment.str, coded_len, buf);
+
+ DBUG_PRINT("info",("binlog base64 decoded_len=%d, event_len=%d\n",
+ res, uint4korr(buf + EVENT_LEN_OFFSET)));
+ /*
+ Note that 'res' is the correct event length, 'event_len' was
+ calculated based on the base64-string that possibly contained
+ extra spaces, so it can be longer than the real event.
+ */
+ if (res < EVENT_LEN_OFFSET
+ || (uint) res != uint4korr(buf+EVENT_LEN_OFFSET))
+ {
+ my_error(ER_SYNTAX_ERROR, MYF(0));
+ goto end;
+ }
+
+ ev= Log_event::read_log_event(buf, res, &error, desc);
+
+ DBUG_PRINT("info",("binlog base64 err=%s", error));
+ if (!ev)
+ {
+ /*
+ This could actually be an out-of-memory, but it is more
+ likely caused by a bad statement
+ */
+ my_error(ER_SYNTAX_ERROR, MYF(0));
+ goto end;
+ }
+
+ DBUG_PRINT("info",("ev->get_type_code()=%d", ev->get_type_code()));
+ DBUG_PRINT("info",("buf+EVENT_TYPE_OFFSET=%d", buf+EVENT_TYPE_OFFSET));
+
+ ev->thd= thd;
+ if (ev->exec_event(thd->rli_fake))
+ {
+ my_error(ER_UNKNOWN_ERROR, MYF(0), "Error executing BINLOG statement");
+ goto end;
+ }
+
+ /*
+ Restore setting of no_send_ok
+ */
+ thd->net.no_send_ok= nsok;
+
+ DBUG_PRINT("info",("binlog base64 execution finished successfully"));
+ send_ok(thd);
+
+end:
+ /*
+ Restore setting of no_send_ok
+ */
+ thd->net.no_send_ok= nsok;
+
+ if (ev)
+ delete ev;
+ if (desc)
+ delete desc;
+ if (buf)
+ my_free(buf, MYF(0));
+}
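The length check in the middle of mysql_client_binlog_statement() is what guards against trailing garbage after base64 decoding. A self-contained sketch of that check follows; read_le32 is a stand-in for uint4korr(), and the offset value 9 is assumed here for illustration of an EVENT_LEN_OFFSET-style header field:

    #include <stddef.h>
    #include <stdint.h>

    static uint32_t read_le32(const unsigned char *p)
    {
      return (uint32_t) p[0] | ((uint32_t) p[1] << 8) |
             ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24);
    }

    static bool event_length_ok(const unsigned char *buf, size_t decoded_len)
    {
      const size_t event_len_offset= 9;      /* illustrative value */
      if (decoded_len < event_len_offset + 4)
        return false;                        /* header truncated */
      /* The length stored in the event header must match what we decoded. */
      return read_le32(buf + event_len_offset) == decoded_len;
    }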
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index a28324c5e28..08d89228a72 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -27,6 +27,8 @@
#endif
#include "mysql_priv.h"
+#include <my_bitmap.h>
+#include "log_event.h"
#include <m_ctype.h>
#include <sys/stat.h>
#include <thr_alarm.h>
@@ -174,7 +176,7 @@ Open_tables_state::Open_tables_state(ulong version_arg)
THD::THD()
:Statement(CONVENTIONAL_EXECUTION, 0, ALLOC_ROOT_MIN_BLOCK_SIZE, 0),
- Open_tables_state(refresh_version),
+ Open_tables_state(refresh_version), rli_fake(0),
lock_id(&main_lock_id),
user_time(0), in_sub_stmt(0), global_read_lock(0), is_fatal_error(0),
rand_used(0), time_zone_used(0),
@@ -227,6 +229,9 @@ THD::THD()
ull=0;
system_thread= cleanup_done= abort_on_warning= no_warnings_for_error= 0;
peer_port= 0; // For SHOW PROCESSLIST
+#ifdef HAVE_ROW_BASED_REPLICATION
+ transaction.m_pending_rows_event= 0;
+#endif
#ifdef __WIN__
real_id = 0;
#endif
@@ -440,6 +445,11 @@ THD::~THD()
#ifndef DBUG_OFF
dbug_sentry= THD_SENTRY_GONE;
#endif
+#ifndef EMBEDDED_LIBRARY
+ if (rli_fake)
+ delete rli_fake;
+#endif
+
DBUG_VOID_RETURN;
}
@@ -1959,7 +1969,8 @@ void THD::reset_sub_statement_state(Sub_statement_state *backup,
backup->client_capabilities= client_capabilities;
backup->savepoints= transaction.savepoints;
- if (!lex->requires_prelocking() || is_update_query(lex->sql_command))
+ if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) &&
+ !binlog_row_based)
options&= ~OPTION_BIN_LOG;
/* Disable result sets */
client_capabilities &= ~CLIENT_MULTI_RESULTS;
@@ -2101,3 +2112,439 @@ void xid_cache_delete(XID_STATE *xid_state)
pthread_mutex_unlock(&LOCK_xid_cache);
}
+/*
+ Implementation of interface to write rows to the binary log through the
+ thread. The thread is responsible for writing the rows it has
+ inserted/updated/deleted.
+*/
+
+#ifndef MYSQL_CLIENT
+#ifdef HAVE_ROW_BASED_REPLICATION
+
+/*
+ Template member function for ensuring that there is an rows log
+ event of the apropriate type before proceeding.
+
+ PRE CONDITION:
+ - Events of type 'RowEventT' have the type code 'type_code'.
+
+ POST CONDITION:
+ If a non-NULL pointer is returned, the pending event for thread 'thd' will
+ be an event of type 'RowEventT' (which have the type code 'type_code')
+ will either empty or have enough space to hold 'needed' bytes. In
+ addition, the columns bitmap will be correct for the row, meaning that
+ the pending event will be flushed if the columns in the event differ from
+ the columns suppled to the function.
+
+ RETURNS
+ If no error, a non-NULL pending event (either one which already existed or
+ the newly created one).
+ If error, NULL.
+ */
+
+template <class RowsEventT> Rows_log_event*
+THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
+ MY_BITMAP const* cols,
+ my_size_t colcnt,
+ my_size_t needed,
+ bool is_transactional)
+{
+ /* Pre-conditions */
+ DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
+
+ /* Fetch the type code for the RowsEventT template parameter */
+ int const type_code= RowsEventT::TYPE_CODE;
+
+ /*
+ There is no good place to set up the transactional data, so we
+ have to do it here.
+ */
+ if (binlog_setup_trx_data())
+ return NULL;
+
+ Rows_log_event* pending= binlog_get_pending_rows_event();
+
+ if (unlikely(pending && !pending->is_valid()))
+ return NULL;
+
+ /*
+ Check if the current event is non-NULL and a write-rows
+ event. Also check if the table provided is mapped: if it is not,
+ then we have switched to writing to a new table.
+ If there is no pending event, we need to create one. If there is a pending
+ event, but it's not about the same table id, or not of the same type
+ (between Write, Update and Delete), or not the same affected columns, or
+ going to be too big, flush this event to disk and create a new pending
+ event.
+ */
+ if (!pending ||
+ pending->server_id != serv_id ||
+ pending->get_table_id() != table->s->table_map_id ||
+ pending->get_type_code() != type_code ||
+ pending->get_data_size() + needed > opt_binlog_rows_event_max_size ||
+ pending->get_width() != colcnt ||
+ !bitmap_cmp(pending->get_cols(), cols))
+ {
+ /* Create a new RowsEventT... */
+ Rows_log_event* const
+ ev= new RowsEventT(this, table, table->s->table_map_id, cols,
+ is_transactional);
+ if (unlikely(!ev))
+ return NULL;
+ ev->server_id= serv_id; // I don't like this, it's too easy to forget.
+ /*
+ flush the pending event and replace it with the newly created
+ event...
+ */
+ if (unlikely(mysql_bin_log.flush_and_set_pending_rows_event(this, ev)))
+ {
+ delete ev;
+ return NULL;
+ }
+
+ return ev; /* This is the new pending event */
+ }
+ return pending; /* This is the current pending event */
+}
+
+/*
+ Instantiate the versions we need; we build with -fno-implicit-templates as
+ a compile option, so the needed instances must be named explicitly.
+*/
+template Rows_log_event*
+THD::binlog_prepare_pending_rows_event<Write_rows_log_event>
+(TABLE*, uint32, MY_BITMAP const*, my_size_t colcnt, size_t, bool);
+
+template Rows_log_event*
+THD::binlog_prepare_pending_rows_event<Delete_rows_log_event>
+(TABLE*, uint32, MY_BITMAP const*, my_size_t colcnt, size_t, bool);
+
+template Rows_log_event*
+THD::binlog_prepare_pending_rows_event<Update_rows_log_event>
+(TABLE*, uint32, MY_BITMAP const*, my_size_t colcnt, size_t, bool);
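For readers not used to building with -fno-implicit-templates: under that option the compiler does not instantiate templates at their points of use, so every specialization the code needs has to be spelled out explicitly, which is what the three declarations above do. A minimal stand-alone example of the same idiom (Buffer is an invented name):

    /* header: template definition visible to all users */
    template <class T> struct Buffer
    {
      T value;
      T get() const { return value; }
    };

    /* one .cc file, compiled with -fno-implicit-templates, provides the
       instantiations that the rest of the code links against: */
    template struct Buffer<int>;
    template struct Buffer<double>;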
+
+static char const*
+field_type_name(enum_field_types type)
+{
+ switch (type)
+ {
+ case MYSQL_TYPE_DECIMAL:
+ return "MYSQL_TYPE_DECIMAL";
+ case MYSQL_TYPE_TINY:
+ return "MYSQL_TYPE_TINY";
+ case MYSQL_TYPE_SHORT:
+ return "MYSQL_TYPE_SHORT";
+ case MYSQL_TYPE_LONG:
+ return "MYSQL_TYPE_LONG";
+ case MYSQL_TYPE_FLOAT:
+ return "MYSQL_TYPE_FLOAT";
+ case MYSQL_TYPE_DOUBLE:
+ return "MYSQL_TYPE_DOUBLE";
+ case MYSQL_TYPE_NULL:
+ return "MYSQL_TYPE_NULL";
+ case MYSQL_TYPE_TIMESTAMP:
+ return "MYSQL_TYPE_TIMESTAMP";
+ case MYSQL_TYPE_LONGLONG:
+ return "MYSQL_TYPE_LONGLONG";
+ case MYSQL_TYPE_INT24:
+ return "MYSQL_TYPE_INT24";
+ case MYSQL_TYPE_DATE:
+ return "MYSQL_TYPE_DATE";
+ case MYSQL_TYPE_TIME:
+ return "MYSQL_TYPE_TIME";
+ case MYSQL_TYPE_DATETIME:
+ return "MYSQL_TYPE_DATETIME";
+ case MYSQL_TYPE_YEAR:
+ return "MYSQL_TYPE_YEAR";
+ case MYSQL_TYPE_NEWDATE:
+ return "MYSQL_TYPE_NEWDATE";
+ case MYSQL_TYPE_VARCHAR:
+ return "MYSQL_TYPE_VARCHAR";
+ case MYSQL_TYPE_BIT:
+ return "MYSQL_TYPE_BIT";
+ case MYSQL_TYPE_NEWDECIMAL:
+ return "MYSQL_TYPE_NEWDECIMAL";
+ case MYSQL_TYPE_ENUM:
+ return "MYSQL_TYPE_ENUM";
+ case MYSQL_TYPE_SET:
+ return "MYSQL_TYPE_SET";
+ case MYSQL_TYPE_TINY_BLOB:
+ return "MYSQL_TYPE_TINY_BLOB";
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ return "MYSQL_TYPE_MEDIUM_BLOB";
+ case MYSQL_TYPE_LONG_BLOB:
+ return "MYSQL_TYPE_LONG_BLOB";
+ case MYSQL_TYPE_BLOB:
+ return "MYSQL_TYPE_BLOB";
+ case MYSQL_TYPE_VAR_STRING:
+ return "MYSQL_TYPE_VAR_STRING";
+ case MYSQL_TYPE_STRING:
+ return "MYSQL_TYPE_STRING";
+ case MYSQL_TYPE_GEOMETRY:
+ return "MYSQL_TYPE_GEOMETRY";
+ }
+ return "Unknown";
+}
+
+my_size_t THD::max_row_length_blob(TABLE *table, const byte *data) const
+{
+ my_size_t length= 0;
+ TABLE_SHARE *table_s= table->s;
+ uint* const beg= table_s->blob_field;
+ uint* const end= beg + table_s->blob_fields;
+
+ for (uint *ptr= beg ; ptr != end ; ++ptr)
+ {
+ Field_blob* const blob= (Field_blob*) table->field[*ptr];
+ length+= blob->get_length(data + blob->offset()) + 2;
+ }
+
+ return length;
+}
+
+my_size_t THD::pack_row(TABLE *table, MY_BITMAP const* cols, byte *row_data,
+ const byte *record) const
+{
+ Field **p_field= table->field, *field= *p_field;
+ int n_null_bytes= table->s->null_bytes;
+ my_ptrdiff_t const offset= record - (byte*) table->record[0];
+
+ memcpy(row_data, record, n_null_bytes);
+ byte *ptr= row_data+n_null_bytes;
+
+ for (int i= 0 ; field ; i++, p_field++, field= *p_field)
+ {
+ if (bitmap_is_set(cols,i))
+ ptr= field->pack(ptr, field->ptr + offset);
+ }
+
+ /*
+ my_ptrdiff_t is signed, size_t is unsigned. Assert that the
+ conversion will work correctly.
+ */
+ DBUG_ASSERT(ptr - row_data >= 0);
+ return (static_cast<size_t>(ptr - row_data));
+}
+
+int THD::binlog_write_row(TABLE* table, bool is_trans,
+ MY_BITMAP const* cols, my_size_t colcnt,
+ byte const *record)
+{
+ DBUG_ASSERT(binlog_row_based && mysql_bin_log.is_open());
+
+ /*
+ Pack records into format for transfer. We are allocating more
+ memory than needed, but that doesn't matter.
+ */
+ bool error= 0;
+ byte *row_data= table->write_row_record;
+ my_size_t const max_len= max_row_length(table, record);
+
+ /* Allocate room for a row (if needed) */
+ if (!row_data)
+ {
+ if (!table->s->blob_fields)
+ {
+ /* multiply max_len by 2 so it can be used for update_row as well */
+ table->write_row_record= alloc_root(&table->mem_root, 2*max_len);
+ if (!table->write_row_record)
+ return HA_ERR_OUT_OF_MEM;
+ row_data= table->write_row_record;
+ }
+ else if (unlikely(!(row_data= my_malloc(max_len, MYF(MY_WME)))))
+ return HA_ERR_OUT_OF_MEM;
+ }
+ my_size_t const len= pack_row(table, cols, row_data, record);
+
+ Rows_log_event* const
+ ev= binlog_prepare_pending_rows_event<Write_rows_log_event>
+ (table, server_id, cols, colcnt, len, is_trans);
+
+ /* add_row_data copies row_data to internal buffer */
+ error= likely(ev != 0) ? ev->add_row_data(row_data,len) : HA_ERR_OUT_OF_MEM ;
+
+ if (table->write_row_record == 0)
+ my_free(row_data, MYF(MY_WME));
+
+ return error;
+}
+
+int THD::binlog_update_row(TABLE* table, bool is_trans,
+ MY_BITMAP const* cols, my_size_t colcnt,
+ const byte *before_record,
+ const byte *after_record)
+{
+ DBUG_ASSERT(binlog_row_based && mysql_bin_log.is_open());
+
+ bool error= 0;
+ my_size_t const before_maxlen = max_row_length(table, before_record);
+ my_size_t const after_maxlen = max_row_length(table, after_record);
+
+ byte *row_data= table->write_row_record;
+ byte *before_row, *after_row;
+ if (row_data != 0)
+ {
+ before_row= row_data;
+ after_row= before_row + before_maxlen;
+ }
+ else
+ {
+ if (unlikely(!(row_data= my_multi_malloc(MYF(MY_WME),
+ &before_row, before_maxlen,
+ &after_row, after_maxlen,
+ NULL))))
+ return HA_ERR_OUT_OF_MEM;
+ }
+
+ my_size_t const before_size= pack_row(table, cols, before_row,
+ before_record);
+ my_size_t const after_size= pack_row(table, cols, after_row,
+ after_record);
+
+ Rows_log_event* const
+ ev= binlog_prepare_pending_rows_event<Update_rows_log_event>
+ (table, server_id, cols, colcnt, before_size + after_size, is_trans);
+
+ error= (unlikely(!ev)) || ev->add_row_data(before_row, before_size) ||
+ ev->add_row_data(after_row, after_size);
+
+ if (!table->write_row_record)
+ {
+ /* add_row_data copies row_data to internal buffer */
+ my_free(row_data, MYF(MY_WME));
+ }
+
+ return error;
+}
+
+int THD::binlog_delete_row(TABLE* table, bool is_trans,
+ MY_BITMAP const* cols, my_size_t colcnt,
+ byte const *record)
+{
+ DBUG_ASSERT(binlog_row_based && mysql_bin_log.is_open());
+
+ /*
+ Pack records into format for transfer. We are allocating more
+ memory than needed, but that doesn't matter.
+ */
+ bool error= 0;
+ my_size_t const max_len= max_row_length(table, record);
+ byte *row_data= table->write_row_record;
+ if (!row_data && unlikely(!(row_data= my_malloc(max_len, MYF(MY_WME)))))
+ return HA_ERR_OUT_OF_MEM;
+ my_size_t const len= pack_row(table, cols, row_data, record);
+
+ Rows_log_event* const
+ ev= binlog_prepare_pending_rows_event<Delete_rows_log_event>
+ (table, server_id, cols, colcnt, len, is_trans);
+
+ error= (unlikely(!ev)) || ev->add_row_data(row_data, len);
+
+ /* add_row_data copies row_data */
+ if (table->write_row_record == 0)
+ my_free(row_data, MYF(MY_WME));
+
+ return error;
+}
+
+
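As a rough sketch of how the three row-logging entry points above are meant to be driven (not something this patch itself adds): a write path only hands the just-written record and a column bitmap to THD, and packing and pending-event management happen inside. The helper name, the all-columns bitmap, and the surrounding mysql_priv.h context are assumptions made for the illustration.

static int log_written_row(THD *thd, TABLE *table, const byte *record)
{
  /* Log every column; a real caller may pass only the written subset. */
  MY_BITMAP cols;
  uint32 bitbuf[MAX_FIELDS / 32 + 1];
  bitmap_init(&cols, bitbuf, (table->s->fields + 7) & ~7UL, 0);
  bitmap_set_all(&cols);

  /* is_trans mirrors whether the underlying engine is transactional. */
  return thd->binlog_write_row(table, table->file->has_transactions(),
                               &cols, table->s->fields, record);
}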
+int THD::binlog_flush_pending_rows_event(bool stmt_end)
+{
+ DBUG_ENTER("THD::binlog_flush_pending_rows_event");
+ if (!binlog_row_based || !mysql_bin_log.is_open())
+ DBUG_RETURN(0);
+
+ /*
+ Mark the event as the last event of a statement if the stmt_end
+ flag is set.
+ */
+ int error= 0;
+ if (Rows_log_event *pending= binlog_get_pending_rows_event())
+ {
+ if (stmt_end)
+ {
+ pending->set_flags(Rows_log_event::STMT_END_F);
+ pending->flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
+ }
+
+    /*
+      We only flush and reset the pending event if there is one. This
+      is essential for correctness, since the thread does not
+      necessarily have a trx_data allocated when the pending event is
+      NULL.
+    */
+ error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0);
+ }
+
+ DBUG_RETURN(error);
+}
+
+
+void THD::binlog_delete_pending_rows_event()
+{
+ if (Rows_log_event *pending= binlog_get_pending_rows_event())
+ {
+ delete pending;
+ binlog_set_pending_rows_event(0);
+ }
+}
+
+#endif /* HAVE_ROW_BASED_REPLICATION */
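A usage sketch for the two helpers above, under the assumption (mine, not the patch's) that a statement-level caller flushes the pending event exactly once with stmt_end set on success and simply discards it on an error path; the wrapper name is made up.

static int end_row_logging(THD *thd, bool statement_failed)
{
  if (statement_failed)
  {
    /* Nothing should reach the binlog; drop the half-built event. */
    thd->binlog_delete_pending_rows_event();
    return 0;
  }
  /* Safe to call unconditionally: it is a no-op unless RBR is active. */
  return thd->binlog_flush_pending_rows_event(true /* stmt_end */);
}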
+
+/*
+  Member function that logs the query, either row-based or
+  statement-based, depending on the value of the 'binlog_row_based'
+  variable and the value of the 'qtype' flag.
+
+  This function should be called after all calls to the ha_*_row()
+  functions have been issued, but before the tables are unlocked and
+  closed.
+
+  RETURN VALUE
+    Error code, or 0 if no error.
+*/
+int THD::binlog_query(THD::enum_binlog_query_type qtype,
+ char const *query, ulong query_len,
+ bool is_trans, bool suppress_use)
+{
+ DBUG_ENTER("THD::binlog_query");
+ DBUG_ASSERT(query && mysql_bin_log.is_open());
+ int error= binlog_flush_pending_rows_event(true);
+ switch (qtype)
+ {
+ case THD::MYSQL_QUERY_TYPE:
+    /*
+      Using this query type is a convenience hack, since we have been
+      moving back and forth between using RBR for replication of
+      system tables and not using it.
+
+      Make sure to update check_table_binlog_row_based() accordingly
+      if you change how this query type is treated.
+    */
+ case THD::ROW_QUERY_TYPE:
+ if (binlog_row_based)
+ DBUG_RETURN(binlog_flush_pending_rows_event(true));
+ /* Otherwise, we fall through */
+ case THD::STMT_QUERY_TYPE:
+ /*
+ Most callers of binlog_query() ignore the error code, assuming
+ that the statement will always be written to the binlog. In
+ case of error above, we therefore just continue and write the
+ statement to the binary log.
+ */
+ {
+ Query_log_event qinfo(this, query, query_len, is_trans, suppress_use);
+ qinfo.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
+ DBUG_RETURN(mysql_bin_log.write(&qinfo));
+ }
+ break;
+
+ case THD::QUERY_TYPE_COUNT:
+ default:
+ DBUG_ASSERT(0 <= qtype && qtype < QUERY_TYPE_COUNT);
+ }
+ DBUG_RETURN(0);
+}
+
+#endif /* !defined(MYSQL_CLIENT) */
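To make the dispatch above concrete, a sketch of a typical call site (the function name is hypothetical; the real call sites appear in the sql_delete.cc hunk below): a statement that deleted rows through ha_delete_row() asks for ROW_QUERY_TYPE, so under row-based logging only the pending row events are flushed, while a path that bypassed the row interface must force STMT_QUERY_TYPE.

static int log_dml_statement(THD *thd, bool bypassed_row_interface,
                             bool transactional_table)
{
  THD::enum_binlog_query_type const qtype=
    bypassed_row_interface ? THD::STMT_QUERY_TYPE : THD::ROW_QUERY_TYPE;
  return thd->binlog_query(qtype, thd->query, thd->query_length,
                           transactional_table, FALSE /* suppress_use */);
}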
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 60dc9a4cbad..1ef3322bc8f 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -21,19 +21,20 @@
#pragma interface /* gcc class implementation */
#endif
-// TODO: create log.h and move all the log header stuff there
+#include "log.h"
+#include "rpl_rli.h"
+#include "rpl_tblmap.h"
class Query_log_event;
class Load_log_event;
class Slave_log_event;
-class Format_description_log_event;
class sp_rcontext;
class sp_cache;
+class Rows_log_event;
enum enum_enable_or_disable { LEAVE_AS_IS, ENABLE, DISABLE };
enum enum_ha_read_modes { RFIRST, RNEXT, RPREV, RLAST, RKEY, RNEXT_SAME };
enum enum_duplicates { DUP_ERROR, DUP_REPLACE, DUP_UPDATE };
-enum enum_log_type { LOG_CLOSED, LOG_TO_BE_OPENED, LOG_NORMAL, LOG_NEW, LOG_BIN};
enum enum_delay_key_write { DELAY_KEY_WRITE_NONE, DELAY_KEY_WRITE_ON,
DELAY_KEY_WRITE_ALL };
@@ -50,117 +51,6 @@ extern const char **errmesg;
#define TC_HEURISTIC_RECOVER_ROLLBACK 2
extern uint tc_heuristic_recover;
-/*
- Transaction Coordinator log - a base abstract class
- for two different implementations
-*/
-class TC_LOG
-{
- public:
- int using_heuristic_recover();
- TC_LOG() {}
- virtual ~TC_LOG() {}
-
- virtual int open(const char *opt_name)=0;
- virtual void close()=0;
- virtual int log(THD *thd, my_xid xid)=0;
- virtual void unlog(ulong cookie, my_xid xid)=0;
-};
-
-class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
-{
- public:
- int open(const char *opt_name) { return 0; }
- void close() { }
- int log(THD *thd, my_xid xid) { return 1; }
- void unlog(ulong cookie, my_xid xid) { }
-};
-
-#ifdef HAVE_MMAP
-class TC_LOG_MMAP: public TC_LOG
-{
- public: // only to keep Sun Forte on sol9x86 happy
- typedef enum {
- POOL, // page is in pool
- ERROR, // last sync failed
- DIRTY // new xids added since last sync
- } PAGE_STATE;
-
- private:
- typedef struct st_page {
- struct st_page *next; // page a linked in a fifo queue
- my_xid *start, *end; // usable area of a page
- my_xid *ptr; // next xid will be written here
- int size, free; // max and current number of free xid slots on the page
- int waiters; // number of waiters on condition
- PAGE_STATE state; // see above
- pthread_mutex_t lock; // to access page data or control structure
- pthread_cond_t cond; // to wait for a sync
- } PAGE;
-
- char logname[FN_REFLEN];
- File fd;
- my_off_t file_length;
- uint npages, inited;
- uchar *data;
- struct st_page *pages, *syncing, *active, *pool, *pool_last;
- /*
- note that, e.g. LOCK_active is only used to protect
- 'active' pointer, to protect the content of the active page
- one has to use active->lock.
- Same for LOCK_pool and LOCK_sync
- */
- pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
- pthread_cond_t COND_pool, COND_active;
-
- public:
- TC_LOG_MMAP(): inited(0) {}
- int open(const char *opt_name);
- void close();
- int log(THD *thd, my_xid xid);
- void unlog(ulong cookie, my_xid xid);
- int recover();
-
- private:
- void get_active_from_pool();
- int sync();
- int overflow();
-};
-#else
-#define TC_LOG_MMAP TC_LOG_DUMMY
-#endif
-
-extern TC_LOG *tc_log;
-extern TC_LOG_MMAP tc_log_mmap;
-extern TC_LOG_DUMMY tc_log_dummy;
-
-/* log info errors */
-#define LOG_INFO_EOF -1
-#define LOG_INFO_IO -2
-#define LOG_INFO_INVALID -3
-#define LOG_INFO_SEEK -4
-#define LOG_INFO_MEM -6
-#define LOG_INFO_FATAL -7
-#define LOG_INFO_IN_USE -8
-
-/* bitmap to SQL_LOG::close() */
-#define LOG_CLOSE_INDEX 1
-#define LOG_CLOSE_TO_BE_OPENED 2
-#define LOG_CLOSE_STOP_EVENT 4
-
-struct st_relay_log_info;
-
-typedef struct st_log_info
-{
- char log_file_name[FN_REFLEN];
- my_off_t index_file_offset, index_file_start_offset;
- my_off_t pos;
- bool fatal; // if the purge happens to give us a negative offset
- pthread_mutex_t lock;
- st_log_info():fatal(0) { pthread_mutex_init(&lock, MY_MUTEX_INIT_FAST);}
- ~st_log_info() { pthread_mutex_destroy(&lock);}
-} LOG_INFO;
-
typedef struct st_user_var_events
{
user_var_entry *user_var_event;
@@ -173,188 +63,6 @@ typedef struct st_user_var_events
#define RP_LOCK_LOG_IS_ALREADY_LOCKED 1
#define RP_FORCE_ROTATE 2
-class Log_event;
-
-/*
- TODO split MYSQL_LOG into base MYSQL_LOG and
- MYSQL_QUERY_LOG, MYSQL_SLOW_LOG, MYSQL_BIN_LOG
- most of the code from MYSQL_LOG should be in the MYSQL_BIN_LOG
- only (TC_LOG included)
-
- TODO use mmap instead of IO_CACHE for binlog
- (mmap+fsync is two times faster than write+fsync)
-*/
-
-class MYSQL_LOG: public TC_LOG
-{
- private:
- /* LOCK_log and LOCK_index are inited by init_pthread_objects() */
- pthread_mutex_t LOCK_log, LOCK_index;
- pthread_mutex_t LOCK_prep_xids;
- pthread_cond_t COND_prep_xids;
- pthread_cond_t update_cond;
- ulonglong bytes_written;
- time_t last_time,query_start;
- IO_CACHE log_file;
- IO_CACHE index_file;
- char *name;
- char time_buff[20],db[NAME_LEN+1];
- char log_file_name[FN_REFLEN],index_file_name[FN_REFLEN];
- /*
- The max size before rotation (usable only if log_type == LOG_BIN: binary
- logs and relay logs).
- For a binlog, max_size should be max_binlog_size.
- For a relay log, it should be max_relay_log_size if this is non-zero,
- max_binlog_size otherwise.
- max_size is set in init(), and dynamically changed (when one does SET
- GLOBAL MAX_BINLOG_SIZE|MAX_RELAY_LOG_SIZE) by fix_max_binlog_size and
- fix_max_relay_log_size).
- */
- ulong max_size;
- ulong prepared_xids; /* for tc log - number of xids to remember */
- volatile enum_log_type log_type;
- enum cache_type io_cache_type;
- // current file sequence number for load data infile binary logging
- uint file_id;
- uint open_count; // For replication
- int readers_count;
- bool write_error, inited;
- bool need_start_event;
- /*
- no_auto_events means we don't want any of these automatic events :
- Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't
- want a Rotate_log event to be written to the relay log. When we start a
- relay log etc. So in 4.x this is 1 for relay logs, 0 for binlogs.
- In 5.0 it's 0 for relay logs too!
- */
- bool no_auto_events;
- friend class Log_event;
-
-public:
- /*
- These describe the log's format. This is used only for relay logs.
- _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
- necessary to have 2 distinct objects, because the I/O thread may be reading
- events in a different format from what the SQL thread is reading (consider
- the case of a master which has been upgraded from 5.0 to 5.1 without doing
- RESET MASTER, or from 4.x to 5.0).
- */
- Format_description_log_event *description_event_for_exec,
- *description_event_for_queue;
-
- MYSQL_LOG();
- /*
- note that there's no destructor ~MYSQL_LOG() !
- The reason is that we don't want it to be automatically called
- on exit() - but only during the correct shutdown process
- */
-
- int open(const char *opt_name);
- void close();
- int log(THD *thd, my_xid xid);
- void unlog(ulong cookie, my_xid xid);
- int recover(IO_CACHE *log, Format_description_log_event *fdle);
- void reset_bytes_written()
- {
- bytes_written = 0;
- }
- void harvest_bytes_written(ulonglong* counter)
- {
-#ifndef DBUG_OFF
- char buf1[22],buf2[22];
-#endif
- DBUG_ENTER("harvest_bytes_written");
- (*counter)+=bytes_written;
- DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1),
- llstr(bytes_written,buf2)));
- bytes_written=0;
- DBUG_VOID_RETURN;
- }
- void set_max_size(ulong max_size_arg);
- void signal_update();
- void wait_for_update(THD* thd, bool master_or_slave);
- void set_need_start_event() { need_start_event = 1; }
- void init(enum_log_type log_type_arg,
- enum cache_type io_cache_type_arg,
- bool no_auto_events_arg, ulong max_size);
- void init_pthread_objects();
- void cleanup();
- bool open(const char *log_name,
- enum_log_type log_type,
- const char *new_name,
- enum cache_type io_cache_type_arg,
- bool no_auto_events_arg, ulong max_size,
- bool null_created);
- const char *generate_name(const char *log_name, const char *suffix,
- bool strip_ext, char *buff);
- /* simplified open_xxx wrappers for the gigantic open above */
- bool open_query_log(const char *log_name)
- {
- char buf[FN_REFLEN];
- return open(generate_name(log_name, ".log", 0, buf),
- LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0);
- }
- bool open_slow_log(const char *log_name)
- {
- char buf[FN_REFLEN];
- return open(generate_name(log_name, "-slow.log", 0, buf),
- LOG_NORMAL, 0, WRITE_CACHE, 0, 0, 0);
- }
- bool open_index_file(const char *index_file_name_arg,
- const char *log_name);
- void new_file(bool need_lock);
- bool write(THD *thd, enum enum_server_command command,
- const char *format,...);
- bool write(THD *thd, const char *query, uint query_length,
- time_t query_start=0);
- bool write(Log_event* event_info); // binary log write
- bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event);
-
- void start_union_events(THD *thd);
- void stop_union_events(THD *thd);
- bool is_query_in_union(THD *thd, query_id_t query_id_param);
-
- /*
- v stands for vector
- invoked as appendv(buf1,len1,buf2,len2,...,bufn,lenn,0)
- */
- bool appendv(const char* buf,uint len,...);
- bool append(Log_event* ev);
-
- int generate_new_name(char *new_name,const char *old_name);
- void make_log_name(char* buf, const char* log_ident);
- bool is_active(const char* log_file_name);
- int update_log_index(LOG_INFO* linfo, bool need_update_threads);
- void rotate_and_purge(uint flags);
- bool flush_and_sync();
- int purge_logs(const char *to_log, bool included,
- bool need_mutex, bool need_update_threads,
- ulonglong *decrease_log_space);
- int purge_logs_before_date(time_t purge_time);
- int purge_first_log(struct st_relay_log_info* rli, bool included);
- bool reset_logs(THD* thd);
- void close(uint exiting);
-
- // iterating through the log index file
- int find_log_pos(LOG_INFO* linfo, const char* log_name,
- bool need_mutex);
- int find_next_log(LOG_INFO* linfo, bool need_mutex);
- int get_current_log(LOG_INFO* linfo);
- uint next_file_id();
- inline bool is_open() { return log_type != LOG_CLOSED; }
- inline char* get_index_fname() { return index_file_name;}
- inline char* get_log_fname() { return log_file_name; }
- inline char* get_name() { return name; }
- inline pthread_mutex_t* get_log_lock() { return &LOCK_log; }
- inline IO_CACHE* get_log_file() { return &log_file; }
-
- inline void lock_index() { pthread_mutex_lock(&LOCK_index);}
- inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);}
- inline IO_CACHE *get_index_file() { return &index_file;}
- inline uint32 get_open_count() { return open_count; }
-};
-
-
typedef struct st_copy_info {
ha_rows records;
ha_rows deleted;
@@ -461,28 +169,6 @@ public:
#include "sql_lex.h" /* Must be here */
-/* Needed to be able to have an I_List of char* strings in mysqld.cc. */
-
-class i_string: public ilink
-{
-public:
- const char* ptr;
- i_string():ptr(0) { }
- i_string(const char* s) : ptr(s) {}
-};
-
-/* needed for linked list of two strings for replicate-rewrite-db */
-class i_string_pair: public ilink
-{
-public:
- const char* key;
- const char* val;
- i_string_pair():key(0),val(0) { }
- i_string_pair(const char* key_arg, const char* val_arg) :
- key(key_arg),val(val_arg) {}
-};
-
-
class delayed_insert;
class select_result;
@@ -1102,6 +788,9 @@ class THD :public Statement,
public Open_tables_state
{
public:
+ /* Used to execute base64 coded binlog events in MySQL server */
+ RELAY_LOG_INFO* rli_fake;
+
/*
Constant for THD::where initialization in the beginning of every query.
@@ -1206,12 +895,96 @@ public:
/* container for handler's private per-connection data */
void *ha_data[MAX_HA];
+
+#ifdef HAVE_ROW_BASED_REPLICATION
+#ifndef MYSQL_CLIENT
+
+ /*
+ Public interface to write rows to the binlog
+ */
+ int binlog_write_row(TABLE* table, bool is_transactional,
+ MY_BITMAP const* cols, my_size_t colcnt,
+ const byte *buf);
+ int binlog_delete_row(TABLE* table, bool is_transactional,
+ MY_BITMAP const* cols, my_size_t colcnt,
+ const byte *buf);
+ int binlog_update_row(TABLE* table, bool is_transactional,
+ MY_BITMAP const* cols, my_size_t colcnt,
+ const byte *old_data, const byte *new_data);
+
+ void set_server_id(uint32 sid) { server_id = sid; }
+
+ /*
+    Member functions to handle the pending event for row-level logging.
+ */
+ template <class RowsEventT> Rows_log_event*
+ binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
+ MY_BITMAP const* cols,
+ my_size_t colcnt,
+ my_size_t needed,
+ bool is_transactional);
+ Rows_log_event* binlog_get_pending_rows_event() const;
+ void binlog_set_pending_rows_event(Rows_log_event* ev);
+ int binlog_setup_trx_data();
+
+ my_size_t max_row_length_blob(TABLE* table, const byte *data) const;
+ my_size_t max_row_length(TABLE* table, const byte *data) const
+ {
+ TABLE_SHARE *table_s= table->s;
+ my_size_t length= table_s->reclength + 2 * table_s->fields;
+ if (table_s->blob_fields == 0)
+ return length;
+
+ return (length+max_row_length_blob(table,data));
+ }
+
+ my_size_t pack_row(TABLE* table, MY_BITMAP const* cols, byte *row_data,
+ const byte *data) const;
+
+ int binlog_flush_pending_rows_event(bool stmt_end);
+ void binlog_delete_pending_rows_event();
+
+#endif
+#endif /* HAVE_ROW_BASED_REPLICATION */
+#ifndef MYSQL_CLIENT
+ enum enum_binlog_query_type {
+ /*
+ The query can be logged row-based or statement-based
+ */
+ ROW_QUERY_TYPE,
+
+ /*
+ The query has to be logged statement-based
+ */
+ STMT_QUERY_TYPE,
+
+ /*
+ The query represents a change to a table in the "mysql"
+ database and is currently mapped to ROW_QUERY_TYPE.
+ */
+ MYSQL_QUERY_TYPE,
+ QUERY_TYPE_COUNT
+ };
+
+ int binlog_query(enum_binlog_query_type qtype,
+ char const *query, ulong query_len,
+ bool is_trans, bool suppress_use);
+#endif
+
+public:
+
struct st_transactions {
SAVEPOINT *savepoints;
THD_TRANS all; // Trans since BEGIN WORK
THD_TRANS stmt; // Trans for current statement
bool on; // see ha_enable_transaction()
+ XID xid; // transaction identifier
+ enum xa_states xa_state; // used by external XA only
XID_STATE xid_state;
+#ifdef HAVE_ROW_BASED_REPLICATION
+ Rows_log_event *m_pending_rows_event;
+#endif
+
/*
Tables changed in transaction (that must be invalidated in query cache).
List contain only transactional tables, that not invalidated in query
@@ -1768,6 +1541,7 @@ class select_create: public select_insert {
HA_CREATE_INFO *create_info;
MYSQL_LOCK *lock;
Field **field;
+ bool create_table_written;
public:
select_create (TABLE_LIST *table,
HA_CREATE_INFO *create_info_par,
@@ -1776,9 +1550,11 @@ public:
List<Item> &select_fields,enum_duplicates duplic, bool ignore)
:select_insert (NULL, NULL, &select_fields, 0, 0, duplic, ignore), create_table(table),
extra_fields(&fields_par),keys(&keys_par), create_info(create_info_par),
- lock(0)
+ lock(0), create_table_written(FALSE)
{}
int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
+
+ void binlog_show_create_table();
void store_values(List<Item> &values);
void send_error(uint errcode,const char *err);
bool send_eof();
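One detail of the new THD members above that is easy to miss: max_row_length() is only an upper bound used to size the packing buffer (reclength plus two extra bytes per field, presumably for packed length bytes, plus the actual blob lengths), and pack_row() reports how much of it was actually used. A small consistency check, written as a sketch with a made-up name:

static void check_packed_length(THD *thd, TABLE *table,
                                MY_BITMAP const *cols,
                                byte *buf, const byte *record)
{
  my_size_t const bound= thd->max_row_length(table, record);
  my_size_t const used=  thd->pack_row(table, cols, buf, record);
  /* pack_row() must never write beyond the estimated maximum. */
  DBUG_ASSERT(used <= bound);
}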
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index a9c3504250e..745139924ab 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -40,6 +40,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
ha_rows deleted;
uint usable_index= MAX_KEY;
SELECT_LEX *select_lex= &thd->lex->select_lex;
+ bool ha_delete_row_bypassed= 0;
DBUG_ENTER("mysql_delete");
if (open_and_lock_tables(thd, table_list))
@@ -77,15 +78,18 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
!(specialflag & (SPECIAL_NO_NEW_FUNC | SPECIAL_SAFE_MODE)) &&
!(table->triggers && table->triggers->has_delete_triggers()))
{
- deleted= table->file->records;
+ ha_rows const maybe_deleted= table->file->records;
if (!(error=table->file->delete_all_rows()))
{
error= -1; // ok
+ deleted= maybe_deleted;
+ ha_delete_row_bypassed= 1;
goto cleanup;
}
if (error != HA_ERR_WRONG_COMMAND)
{
table->file->print_error(error,MYF(0));
+ ha_delete_row_bypassed= 1;
error=0;
goto cleanup;
}
@@ -211,7 +215,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
break;
}
- if (!(error= table->file->delete_row(table->record[0])))
+ if (!(error= table->file->ha_delete_row(table->record[0])))
{
deleted++;
if (table->triggers &&
@@ -293,10 +297,24 @@ cleanup:
{
if (error < 0)
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length,
- transactional_table, FALSE);
- if (mysql_bin_log.write(&qinfo) && transactional_table)
+
+    /*
+      If 'handler::delete_all_rows()' was called, we replicate
+      statement-based; otherwise, 'ha_delete_row()' was used to
+      delete specific rows, and those deletes may be logged row-based.
+    */
+ THD::enum_binlog_query_type const
+ query_type(ha_delete_row_bypassed ?
+ THD::STMT_QUERY_TYPE :
+ THD::ROW_QUERY_TYPE);
+ int log_result= thd->binlog_query(query_type,
+ thd->query, thd->query_length,
+ transactional_table, FALSE);
+
+ if (log_result && transactional_table)
+ {
error=1;
+ }
}
if (!transactional_table)
thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
@@ -592,7 +610,7 @@ bool multi_delete::send_data(List<Item> &values)
TRG_ACTION_BEFORE, FALSE))
DBUG_RETURN(1);
table->status|= STATUS_DELETED;
- if (!(error=table->file->delete_row(table->record[0])))
+ if (!(error=table->file->ha_delete_row(table->record[0])))
{
deleted++;
if (table->triggers &&
@@ -705,7 +723,7 @@ int multi_delete::do_deletes()
local_error= 1;
break;
}
- if ((local_error=table->file->delete_row(table->record[0])))
+ if ((local_error=table->file->ha_delete_row(table->record[0])))
{
table->file->print_error(local_error,MYF(0));
break;
@@ -772,10 +790,13 @@ bool multi_delete::send_eof()
{
if (local_error == 0)
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length,
- transactional_tables, FALSE);
- if (mysql_bin_log.write(&qinfo) && !normal_tables)
+ if (thd->binlog_query(THD::ROW_QUERY_TYPE,
+ thd->query, thd->query_length,
+ transactional_tables, FALSE) &&
+ !normal_tables)
+ {
local_error=1; // Log write failed: roll back the SQL statement
+ }
}
if (!transactional_tables)
thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
@@ -880,10 +901,13 @@ end:
{
if (mysql_bin_log.is_open())
{
+      /*
+        TRUNCATE must always be binlogged statement-based (not
+        row-based), so we don't test binlog_row_based.
+      */
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length,
- 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::STMT_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
send_ok(thd); // This should return record count
}
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index 72a2f4a4f91..e053f06df55 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -21,6 +21,7 @@
#include "sp_head.h"
#include "sql_trigger.h"
#include "sql_select.h"
+#include "sql_show.h"
static int check_null_fields(THD *thd,TABLE *entry);
#ifndef EMBEDDED_LIBRARY
@@ -576,10 +577,13 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
{
if (error <= 0)
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length,
- transactional_table, FALSE);
- if (mysql_bin_log.write(&qinfo) && transactional_table)
- error=1;
+ if (thd->binlog_query(THD::ROW_QUERY_TYPE,
+ thd->query, thd->query_length,
+ transactional_table, FALSE) &&
+ transactional_table)
+ {
+ error=1;
+ }
}
if (!transactional_table)
thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
@@ -945,10 +949,11 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
DBUG_ENTER("write_record");
info->records++;
+
if (info->handle_duplicates == DUP_REPLACE ||
info->handle_duplicates == DUP_UPDATE)
{
- while ((error=table->file->write_row(table->record[0])))
+ while ((error=table->file->ha_write_row(table->record[0])))
{
uint key_nr;
if (error != HA_WRITE_SKIP)
@@ -1032,7 +1037,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
thd->clear_next_insert_id= 0;
thd->next_insert_id= 0;
}
- if ((error=table->file->update_row(table->record[1],table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],table->record[0])))
{
if ((error == HA_ERR_FOUND_DUPP_KEY) && info->ignore)
goto ok_or_after_trg_err;
@@ -1071,8 +1076,8 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
thd->clear_next_insert_id= 0;
thd->next_insert_id= 0;
}
- if ((error=table->file->update_row(table->record[1],
- table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],
+ table->record[0])))
goto err;
info->deleted++;
trg_error= (table->triggers &&
@@ -1089,7 +1094,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
table->triggers->process_triggers(thd, TRG_EVENT_DELETE,
TRG_ACTION_BEFORE, TRUE))
goto before_trg_err;
- if ((error=table->file->delete_row(table->record[1])))
+ if ((error=table->file->ha_delete_row(table->record[1])))
goto err;
info->deleted++;
if (!table->file->has_transactions())
@@ -1110,7 +1115,7 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
table->triggers->process_triggers(thd, TRG_EVENT_INSERT,
TRG_ACTION_AFTER, TRUE));
}
- else if ((error=table->file->write_row(table->record[0])))
+ else if ((error=table->file->ha_write_row(table->record[0])))
{
if (!info->ignore ||
(error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE))
@@ -1196,16 +1201,15 @@ int check_that_all_fields_are_given_values(THD *thd, TABLE *entry,
class delayed_row :public ilink {
public:
- char *record,*query;
+ char *record;
enum_duplicates dup;
time_t start_time;
bool query_start_used,last_insert_id_used,insert_id_used, ignore, log_query;
ulonglong last_insert_id;
timestamp_auto_set_type timestamp_field_type;
- uint query_length;
delayed_row(enum_duplicates dup_arg, bool ignore_arg, bool log_query_arg)
- :record(0), query(0), dup(dup_arg), ignore(ignore_arg), log_query(log_query_arg) {}
+ :record(0), dup(dup_arg), ignore(ignore_arg), log_query(log_query_arg) {}
~delayed_row()
{
x_free(record);
@@ -1215,6 +1219,9 @@ public:
class delayed_insert :public ilink {
uint locks_in_memory;
+ char *query;
+ ulong query_length;
+ ulong query_allocated;
public:
THD thd;
TABLE *table;
@@ -1228,7 +1235,7 @@ public:
TABLE_LIST table_list; // Argument
delayed_insert()
- :locks_in_memory(0),
+ :locks_in_memory(0), query(0), query_length(0), query_allocated(0),
table(0),tables_in_use(0),stacked_inserts(0), status(0), dead(0),
group_count(0)
{
@@ -1254,6 +1261,7 @@ public:
}
~delayed_insert()
{
+ my_free(query, MYF(MY_WME|MY_ALLOW_ZERO_PTR));
/* The following is not really needed, but just for safety */
delayed_row *row;
while ((row=rows.get()))
@@ -1273,6 +1281,25 @@ public:
VOID(pthread_cond_broadcast(&COND_thread_count)); /* Tell main we are ready */
}
+ int set_query(char const *q, ulong qlen) {
+ if (q && qlen > 0)
+ {
+ if (query_allocated < qlen + 1)
+ {
+ ulong const flags(MY_WME|MY_FREE_ON_ERROR|MY_ALLOW_ZERO_PTR);
+ query= my_realloc(query, qlen + 1, MYF(flags));
+ if (query == 0)
+ return HA_ERR_OUT_OF_MEM;
+ query_allocated= qlen;
+ }
+ query_length= qlen;
+ memcpy(query, q, qlen + 1);
+ }
+ else
+ query_length= 0;
+ return 0;
+ }
+
/* The following is for checking when we can delete ourselves */
inline void lock()
{
@@ -1562,18 +1589,22 @@ static int write_delayed(THD *thd,TABLE *table,enum_duplicates duplic,
if (thd->killed || !(row= new delayed_row(duplic, ignore, log_on)))
goto err;
+#if 0
if (!query)
query_length=0;
- if (!(row->record= (char*) my_malloc(table->s->reclength+query_length+1,
- MYF(MY_WME))))
+#endif
+ if (!(row->record= (char*) my_malloc(table->s->reclength, MYF(MY_WME))))
goto err;
memcpy(row->record, table->record[0], table->s->reclength);
+ di->set_query(query, query_length);
+#if 0
if (query_length)
{
row->query= row->record+table->s->reclength;
memcpy(row->query,query,query_length+1);
}
row->query_length= query_length;
+#endif
row->start_time= thd->start_time;
row->query_start_used= thd->query_start_used;
row->last_insert_id_used= thd->last_insert_id_used;
@@ -1897,7 +1928,21 @@ bool delayed_insert::handle_inserts(void)
{
int error;
ulong max_rows;
- bool using_ignore=0, using_bin_log=mysql_bin_log.is_open();
+ bool using_ignore=0,
+ using_bin_log= mysql_bin_log.is_open();
+
+#if 0
+ /*
+ The actual text for the query is added to the first row in the
+    list. Since the row is destroyed, with all its memory, we need
+ to take a copy of it to be able to log it after all rows have been
+ applied.
+ */
+ uint const query_length= rows.head()->query_length;
+ char *const query= static_cast<char*>(my_alloca(query_length+1));
+ memcpy(query, rows.head()->query, query_length);
+#endif
+
delayed_row *row;
DBUG_ENTER("handle_inserts");
@@ -1963,11 +2008,6 @@ bool delayed_insert::handle_inserts(void)
using_ignore=0;
table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
}
- if (row->query && row->log_query && using_bin_log)
- {
- Query_log_event qinfo(&thd, row->query, row->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
- }
if (table->s->blob_fields)
free_delayed_insert_blobs(table);
thread_safe_sub(delayed_rows_in_use,1,&LOCK_delayed_status);
@@ -1982,8 +2022,7 @@ bool delayed_insert::handle_inserts(void)
on this table until all entries has been processed
*/
if (group_count++ >= max_rows && (row= rows.head()) &&
- (!(row->log_query & using_bin_log) ||
- row->query))
+ (!(row->log_query & using_bin_log)))
{
group_count=0;
if (stacked_inserts || tables_in_use) // Let these wait a while
@@ -2019,6 +2058,10 @@ bool delayed_insert::handle_inserts(void)
thd.proc_info=0;
table->next_number_field=0;
pthread_mutex_unlock(&mutex);
+
+  /* Log the query after releasing the mutex, to prevent deadlocks. */
+ thd.binlog_query(THD::ROW_QUERY_TYPE, query, query_length, FALSE, FALSE);
+
if ((error=table->file->extra(HA_EXTRA_NO_CACHE)))
{ // This shouldn't happen
table->file->print_error(error,MYF(0));
@@ -2216,6 +2259,16 @@ select_insert::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
check_that_all_fields_are_given_values(thd, table, table_list)) ||
table_list->prepare_where(thd, 0, TRUE) ||
table_list->prepare_check_option(thd));
+
+ /*
+ For non-transactional non-temporary tables, we set the
+ OPTION_STATUS_NO_TRANS_UPDATE flag here. The send_eof() function
+ is used by both the select_insert and the select_create classes,
+ so setting it there would clash.
+ */
+ if (!(table->file->has_transactions() || table->s->tmp_table))
+ thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
+
DBUG_RETURN(res);
}
@@ -2345,9 +2398,31 @@ void select_insert::send_error(uint errcode,const char *err)
table->file->end_bulk_insert();
/*
If at least one row has been inserted/modified and will stay in the table
- (the table doesn't have transactions) (example: we got a duplicate key
- error while inserting into a MyISAM table) we must write to the binlog (and
+ (the table doesn't have transactions) we must write to the binlog (and
the error code will make the slave stop).
+
+ For many errors (example: we got a duplicate key error while
+ inserting into a MyISAM table), no row will be added to the table,
+ so passing the error to the slave will not help since there will
+ be an error code mismatch (the inserts will succeed on the slave
+ with no error).
+
+ If we are using row-based replication we have two cases where this
+ code is executed: replication of CREATE-SELECT and replication of
+ INSERT-SELECT.
+
+ When replicating a CREATE-SELECT statement, we shall not write the
+ events to the binary log. To prevent the ha_rollback_stmt() below
+ from writing to the binary log, we have to pretend that the table
+ is transactional, even if it actually is not. Therefore, the
+ OPTION_STATUS_NO_TRANS_UPDATE is cleared in
+ select_create::prepare() and will remain cleared here.
+
+ When replicating INSERT-SELECT, we shall not write the events to
+  the binary log for a transactional table, but shall write all events
+ if there is one or more writes to non-transactional tables. In
+ this case, the OPTION_STATUS_NO_TRANS_UPDATE is set if there is a
+ write to a non-transactional table, otherwise it is cleared.
*/
if ((info.copied || info.deleted || info.updated) &&
!table->file->has_transactions())
@@ -2356,11 +2431,10 @@ void select_insert::send_error(uint errcode,const char *err)
thd->insert_id(last_insert_id); // For binary log
if (mysql_bin_log.is_open())
{
- Query_log_event qinfo(thd, thd->query, thd->query_length,
- table->file->has_transactions(), FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query, thd->query_length,
+ table->file->has_transactions(), FALSE);
}
- if (!table->s->tmp_table)
+ if (!binlog_row_based && !table->s->tmp_table)
thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
}
if (info.copied || info.deleted || info.updated)
@@ -2382,26 +2456,36 @@ bool select_insert::send_eof()
/*
We must invalidate the table in the query cache before binlog writing
- and ha_autocommit_or_rollback
- */
+ and ha_autocommit_or_rollback.
+
+ If nothing was inserted in the table, there is no need to emit a
+ ROLLBACK statement to the binary log, so in that case we clear
+ OPTION_STATUS_NO_TRANS_UPDATE.
+ Observe that select_insert::send_eof() is used by both
+ select_insert and select_create and that they set the flag in
+ different manners. See Note 1 below for more info.
+ */
if (info.copied || info.deleted || info.updated)
- {
query_cache_invalidate3(thd, table, 1);
- if (!(table->file->has_transactions() || table->s->tmp_table))
- thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
- }
+ else
+ thd->options&= ~OPTION_STATUS_NO_TRANS_UPDATE;
if (last_insert_id)
thd->insert_id(last_insert_id); // For binary log
- /* Write to binlog before commiting transaction */
+ /*
+    Write to binlog before committing the transaction.  No statement will
+ be written by the binlog_query() below in RBR mode. All the
+ events are in the transaction cache and will be written when
+ ha_autocommit_or_rollback() is issued below.
+ */
if (mysql_bin_log.is_open())
{
if (!error)
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length,
- table->file->has_transactions(), FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::ROW_QUERY_TYPE,
+ thd->query, thd->query_length,
+ table->file->has_transactions(), FALSE);
}
if ((error2=ha_autocommit_or_rollback(thd,error)) && ! error)
error=error2;
@@ -2467,8 +2551,62 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u)
}
+void
+select_create::binlog_show_create_table()
+{
+ /*
+ Note 1: In RBR mode, we generate a CREATE TABLE statement for the
+ created table by calling store_create_info() (behaves as SHOW
+ CREATE TABLE). In the event of an error, nothing should be
+ written to the binary log, even if the table is non-transactional;
+ therefore we pretend that the generated CREATE TABLE statement is
+ for a transactional table. The event will then be put in the
+ transaction cache, and any subsequent events (e.g., table-map
+  events and row events) will also be put there.  We can then use
+ ha_autocommit_or_rollback() to either throw away the entire
+ kaboodle of events, or write them to the binary log.
+
+ We write the CREATE TABLE statement here and not in prepare()
+ since there potentially are sub-selects or accesses to information
+ schema that will do a close_thread_tables(), destroying the
+ statement transaction cache.
+
+ To ensure that the event kaboodle is not written to the binary log
+ on rollback, we clear the OPTION_STATUS_NO_TRANS_UPDATE bit of
+ thd->options.
+ */
+ DBUG_ASSERT(binlog_row_based && !create_table_written);
+
+ thd->options&= ~OPTION_STATUS_NO_TRANS_UPDATE;
+ char buf[2048];
+ String query(buf, sizeof(buf), system_charset_info);
+ query.length(0); // Have to zero it since constructor doesn't
+
+ TABLE_LIST tables;
+ memset(&tables, 0, sizeof(tables));
+ tables.table = table;
+
+ int result= store_create_info(thd, &tables, &query, create_info);
+  DBUG_ASSERT(result == 0); /* store_create_info() always returns 0 */
+ thd->binlog_query(THD::STMT_QUERY_TYPE,
+ query.ptr(), query.length(),
+ /* is_trans */ TRUE,
+ /* suppress_use */ FALSE);
+}
+
+
void select_create::store_values(List<Item> &values)
{
+ /*
+ Before writing the first row, we write the CREATE TABLE statement
+ to the binlog.
+ */
+ if (binlog_row_based && !create_table_written)
+ {
+ binlog_show_create_table();
+ create_table_written= TRUE;
+ }
+
fill_record_n_invoke_before_triggers(thd, field, values, 1,
table->triggers, TRG_EVENT_INSERT);
}
@@ -2488,6 +2626,16 @@ void select_create::send_error(uint errcode,const char *err)
bool select_create::send_eof()
{
+ /*
+    If no rows were written to the binary log, we write the CREATE
+ TABLE statement to the binlog.
+ */
+ if (binlog_row_based && !create_table_written)
+ {
+ binlog_show_create_table();
+ create_table_written= TRUE;
+ }
+
bool tmp=select_insert::send_eof();
if (tmp)
abort();
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 303245b38bd..00ba075e922 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -93,7 +93,8 @@ enum enum_sql_command {
SQLCOM_XA_COMMIT, SQLCOM_XA_ROLLBACK, SQLCOM_XA_RECOVER,
SQLCOM_SHOW_PROC_CODE, SQLCOM_SHOW_FUNC_CODE,
SQLCOM_INSTALL_PLUGIN, SQLCOM_UNINSTALL_PLUGIN,
- SQLCOM_SHOW_AUTHORS, SQLCOM_SHOW_PLUGINS,
+ SQLCOM_SHOW_AUTHORS, SQLCOM_BINLOG_BASE64_EVENT,
+ SQLCOM_SHOW_PLUGINS,
/* This should be the last !!! */
SQLCOM_END
diff --git a/sql/sql_list.h b/sql/sql_list.h
index b2bcc4ea401..05f589a2c23 100644
--- a/sql/sql_list.h
+++ b/sql/sql_list.h
@@ -441,6 +441,28 @@ struct ilink
};
+/* Needed to be able to have an I_List of char* strings in mysqld.cc. */
+
+class i_string: public ilink
+{
+public:
+ const char* ptr;
+ i_string():ptr(0) { }
+ i_string(const char* s) : ptr(s) {}
+};
+
+/* needed for linked list of two strings for replicate-rewrite-db */
+class i_string_pair: public ilink
+{
+public:
+ const char* key;
+ const char* val;
+ i_string_pair():key(0),val(0) { }
+ i_string_pair(const char* key_arg, const char* val_arg) :
+ key(key_arg),val(val_arg) {}
+};
+
+
template <class T> class I_List_iterator;
/*
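The two classes above only wrap borrowed C strings in an ilink so that they can live in an intrusive I_List, as the replication filter lists in mysqld.cc do; a minimal sketch, with a made-up helper:

static void remember_db(I_List<i_string> *dbs, const char *name)
{
  /* i_string stores the pointer only; the caller keeps 'name' alive. */
  dbs->push_back(new i_string(name));
}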
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 09bcb9cb9fe..70abe3e659c 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -414,38 +414,55 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
#ifndef EMBEDDED_LIBRARY
if (mysql_bin_log.is_open())
{
+#ifdef HAVE_ROW_BASED_REPLICATION
/*
- Make sure last block (the one which caused the error) gets logged.
- This is needed because otherwise after write of
- (to the binlog, not to read_info (which is a cache))
- Delete_file_log_event the bad block will remain in read_info (because
- pre_read is not called at the end of the last block; remember pre_read
- is called whenever a new block is read from disk).
- At the end of mysql_load(), the destructor of read_info will call
- end_io_cache() which will flush read_info, so we will finally have
- this in the binlog:
- Append_block # The last successfull block
- Delete_file
- Append_block # The failing block
- which is nonsense.
- Or could also be (for a small file)
- Create_file # The failing block
- which is nonsense (Delete_file is not written in this case, because:
- Create_file has not been written, so Delete_file is not written, then
- when read_info is destroyed end_io_cache() is called which writes
- Create_file.
+ We need to do the job that is normally done inside
+ binlog_query() here, which is to ensure that the pending event
+ is written before tables are unlocked and before any other
+ events are written. We also need to update the table map
+ version for the binary log to mark that table maps are invalid
+ after this point.
*/
- read_info.end_io_cache();
- /* If the file was not empty, wrote_create_file is true */
- if (lf_info.wrote_create_file)
+ if (binlog_row_based)
+ thd->binlog_flush_pending_rows_event(true);
+ else
+#endif
{
- if ((info.copied || info.deleted) && !transactional_table)
- write_execute_load_query_log_event(thd, handle_duplicates,
- ignore, transactional_table);
- else
+ /*
+ Make sure last block (the one which caused the error) gets
+ logged. This is needed because otherwise after write of (to
+ the binlog, not to read_info (which is a cache))
+ Delete_file_log_event the bad block will remain in read_info
+ (because pre_read is not called at the end of the last
+ block; remember pre_read is called whenever a new block is
+ read from disk). At the end of mysql_load(), the destructor
+ of read_info will call end_io_cache() which will flush
+ read_info, so we will finally have this in the binlog:
+
+        Append_block # The last successful block
+        Delete_file
+        Append_block # The failing block
+        which is nonsense.
+        For a small file it could instead be
+        Create_file # The failing block
+        which is also nonsense: Delete_file is not written in this case,
+        because Create_file has not been written yet; then, when read_info
+        is destroyed, end_io_cache() is called, which writes Create_file.
+ */
+ read_info.end_io_cache();
+ /* If the file was not empty, wrote_create_file is true */
+ if (lf_info.wrote_create_file)
{
- Delete_file_log_event d(thd, db, transactional_table);
- mysql_bin_log.write(&d);
+ if ((info.copied || info.deleted) && !transactional_table)
+ write_execute_load_query_log_event(thd, handle_duplicates,
+ ignore, transactional_table);
+ else
+ {
+ Delete_file_log_event d(thd, db, transactional_table);
+ d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
+ mysql_bin_log.write(&d);
+ }
}
}
}
@@ -462,15 +479,32 @@ bool mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
#ifndef EMBEDDED_LIBRARY
if (mysql_bin_log.is_open())
{
+#ifdef HAVE_ROW_BASED_REPLICATION
/*
- As already explained above, we need to call end_io_cache() or the last
- block will be logged only after Execute_load_query_log_event (which is
- wrong), when read_info is destroyed.
- */
- read_info.end_io_cache();
- if (lf_info.wrote_create_file)
- write_execute_load_query_log_event(thd, handle_duplicates,
- ignore, transactional_table);
+ We need to do the job that is normally done inside
+ binlog_query() here, which is to ensure that the pending event
+ is written before tables are unlocked and before any other
+ events are written. We also need to update the table map
+ version for the binary log to mark that table maps are invalid
+ after this point.
+ */
+ if (binlog_row_based)
+ thd->binlog_flush_pending_rows_event(true);
+ else
+#endif
+ {
+ /*
+ As already explained above, we need to call end_io_cache() or the last
+ block will be logged only after Execute_load_query_log_event (which is
+ wrong), when read_info is destroyed.
+ */
+ read_info.end_io_cache();
+ if (lf_info.wrote_create_file)
+ {
+ write_execute_load_query_log_event(thd, handle_duplicates,
+ ignore, transactional_table);
+ }
+ }
}
#endif /*!EMBEDDED_LIBRARY*/
if (transactional_table)
@@ -499,6 +533,7 @@ static bool write_execute_load_query_log_event(THD *thd,
(duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
(ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
transactional_table, FALSE);
+ e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
return mysql_bin_log.write(&e);
}
@@ -910,7 +945,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
if (get_it_from_net)
cache.read_function = _my_b_net_read;
- if (mysql_bin_log.is_open())
+ if (!binlog_row_based && mysql_bin_log.is_open())
cache.pre_read = cache.pre_close =
(IO_CACHE_CALLBACK) log_loaded_block;
#endif
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index cf098f1b414..839e1dbd65f 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -3712,8 +3712,8 @@ end_with_restore_list:
{
if (mysql_bin_log.is_open())
{
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
send_ok(thd);
}
@@ -3730,8 +3730,8 @@ end_with_restore_list:
{
if (mysql_bin_log.is_open())
{
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
send_ok(thd);
}
@@ -3748,8 +3748,8 @@ end_with_restore_list:
{
if (mysql_bin_log.is_open())
{
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
send_ok(thd);
}
@@ -3764,8 +3764,8 @@ end_with_restore_list:
{
if (mysql_bin_log.is_open())
{
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
send_ok(thd);
}
@@ -3844,8 +3844,8 @@ end_with_restore_list:
if (!res && mysql_bin_log.is_open())
{
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
}
else
@@ -3864,8 +3864,8 @@ end_with_restore_list:
if (mysql_bin_log.is_open())
{
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
if (lex->sql_command == SQLCOM_GRANT)
{
@@ -4161,12 +4161,12 @@ end_with_restore_list:
db, name,
lex->sql_command == SQLCOM_CREATE_PROCEDURE, 1))
{
- close_thread_tables(thd);
if (sp_grant_privileges(thd, db, name,
lex->sql_command == SQLCOM_CREATE_PROCEDURE))
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_PROC_AUTO_GRANT_FAIL,
ER(ER_PROC_AUTO_GRANT_FAIL));
+ close_thread_tables(thd);
}
#endif
send_ok(thd);
@@ -4394,8 +4394,8 @@ end_with_restore_list:
if (mysql_bin_log.is_open())
{
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
send_ok(thd);
break;
@@ -4483,8 +4483,8 @@ end_with_restore_list:
if (mysql_bin_log.is_open())
{
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::MYSQL_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
send_ok(thd);
break;
@@ -4608,8 +4608,8 @@ end_with_restore_list:
buff.append(STRING_WITH_LEN(" AS "));
buff.append(first_table->source.str, first_table->source.length);
- Query_log_event qinfo(thd, buff.ptr(), buff.length(), 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::STMT_QUERY_TYPE,
+ buff.ptr(), buff.length(), FALSE, FALSE);
}
break;
}
@@ -4622,8 +4622,8 @@ end_with_restore_list:
mysql_bin_log.is_open())
{
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::STMT_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
break;
}
@@ -4826,15 +4826,24 @@ end_with_restore_list:
if (! (res= mysql_uninstall_plugin(thd, &thd->lex->comment)))
send_ok(thd);
break;
+ case SQLCOM_BINLOG_BASE64_EVENT:
+ {
+#ifndef EMBEDDED_LIBRARY
+ mysql_client_binlog_statement(thd);
+#else /* EMBEDDED_LIBRARY */
+ my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "embedded");
+#endif /* EMBEDDED_LIBRARY */
+ break;
+ }
default:
DBUG_ASSERT(0); /* Impossible */
send_ok(thd);
break;
}
thd->proc_info="query end";
- /* Two binlog-related cleanups: */
/*
+ Binlog-related cleanup:
Reset system variables temporarily modified by SET ONE SHOT.
Exception: If this is a SET, do nothing. This is to allow
@@ -5571,7 +5580,6 @@ void mysql_init_multi_delete(LEX *lex)
lex->query_tables_last= &lex->query_tables;
}
-
/*
When you modify mysql_parse(), you may need to mofify
mysql_test_parse_for_slave() in this same file.
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 591289f6ee1..82bd6b2c499 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -625,7 +625,7 @@ my_bool mysql_install_plugin(THD *thd, LEX_STRING *name, LEX_STRING *dl)
restore_record(table, s->default_values);
table->field[0]->store(name->str, name->length, system_charset_info);
table->field[1]->store(dl->str, dl->length, files_charset_info);
- error= table->file->write_row(table->record[0]);
+ error= table->file->ha_write_row(table->record[0]);
if (error)
{
table->file->print_error(error, MYF(0));
@@ -694,7 +694,7 @@ my_bool mysql_uninstall_plugin(THD *thd, LEX_STRING *name)
HA_READ_KEY_EXACT))
{
int error;
- if ((error= table->file->delete_row(table->record[0])))
+ if ((error= table->file->ha_delete_row(table->record[0])))
{
table->file->print_error(error, MYF(0));
goto err;
diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc
index 2c8c732fe86..a1bbb69bc17 100644
--- a/sql/sql_rename.cc
+++ b/sql/sql_rename.cc
@@ -84,8 +84,8 @@ bool mysql_rename_tables(THD *thd, TABLE_LIST *table_list)
if (mysql_bin_log.is_open())
{
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, 0, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::STMT_QUERY_TYPE,
+ thd->query, thd->query_length, FALSE, FALSE);
}
send_ok(thd);
}
diff --git a/sql/sql_repl.h b/sql/sql_repl.h
index ba64e626adc..789de64da85 100644
--- a/sql/sql_repl.h
+++ b/sql/sql_repl.h
@@ -14,6 +14,8 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+#include "rpl_filter.h"
+
#ifdef HAVE_REPLICATION
#include "slave.h"
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 7b12069b8ec..f2833f94400 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -9288,11 +9288,11 @@ bool create_myisam_from_heap(THD *thd, TABLE *table, TMP_TABLE_PARAM *param,
*/
while (!table->file->rnd_next(new_table.record[1]))
{
- if ((write_err=new_table.file->write_row(new_table.record[1])))
+ if ((write_err=new_table.file->ha_write_row(new_table.record[1])))
goto err;
}
/* copy row that filled HEAP table */
- if ((write_err=new_table.file->write_row(table->record[0])))
+ if ((write_err=new_table.file->ha_write_row(table->record[0])))
{
if (write_err != HA_ERR_FOUND_DUPP_KEY &&
write_err != HA_ERR_FOUND_DUPP_UNIQUE || !ignore_last_dupp_key_error)
@@ -10691,7 +10691,7 @@ end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
{
int error;
join->found_records++;
- if ((error=table->file->write_row(table->record[0])))
+ if ((error=table->file->ha_write_row(table->record[0])))
{
if (error == HA_ERR_FOUND_DUPP_KEY ||
error == HA_ERR_FOUND_DUPP_UNIQUE)
@@ -10753,8 +10753,8 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
{ /* Update old record */
restore_record(table,record[1]);
update_tmptable_sum_func(join->sum_funcs,table);
- if ((error=table->file->update_row(table->record[1],
- table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],
+ table->record[0])))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
@@ -10777,7 +10777,7 @@ end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
}
init_tmptable_sum_functions(join->sum_funcs);
copy_funcs(join->tmp_table_param.items_to_copy);
- if ((error=table->file->write_row(table->record[0])))
+ if ((error=table->file->ha_write_row(table->record[0])))
{
if (create_myisam_from_heap(join->thd, table, &join->tmp_table_param,
error, 0))
@@ -10813,7 +10813,7 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
copy_fields(&join->tmp_table_param); // Groups are copied twice.
copy_funcs(join->tmp_table_param.items_to_copy);
- if (!(error=table->file->write_row(table->record[0])))
+ if (!(error=table->file->ha_write_row(table->record[0])))
join->send_records++; // New group
else
{
@@ -10829,8 +10829,8 @@ end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
}
restore_record(table,record[1]);
update_tmptable_sum_func(join->sum_funcs,table);
- if ((error=table->file->update_row(table->record[1],
- table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],
+ table->record[0])))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
@@ -10873,7 +10873,7 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
join->sum_funcs_end[send_group_parts]);
if (!join->having || join->having->val_int())
{
- int error= table->file->write_row(table->record[0]);
+ int error= table->file->ha_write_row(table->record[0]);
if (error && create_myisam_from_heap(join->thd, table,
&join->tmp_table_param,
error, 0))
@@ -11735,7 +11735,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
}
if (having && !having->val_int())
{
- if ((error=file->delete_row(record)))
+ if ((error=file->ha_delete_row(record)))
goto err;
error=file->rnd_next(record);
continue;
@@ -11762,7 +11762,7 @@ static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
}
if (compare_record(table, first_field) == 0)
{
- if ((error=file->delete_row(record)))
+ if ((error=file->ha_delete_row(record)))
goto err;
}
else if (!found)
@@ -11859,7 +11859,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
}
if (having && !having->val_int())
{
- if ((error=file->delete_row(record)))
+ if ((error=file->ha_delete_row(record)))
goto err;
continue;
}
@@ -11876,7 +11876,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
if (hash_search(&hash, org_key_pos, key_length))
{
/* Duplicated found ; Remove the row */
- if ((error=file->delete_row(record)))
+ if ((error=file->ha_delete_row(record)))
goto err;
}
else
@@ -13702,7 +13702,7 @@ int JOIN::rollup_write_data(uint idx, TABLE *table)
item->save_in_result_field(1);
}
copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
- if ((error= table->file->write_row(table->record[0])))
+ if ((error= table->file->ha_write_row(table->record[0])))
{
if (create_myisam_from_heap(thd, table, &tmp_table_param,
error, 0))
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index d3649aa5f6b..42345afbd0d 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -19,6 +19,7 @@
#include "mysql_priv.h"
#include "sql_select.h" // For select_describe
+#include "sql_show.h"
#include "repl_failsafe.h"
#include "sp.h"
#include "sp_head.h"
@@ -37,10 +38,6 @@ static TYPELIB grant_types = { sizeof(grant_names)/sizeof(char **),
grant_names, NULL};
#endif
-static int
-store_create_info(THD *thd, TABLE_LIST *table_list, String *packet);
-static int
-view_store_create_info(THD *thd, TABLE_LIST *table, String *buff);
static bool schema_table_store_record(THD *thd, TABLE *table);
@@ -540,7 +537,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
buffer.length(0);
if ((table_list->view ?
view_store_create_info(thd, table_list, &buffer) :
- store_create_info(thd, table_list, &buffer)))
+ store_create_info(thd, table_list, &buffer, NULL)))
DBUG_RETURN(TRUE);
List<Item> field_list;
@@ -719,7 +716,7 @@ mysqld_dump_create_info(THD *thd, TABLE_LIST *table_list, int fd)
DBUG_PRINT("enter",("table: %s",table_list->table->s->table_name.str));
protocol->prepare_for_resend();
- if (store_create_info(thd, table_list, packet))
+ if (store_create_info(thd, table_list, packet, NULL))
DBUG_RETURN(-1);
if (fd < 0)
@@ -872,8 +869,31 @@ static void append_directory(THD *thd, String *packet, const char *dir_type,
#define LIST_PROCESS_HOST_LEN 64
-static int
-store_create_info(THD *thd, TABLE_LIST *table_list, String *packet)
+/*
+ Build a CREATE TABLE statement for a table.
+
+ SYNOPSIS
+ store_create_info()
+ thd The thread
+    table_list        A list containing the one table to write the
+                      statement for.
+    packet            Pointer to a string where the statement will be
+                      written.
+ create_info_arg Pointer to create information that can be used
+ to tailor the format of the statement. Can be
+ NULL, in which case only SQL_MODE is considered
+ when building the statement.
+
+ NOTE
+    Currently always returns 0, but might return an error code in the
+ future.
+
+ RETURN
+ 0 OK
+ */
+int
+store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
+ HA_CREATE_INFO *create_info_arg)
{
List<Item> field_list;
char tmp[MAX_FIELD_WIDTH], *for_str, buff[128], *end;
@@ -1108,10 +1128,17 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet)
packet->append(STRING_WITH_LEN("\n)"));
if (!(thd->variables.sql_mode & MODE_NO_TABLE_OPTIONS) && !foreign_db_mode)
{
- if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40))
- packet->append(STRING_WITH_LEN(" TYPE="));
- else
- packet->append(STRING_WITH_LEN(" ENGINE="));
+    /*
+      If create_info_arg is given, add ENGINE only if it was used
+      when the table was created.
+    */
+ if (!create_info_arg ||
+ (create_info_arg->used_fields & HA_CREATE_USED_ENGINE))
+ {
+ if (thd->variables.sql_mode & (MODE_MYSQL323 | MODE_MYSQL40))
+ packet->append(STRING_WITH_LEN(" TYPE="));
+ else
+ packet->append(STRING_WITH_LEN(" ENGINE="));
#ifdef WITH_PARTITION_STORAGE_ENGINE
if (table->part_info)
packet->append(ha_resolve_storage_engine_name(
@@ -1119,19 +1146,28 @@ store_create_info(THD *thd, TABLE_LIST *table_list, String *packet)
else
packet->append(file->table_type());
#else
- packet->append(file->table_type());
+ packet->append(file->table_type());
#endif
+ }
if (share->table_charset &&
!(thd->variables.sql_mode & MODE_MYSQL323) &&
!(thd->variables.sql_mode & MODE_MYSQL40))
{
- packet->append(STRING_WITH_LEN(" DEFAULT CHARSET="));
- packet->append(share->table_charset->csname);
- if (!(share->table_charset->state & MY_CS_PRIMARY))
+      /*
+        If create_info_arg is given, add DEFAULT CHARSET only if it
+        was used when the table was created.
+      */
+ if (!create_info_arg ||
+ (create_info_arg->used_fields & HA_CREATE_USED_DEFAULT_CHARSET))
{
- packet->append(STRING_WITH_LEN(" COLLATE="));
- packet->append(table->s->table_charset->name);
+ packet->append(STRING_WITH_LEN(" DEFAULT CHARSET="));
+ packet->append(share->table_charset->csname);
+ if (!(share->table_charset->state & MY_CS_PRIMARY))
+ {
+ packet->append(STRING_WITH_LEN(" COLLATE="));
+ packet->append(table->s->table_charset->name);
+ }
}
}
@@ -1236,7 +1272,6 @@ view_store_options(THD *thd, TABLE_LIST *table, String *buff)
buff->append(STRING_WITH_LEN("SQL SECURITY INVOKER "));
}
-
/*
Append DEFINER clause to the given buffer.
@@ -1259,7 +1294,7 @@ void append_definer(THD *thd, String *buffer, const LEX_STRING *definer_user,
}
-static int
+int
view_store_create_info(THD *thd, TABLE_LIST *table, String *buff)
{
my_bool foreign_db_mode= (thd->variables.sql_mode & (MODE_POSTGRESQL |
@@ -1924,7 +1959,7 @@ typedef struct st_index_field_values
static bool schema_table_store_record(THD *thd, TABLE *table)
{
int error;
- if ((error= table->file->write_row(table->record[0])))
+ if ((error= table->file->ha_write_row(table->record[0])))
{
if (create_myisam_from_heap(thd, table,
table->pos_in_table_list->schema_table_param,
diff --git a/sql/sql_show.h b/sql/sql_show.h
new file mode 100644
index 00000000000..6fce5e94ca3
--- /dev/null
+++ b/sql/sql_show.h
@@ -0,0 +1,17 @@
+
+#ifndef SQL_SHOW_H
+#define SQL_SHOW_H
+
+/* Forward declarations */
+class String;
+class THD;
+struct st_ha_create_information;
+struct st_table_list;
+typedef st_ha_create_information HA_CREATE_INFO;
+typedef st_table_list TABLE_LIST;
+
+int store_create_info(THD *thd, TABLE_LIST *table_list, String *packet,
+ HA_CREATE_INFO *create_info_arg);
+int view_store_create_info(THD *thd, TABLE_LIST *table, String *buff);
+
+#endif /* SQL_SHOW_H */
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 0e811d63b36..3ac0d3ae466 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -22,6 +22,7 @@
#include <my_dir.h>
#include "sp_head.h"
#include "sql_trigger.h"
+#include "sql_show.h"
#ifdef __WIN__
#include <io.h>
@@ -53,14 +54,15 @@ static bool check_engine(THD *thd, const char *table_name,
file
*/
-static void write_bin_log(THD *thd, bool clear_error)
+static void write_bin_log(THD *thd, bool clear_error,
+ char const* query, ulong query_length)
{
if (mysql_bin_log.is_open())
{
if (clear_error)
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
- mysql_bin_log.write(&qinfo);
+ thd->binlog_query(THD::STMT_QUERY_TYPE,
+ query, query_length, FALSE, FALSE);
}
}
@@ -277,9 +279,19 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists,
char path[FN_REFLEN], *alias;
String wrong_tables;
int error;
+ int non_temp_tables_count= 0;
bool some_tables_deleted=0, tmp_table_deleted=0, foreign_key_error=0;
+ String built_query;
DBUG_ENTER("mysql_rm_table_part2");
+ if (binlog_row_based && !dont_log_query)
+ {
+ built_query.set_charset(system_charset_info);
+ if (if_exists)
+ built_query.append("DROP TABLE IF EXISTS ");
+ else
+ built_query.append("DROP TABLE ");
+ }
/*
If we have the table in the definition cache, we don't have to check the
.frm file to find if the table is a normal table (not view) and what
@@ -313,6 +325,30 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists,
continue; // removed temporary table
}
+ /*
+ If row-based replication is used and the table is not a
+ temporary table, we add the table name to the drop statement
+ being built. The string always ends in a comma, and the comma
+ will be chopped off before being written to the binary log.
+ */
+ if (binlog_row_based && !dont_log_query)
+ {
+ ++non_temp_tables_count;
+ /*
+ Write the database name only when it is not the current one, or
+ when thd->db is NULL (in which case the name is always qualified).
+ */
+ built_query.append("`");
+ if (thd->db == NULL || strcmp(db,thd->db) != 0)
+ {
+ built_query.append(db);
+ built_query.append("`.`");
+ }
+
+ built_query.append(table->table_name);
+ built_query.append("`,");
+ }
+
error=0;
table_type= table->db_type;
if (!drop_temporary)
@@ -401,12 +437,48 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists,
if (some_tables_deleted || tmp_table_deleted || !error)
{
query_cache_invalidate3(thd, tables, 0);
- if (!dont_log_query && mysql_bin_log.is_open())
+ if (!dont_log_query)
{
- if (!error)
- thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
- mysql_bin_log.write(&qinfo);
+ if (!binlog_row_based ||
+ (non_temp_tables_count > 0 && !tmp_table_deleted))
+ {
+ /*
+ In this case, we are either using statement-based
+ replication or using row-based replication but have only
+ deleted one or more non-temporary tables (and no temporary
+ tables). Either way, we can write the original query into
+ the binary log.
+ */
+ write_bin_log(thd, !error, thd->query, thd->query_length);
+ }
+ else if (binlog_row_based &&
+ non_temp_tables_count > 0 &&
+ tmp_table_deleted)
+ {
+ /*
+ In this case we have deleted both temporary and
+ non-temporary tables, so:
+ - since we have deleted a non-temporary table we have to
+ binlog the statement, but
+ - since we have deleted a temporary table we cannot binlog
+ the statement (since the table has not been created on the
+ slave, this might cause the slave to stop).
+
+ Instead, we write a built statement, only containing the
+ non-temporary tables, to the binary log
+ */
+ built_query.chop(); // Chop off the last comma
+ built_query.append(" /* generated by server */");
+ write_bin_log(thd, !error, built_query.ptr(), built_query.length());
+ }
+ /*
+ The remaining cases are:
+ - no tables were deleted, and
+ - only temporary tables were deleted and row-based
+ replication is used.
+ In both these cases, nothing should be written to the binary
+ log.
+ */
}
}
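As an illustration of the mixed case above (table names are hypothetical, and the exact text is an assumption based on the string building earlier in this hunk): for DROP TABLE t1, tmp1 where tmp1 is a temporary table, only the base table ends up in the logged statement.

/* Sketch only: expected shape of the statement written to the binary log
   under row-based logging when both a base table and a temporary table
   are dropped in one DROP TABLE. */
static const char generated_drop_stmt[]=
  "DROP TABLE `t1` /* generated by server */";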
@@ -1837,13 +1909,17 @@ bool mysql_create_table(THD *thd,const char *db, const char *table_name,
}
thd->tmp_table_used= 1;
}
- if (!internal_tmp_table && mysql_bin_log.is_open())
- {
- thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length, FALSE, FALSE);
- mysql_bin_log.write(&qinfo);
- }
+ /*
+ Don't write statement if:
+ - It is an internal temporary table,
+ - Row-based logging is used and we are creating a temporary table, or
+ - The binary log is not open.
+ */
+ if (!internal_tmp_table &&
+ !(binlog_row_based &&
+ (create_info->options & HA_LEX_CREATE_TMP_TABLE)))
+ write_bin_log(thd, TRUE, thd->query, thd->query_length);
error= FALSE;
unlock_and_end:
VOID(pthread_mutex_unlock(&LOCK_open));
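The logging condition added to mysql_create_table() can be read as a single predicate; a sketch under the names used above (the helper itself is hypothetical):

/* Sketch only: mysql_create_table() writes the original statement unless
   the table is an internal temporary table, or row-based logging is in
   use and a temporary table is being created. */
static bool should_log_create(bool internal_tmp_table, bool row_based,
                              const HA_CREATE_INFO *create_info)
{
  bool creating_tmp_table=
    (create_info->options & HA_LEX_CREATE_TMP_TABLE) != 0;
  return !internal_tmp_table && !(row_based && creating_tmp_table);
}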
@@ -2982,8 +3058,63 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table,
goto err; /* purecov: inspected */
}
- // Must be written before unlock
- write_bin_log(thd, TRUE);
+ /*
+ We have to write the query before we unlock the tables.
+ */
+ if (binlog_row_based)
+ {
+ /*
+ Since temporary tables are not replicated under row-based
+ replication, CREATE TABLE ... LIKE ... needs special
+ treatment. We have four cases to consider, according to the
+ following decision table:
+
+ ==== ========= ========= ==============================
+ Case Target Source Write to binary log
+ ==== ========= ========= ==============================
+ 1 normal normal Original statement
+ 2 normal temporary Generated statement
+ 3 temporary normal Nothing
+ 4 temporary temporary Nothing
+ ==== ========= ========= ==============================
+
+ The variable 'tmp_table' below is used to see if the source
+ table is a temporary table: if it is set, then the source table
+ was a temporary table and we can take appropriate actions.
+ */
+ if (!(create_info->options & HA_LEX_CREATE_TMP_TABLE))
+ {
+ if (tmp_table) // Case 2
+ {
+ char buf[2048];
+ String query(buf, sizeof(buf), system_charset_info);
+ query.length(0); // Have to zero it since the constructor doesn't
+ TABLE *table_ptr;
+ int error;
+
+ /*
+ Let's open and lock the table: it will be closed (and
+ unlocked) by close_thread_tables() at the end of the
+ statement anyway.
+ */
+ if (!(table_ptr= open_ltable(thd, table, TL_READ_NO_INSERT)))
+ goto err;
+
+ int result= store_create_info(thd, table, &query, create_info);
+
+ DBUG_ASSERT(result == 0); // store_create_info() always returns 0
+ write_bin_log(thd, TRUE, query.ptr(), query.length());
+ }
+ else // Case 1
+ write_bin_log(thd, TRUE, thd->query, thd->query_length);
+ }
+ /*
+ Cases 3 and 4 do nothing under RBR
+ */
+ }
+ else if (!(create_info->options & HA_LEX_CREATE_TMP_TABLE))
+ write_bin_log(thd, TRUE, thd->query, thd->query_length);
+
res= FALSE;
goto err;
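The decision table above collapses to a small predicate; a sketch under the names used in this hunk (the helper itself is hypothetical):

/* Sketch only: cases 1-4 of the decision table. Returns true when the
   original CREATE TABLE ... LIKE statement can be logged as-is (case 1);
   case 2 needs a generated statement, cases 3 and 4 log nothing. */
static bool log_original_create_like(const HA_CREATE_INFO *create_info,
                                     bool source_is_temporary)
{
  bool target_is_temporary=
    (create_info->options & HA_LEX_CREATE_TMP_TABLE) != 0;
  return !target_is_temporary && !source_is_temporary;
}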
@@ -3089,7 +3220,7 @@ mysql_discard_or_import_tablespace(THD *thd,
error=1;
if (error)
goto err;
- write_bin_log(thd, FALSE);
+ write_bin_log(thd, FALSE, thd->query, thd->query_length);
err:
close_thread_tables(thd);
thd->tablespace_op=FALSE;
@@ -4057,7 +4188,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
}
if (!error)
{
- write_bin_log(thd, TRUE);
+ write_bin_log(thd, TRUE, thd->query, thd->query_length);
if (do_send_ok)
send_ok(thd);
}
@@ -4472,7 +4603,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
if (!error)
{
close_thread_tables(thd);
- write_bin_log(thd, FALSE);
+ write_bin_log(thd, FALSE, thd->query, thd->query_length);
send_ok(thd);
DBUG_RETURN(FALSE);
}
@@ -4609,7 +4740,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
my_free((gptr) new_table,MYF(0));
goto err;
}
- write_bin_log(thd, TRUE);
+ /* We don't replicate ALTER TABLE statements on temporary tables */
+ if (!binlog_row_based)
+ write_bin_log(thd, TRUE, thd->query, thd->query_length);
goto end_temporary;
}
@@ -4751,7 +4884,10 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
goto err;
}
thd->proc_info="end";
- write_bin_log(thd, TRUE);
+
+ DBUG_ASSERT(!(mysql_bin_log.is_open() && binlog_row_based &&
+ (create_info->options & HA_LEX_CREATE_TMP_TABLE)));
+ write_bin_log(thd, TRUE, thd->query, thd->query_length);
VOID(pthread_cond_broadcast(&COND_refresh));
VOID(pthread_mutex_unlock(&LOCK_open));
/*
@@ -4928,7 +5064,7 @@ copy_data_between_tables(TABLE *from,TABLE *to,
{
copy_ptr->do_copy(copy_ptr);
}
- if ((error=to->file->write_row((byte*) to->record[0])))
+ if ((error=to->file->ha_write_row((byte*) to->record[0])))
{
if ((!ignore &&
handle_duplicates != DUP_REPLACE) ||
diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc
index 367803f4c86..77bfba5ba28 100644
--- a/sql/sql_udf.cc
+++ b/sql/sql_udf.cc
@@ -455,7 +455,7 @@ int mysql_create_function(THD *thd,udf_func *udf)
table->field[2]->store(u_d->dl,(uint) strlen(u_d->dl), system_charset_info);
if (table->s->fields >= 4) // If not old func format
table->field[3]->store((longlong) u_d->type, TRUE);
- error = table->file->write_row(table->record[0]);
+ error = table->file->ha_write_row(table->record[0]);
close_thread_tables(thd);
if (error)
@@ -514,7 +514,7 @@ int mysql_drop_function(THD *thd,const LEX_STRING *udf_name)
HA_READ_KEY_EXACT))
{
int error;
- if ((error = table->file->delete_row(table->record[0])))
+ if ((error = table->file->ha_delete_row(table->record[0])))
table->file->print_error(error, MYF(0));
}
close_thread_tables(thd);
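The write_row() to ha_write_row() (and delete_row() to ha_delete_row()) substitutions in this and the neighbouring files presumably route row changes through wrappers that can also record them for row-based replication. A rough sketch of that wrapper pattern; the class and hook names below are illustrative, not taken from the patch:

/* Sketch only: assumed shape of an ha_write_row()-style wrapper. The real
   wrappers live in handler.h/handler.cc; the stubs stand in for the
   storage engine call and the binary-log hook. */
class handler_sketch
{
public:
  int ha_write_row(byte *buf)
  {
    int error= write_row(buf);              /* engine-level insert */
    if (!error)
      error= log_row_for_replication(buf);  /* hypothetical RBR hook */
    return error;
  }
private:
  int write_row(byte *buf)                { (void) buf; return 0; }
  int log_row_for_replication(byte *buf)  { (void) buf; return 0; }
};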
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index dee88af7d83..e80aaecfd64 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -62,7 +62,7 @@ bool select_union::send_data(List<Item> &values)
if (thd->net.report_error)
return 1;
- if ((error= table->file->write_row(table->record[0])))
+ if ((error= table->file->ha_write_row(table->record[0])))
{
/* create_myisam_from_heap will generate error if needed */
if (error != HA_ERR_FOUND_DUPP_KEY && error != HA_ERR_FOUND_DUPP_UNIQUE &&
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index 3aa6e7fc874..bd001cd9a06 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -355,6 +355,9 @@ int mysql_update(THD *thd,
/* If quick select is used, initialize it before retrieving rows. */
if (select && select->quick && select->quick->reset())
goto err;
+
+ table->file->try_semi_consistent_read(1);
+
if (used_index == MAX_KEY || (select && select->quick))
init_read_record(&info,thd,table,select,0,1);
else
@@ -367,6 +370,9 @@ int mysql_update(THD *thd,
{
if (!(select && select->skip_record()))
{
+ if (table->file->was_semi_consistent_read())
+ continue; /* repeat the read of the same row if it still exists */
+
table->file->position(table->record[0]);
if (my_b_write(&tempfile,table->file->ref,
table->file->ref_length))
@@ -386,6 +392,7 @@ int mysql_update(THD *thd,
if (thd->killed && !error)
error= 1; // Aborted
limit= tmp_limit;
+ table->file->try_semi_consistent_read(0);
end_read_record(&info);
/* Change select to use tempfile */
@@ -420,6 +427,7 @@ int mysql_update(THD *thd,
if (select && select->quick && select->quick->reset())
goto err;
+ table->file->try_semi_consistent_read(1);
init_read_record(&info,thd,table,select,0,1);
updated= found= 0;
@@ -435,10 +443,14 @@ int mysql_update(THD *thd,
(MODE_STRICT_TRANS_TABLES |
MODE_STRICT_ALL_TABLES)));
will_batch= !table->file->start_bulk_update();
+
while (!(error=info.read_record(&info)) && !thd->killed)
{
if (!(select && select->skip_record()))
{
+ if (table->file->was_semi_consistent_read())
+ continue; /* repeat the read of the same row if it still exists */
+
store_record(table,record[1]);
if (fill_record_n_invoke_before_triggers(thd, fields, values, 0,
table->triggers,
@@ -498,8 +510,8 @@ int mysql_update(THD *thd,
else
{
/* Non-batched update */
- error= table->file->update_row((byte*) table->record[1],
- (byte*) table->record[0]);
+ error= table->file->ha_update_row((byte*) table->record[1],
+ (byte*) table->record[0]);
}
if (!error)
{
@@ -594,6 +606,7 @@ int mysql_update(THD *thd,
updated-= dup_key_found;
if (will_batch)
table->file->end_bulk_update();
+ table->file->try_semi_consistent_read(0);
end_read_record(&info);
free_io_cache(table); // If ORDER BY
delete select;
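The three handler calls added to mysql_update() form one pattern: enable semi-consistent reads before the scan, skip rows that the engine reports as semi-consistent reads, and disable the mode afterwards. A condensed sketch (the free function is hypothetical; the member functions are the ones used above):

/* Sketch only: the semi-consistent read scan pattern from mysql_update(),
   collected in one place. */
static void scan_with_semi_consistent_read(THD *thd, handler *file,
                                           READ_RECORD *info)
{
  file->try_semi_consistent_read(1);          /* allow relaxed row reads */
  while (!info->read_record(info) && !thd->killed)
  {
    if (file->was_semi_consistent_read())
      continue;         /* repeat the read of the same row if it still exists */
    /* ... evaluate and update the row here ... */
  }
  file->try_semi_consistent_read(0);          /* back to normal reads */
}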
@@ -624,10 +637,13 @@ int mysql_update(THD *thd,
{
if (error < 0)
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length,
- transactional_table, FALSE);
- if (mysql_bin_log.write(&qinfo) && transactional_table)
+ if (thd->binlog_query(THD::ROW_QUERY_TYPE,
+ thd->query, thd->query_length,
+ transactional_table, FALSE) &&
+ transactional_table)
+ {
error=1; // Rollback update
+ }
}
if (!transactional_table)
thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
@@ -1364,8 +1380,8 @@ bool multi_update::send_data(List<Item> &not_used_values)
*/
main_table->file->extra(HA_EXTRA_PREPARE_FOR_UPDATE);
}
- if ((error=table->file->update_row(table->record[1],
- table->record[0])))
+ if ((error=table->file->ha_update_row(table->record[1],
+ table->record[0])))
{
updated--;
if (!ignore || error != HA_ERR_FOUND_DUPP_KEY)
@@ -1400,7 +1416,7 @@ bool multi_update::send_data(List<Item> &not_used_values)
memcpy((char*) tmp_table->field[0]->ptr,
(char*) table->file->ref, table->file->ref_length);
/* Write row, ignoring duplicated updates to a row */
- if (error= tmp_table->file->write_row(tmp_table->record[0]))
+ if ((error= tmp_table->file->ha_write_row(tmp_table->record[0])))
{
if (error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE &&
@@ -1517,8 +1533,8 @@ int multi_update::do_updates(bool from_send_error)
if (compare_record(table, thd->query_id))
{
- if ((local_error=table->file->update_row(table->record[1],
- table->record[0])))
+ if ((local_error=table->file->ha_update_row(table->record[1],
+ table->record[0])))
{
if (!ignore || local_error != HA_ERR_FOUND_DUPP_KEY)
goto err;
@@ -1597,10 +1613,13 @@ bool multi_update::send_eof()
{
if (local_error == 0)
thd->clear_error();
- Query_log_event qinfo(thd, thd->query, thd->query_length,
- transactional_tables, FALSE);
- if (mysql_bin_log.write(&qinfo) && trans_safe)
+ if (thd->binlog_query(THD::ROW_QUERY_TYPE,
+ thd->query, thd->query_length,
+ transactional_tables, FALSE) &&
+ trans_safe)
+ {
local_error= 1; // Rollback update
+ }
}
if (!transactional_tables)
thd->options|=OPTION_STATUS_NO_TRANS_UPDATE;
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 4518f9e8de1..bc41178e1be 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -897,6 +897,7 @@ statement:
alter
| analyze
| backup
+ | binlog_base64_event
| call
| change
| check
@@ -4400,6 +4401,13 @@ analyze:
{}
;
+binlog_base64_event:
+ BINLOG_SYM TEXT_STRING_sys
+ {
+ Lex->sql_command = SQLCOM_BINLOG_BASE64_EVENT;
+ Lex->comment= $2;
+ }
+ ;
check:
CHECK_SYM table_or_tables
{
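For reference, the new rule accepts statements of the following shape; the base64 payload below is a placeholder, not real event data:

/* Sketch only: a statement accepted by the binlog_base64_event rule above.
   The payload is a hypothetical placeholder. */
static const char binlog_stmt_example[]=
  "BINLOG 'SGVsbG8sIHdvcmxkIQ==';";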
diff --git a/sql/table.cc b/sql/table.cc
index f9c6344e88f..bf208918346 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -1206,6 +1206,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
outparam->in_use= thd;
outparam->s= share;
outparam->db_stat= db_stat;
+ outparam->write_row_record= NULL;
init_sql_alloc(&outparam->mem_root, TABLE_ALLOC_BLOCK_SIZE, 0);
*root_ptr= &outparam->mem_root;
@@ -1396,6 +1397,25 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
*root_ptr= old_root;
thd->status_var.opened_tables++;
+#ifdef HAVE_REPLICATION
+
+ /*
+ This constant is used to mark that no table map version has been
+ assigned. No arithmetic is done on the value: it will be
+ overwritten with a value taken from MYSQL_BIN_LOG.
+ */
+ share->table_map_version= ~(ulonglong)0;
+
+ /*
+ Since openfrm() can be called without any locking (for example,
+ ha_create_table... functions), we do not assign a table map id
+ here. Instead we assign a value that is not used elsewhere, and
+ then assign a table map id inside open_table() under the
+ protection of the LOCK_open mutex.
+ */
+ share->table_map_id= ULONG_MAX;
+#endif
+
DBUG_RETURN (0);
err:
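The sentinel stored above implies a later consumer: according to the comment, the real table map id is assigned in open_table() while LOCK_open is held. A sketch of how that deferred assignment might look (the counter and function are hypothetical):

/* Sketch only: deferred assignment of the table map id, replacing the
   ULONG_MAX sentinel set in open_table_from_share() above. */
static ulong global_table_map_counter= 0;

static void assign_table_map_id(TABLE_SHARE *share)
{
  /* The caller is assumed to hold LOCK_open. */
  if (share->table_map_id == ULONG_MAX)
    share->table_map_id= global_table_map_counter++;
}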
diff --git a/sql/table.h b/sql/table.h
index d81eb6afe91..c8b9db1b2a5 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -189,7 +189,8 @@ typedef struct st_table_share
bool is_view;
bool name_lock, replace_with_name_lock;
bool waiting_on_cond; /* Protection against free */
-
+ ulong table_map_id; /* for row-based replication */
+ ulonglong table_map_version;
/*
TRUE if this is a system table like 'mysql.proc', which we want to be
able to open and lock even when we already have some tables open and
@@ -220,6 +221,8 @@ struct st_table {
Field **field; /* Pointer to fields */
byte *record[2]; /* Pointer to records */
+ byte *write_row_record; /* Used as optimisation in
+ THD::write_row */
byte *insert_values; /* used by INSERT ... UPDATE */
key_map quick_keys, used_keys, keys_in_use_for_query;
KEY *key_info; /* data of keys in database */