diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2018-02-20 15:10:03 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2018-02-20 21:36:36 +0200 |
commit | 947efe17ed8188ca4feef6deb0c2831a246b5c8f (patch) | |
tree | 1df8380c4c59cfb9f119f89d3022b66edb5bd610 /storage | |
parent | f6544353e0c84d14a333525ba26ebd30bf125615 (diff) | |
download | mariadb-git-947efe17ed8188ca4feef6deb0c2831a246b5c8f.tar.gz |
MDEV-15158 On commit, do not write to the TRX_SYS page
This is based on a prototype by
Thirunarayanan Balathandayuthapani <thiru@mariadb.com>.
Binlog and Galera write-set replication information was written into
the TRX_SYS page on each commit. Instead of writing to the TRX_SYS page
during normal operation, InnoDB can make use of rollback segment header
pages, which are already being written to during a commit.
The following fields are added to the rollback segment header page:
TRX_RSEG_BINLOG_OFFSET
TRX_RSEG_BINLOG_NAME (NUL-terminated; empty name = not present)
TRX_RSEG_WSREP_XID_FORMAT (0=not present; 1=present)
TRX_RSEG_WSREP_XID_GTRID_LEN
TRX_RSEG_WSREP_XID_BQUAL_LEN
TRX_RSEG_WSREP_XID_DATA
trx_sys_t: Introduce the fields
recovered_binlog_filename, recovered_binlog_offset, recovered_wsrep_xid.
To facilitate upgrade from older MySQL or MariaDB versions, we will read
the information in the TRX_SYS page. It will be overridden by the
information that we find in rollback segment header pages.
Mariabackup --prepare will read the metadata from the rollback
segment header pages via trx_rseg_array_init(). It will still
not read any undo log pages or recover any transactions.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 13 | ||||
-rw-r--r-- | storage/innobase/include/trx0rseg.h | 68 | ||||
-rw-r--r-- | storage/innobase/include/trx0sys.h | 36 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 8 | ||||
-rw-r--r-- | storage/innobase/trx/trx0purge.cc | 51 | ||||
-rw-r--r-- | storage/innobase/trx/trx0rseg.cc | 315 | ||||
-rw-r--r-- | storage/innobase/trx/trx0sys.cc | 198 | ||||
-rw-r--r-- | storage/innobase/trx/trx0trx.cc | 39 |
8 files changed, 439 insertions, 289 deletions
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 9ab36a97fba..699e2899f57 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -111,10 +111,9 @@ this program; if not, write to the Free Software Foundation, Inc., #include "trx0purge.h" #endif /* UNIV_DEBUG */ #include "trx0roll.h" -#include "trx0sys.h" +#include "trx0rseg.h" #include "trx0trx.h" #include "fil0pagecompress.h" -#include "trx0xa.h" #include "ut0mem.h" #include "row0ext.h" @@ -19679,12 +19678,8 @@ innobase_wsrep_set_checkpoint( DBUG_ASSERT(hton == innodb_hton_ptr); if (wsrep_is_wsrep_xid(xid)) { - mtr_t mtr; - mtr_start(&mtr); - if (buf_block_t* sys_header = trx_sysf_get(&mtr)) { - trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr); - } - mtr_commit(&mtr); + + trx_rseg_update_wsrep_checkpoint(xid); innobase_flush_logs(hton, false); return 0; } else { @@ -19700,7 +19695,7 @@ innobase_wsrep_get_checkpoint( XID* xid) { DBUG_ASSERT(hton == innodb_hton_ptr); - trx_sys_read_wsrep_checkpoint(xid); + trx_rseg_read_wsrep_checkpoint(*xid); return 0; } diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index 5642dd0d4a8..8aa91c13add 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -27,10 +27,8 @@ Created 3/26/1996 Heikki Tuuri #ifndef trx0rseg_h #define trx0rseg_h -#include "trx0types.h" #include "trx0sys.h" #include "fut0lst.h" -#include <vector> /** Gets a rollback segment header. @param[in] space space where placed @@ -226,6 +224,30 @@ struct trx_rseg_t { /** Maximum transaction ID (valid only if TRX_RSEG_FORMAT is 0) */ #define TRX_RSEG_MAX_TRX_ID (TRX_RSEG_UNDO_SLOTS + TRX_RSEG_N_SLOTS \ * TRX_RSEG_SLOT_SIZE) + +/** 8 bytes offset within the binlog file */ +#define TRX_RSEG_BINLOG_OFFSET TRX_RSEG_MAX_TRX_ID + 8 +/** MySQL log file name, 512 bytes, including terminating NUL +(valid only if TRX_RSEG_FORMAT is 0). 
+If no binlog information is present, the first byte is NUL. */ +#define TRX_RSEG_BINLOG_NAME TRX_RSEG_MAX_TRX_ID + 16 +/** Maximum length of binlog file name, including terminating NUL, in bytes */ +#define TRX_RSEG_BINLOG_NAME_LEN 512 + +#ifdef WITH_WSREP +/** The offset to WSREP XID headers */ +#define TRX_RSEG_WSREP_XID_INFO TRX_RSEG_MAX_TRX_ID + 16 + 512 + +/** WSREP XID format (1 if present and valid, 0 if not present) */ +#define TRX_RSEG_WSREP_XID_FORMAT TRX_RSEG_WSREP_XID_INFO +/** WSREP XID GTRID length */ +#define TRX_RSEG_WSREP_XID_GTRID_LEN TRX_RSEG_WSREP_XID_INFO + 4 +/** WSREP XID bqual length */ +#define TRX_RSEG_WSREP_XID_BQUAL_LEN TRX_RSEG_WSREP_XID_INFO + 8 +/** WSREP XID data (XIDDATASIZE bytes) */ +#define TRX_RSEG_WSREP_XID_DATA TRX_RSEG_WSREP_XID_INFO + 12 +#endif /* WITH_WSREP*/ + /*-------------------------------------------------------------*/ /** Read the page number of an undo log slot. @@ -240,6 +262,48 @@ trx_rsegf_get_nth_undo(const trx_rsegf_t* rsegf, ulint n) + n * TRX_RSEG_SLOT_SIZE); } +#ifdef WITH_WSREP +/** Update the WSREP XID information in rollback segment header. +@param[in,out] rseg_header rollback segment header +@param[in] xid WSREP XID +@param[in,out] mtr mini-transaction */ +void +trx_rseg_update_wsrep_checkpoint( + trx_rsegf_t* rseg_header, + const XID* xid, + mtr_t* mtr); + +/** Update WSREP checkpoint XID in first rollback segment header. +@param[in] xid WSREP XID */ +void trx_rseg_update_wsrep_checkpoint(const XID* xid); + +/** Read the WSREP XID information in rollback segment header. +@param[in] rseg_header Rollback segment header +@param[out] xid Transaction XID +@return whether the WSREP XID was present */ +bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid); + +/** Recover the latest WSREP checkpoint XID. 
+@param[out] xid WSREP XID +@return whether the WSREP XID was found */ +bool trx_rseg_read_wsrep_checkpoint(XID& xid); +#endif /* WITH_WSREP */ + +/** Upgrade a rollback segment header page to MariaDB 10.3 format. +@param[in,out] rseg_header rollback segment header page +@param[in,out] mtr mini-transaction */ +void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr); + +/** Update the offset information about the end of the binlog entry +which corresponds to the transaction just being committed. +In a replication slave, this updates the master binlog position +up to which replication has proceeded. +@param[in,out] rseg_header rollback segment header +@param[in] trx committing transaction +@param[in,out] mtr mini-transaction */ +void +trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr); + #include "trx0rseg.ic" #endif diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index fae9d25d601..1469d8b1dc7 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -141,26 +141,6 @@ trx_sys_update_mysql_binlog_offset( system header. */ void trx_sys_print_mysql_binlog_offset(); -#ifdef WITH_WSREP - -/** Update WSREP XID info in the TRX_SYS page. -@param[in] xid Transaction XID -@param[in,out] sys_header TRX_SYS page -@param[in,out] mtr mini-transaction */ -UNIV_INTERN -void -trx_sys_update_wsrep_checkpoint( - const XID* xid, - buf_block_t* sys_header, - mtr_t* mtr); - -/** Read WSREP checkpoint XID from sys header. -@param[out] xid WSREP XID -@return whether the checkpoint was present */ -UNIV_INTERN -bool -trx_sys_read_wsrep_checkpoint(XID* xid); -#endif /* WITH_WSREP */ /** Create the rollback segments. @return whether the creation succeeded */ @@ -235,7 +215,8 @@ trx_sysf_rseg_get_page_no(const buf_block_t* sys_header, ulint rseg_id) + sys_header->frame); } -/** Maximum length of MySQL binlog file name, in bytes. 
*/ +/** Maximum length of MySQL binlog file name, in bytes. +(Used before MariaDB 10.3.5.) */ #define TRX_SYS_MYSQL_LOG_NAME_LEN 512 /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */ #define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344 @@ -312,7 +293,7 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera */ #ifdef WITH_WSREP -/** The offset to WSREP XID headers */ +/** The offset to WSREP XID headers (used before MariaDB 10.3.5) */ #define TRX_SYS_WSREP_XID_INFO std::max(srv_page_size - 3500, 1596UL) #define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0 #define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265 @@ -856,7 +837,6 @@ public: by any mutex, because it is read-only during multi-threaded operation */ - /** Lock-free hash of in memory read-write transactions. Works faster when it is on it's own cache line (tested). @@ -865,6 +845,16 @@ public: MY_ALIGNED(CACHE_LINE_SIZE) rw_trx_hash_t rw_trx_hash; +#ifdef WITH_WSREP + /** Latest recovered XID during startup */ + XID recovered_wsrep_xid; +#endif + /** Latest recovered binlog offset */ + int64_t recovered_binlog_offset; + /** Latest recovred binlog file name */ + char recovered_binlog_filename[TRX_SYS_MYSQL_LOG_NAME_LEN]; + + /** Constructor. diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 1fb306cec88..bef3fef9cde 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -2230,6 +2230,8 @@ files_checked: if (err != DB_SUCCESS) { return(srv_init_abort(err)); } + /* fall through */ + case SRV_OPERATION_RESTORE: /* This must precede recv_apply_hashed_log_recs(true). */ trx_lists_init_at_db_start(); @@ -2237,12 +2239,6 @@ files_checked: case SRV_OPERATION_RESTORE_DELTA: case SRV_OPERATION_BACKUP: ut_ad(!"wrong mariabackup mode"); - /* fall through */ - case SRV_OPERATION_RESTORE: - /* mariabackup --prepare only deals with - the redo log and the data files, not with - transactions or the data dictionary. 
*/ - break; } if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 34f72929d03..feb283429a5 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -44,6 +44,7 @@ Created 3/26/1996 Heikki Tuuri #include "trx0roll.h" #include "trx0rseg.h" #include "trx0trx.h" +#include <mysql/service_wsrep.h> /** Maximum allowable purge history length. <=0 means 'infinite'. */ ulong srv_max_purge_lag = 0; @@ -239,6 +240,8 @@ Remove the undo log segment from the rseg slot if it is too big for reuse. void trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) { + DBUG_PRINT("trx", ("commit(" TRX_ID_FMT "," TRX_ID_FMT ")", + trx->id, trx->no)); ut_ad(undo == trx->rsegs.m_redo.undo || undo == trx->rsegs.m_redo.old_insert); trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; @@ -251,6 +254,12 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1); + if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG_FORMAT + rseg_header))) { + /* This database must have been upgraded from + before MariaDB 10.3.5. 
*/ + trx_rseg_format_upgrade(rseg_header, mtr); + } + if (undo->state != TRX_UNDO_CACHED) { ulint hist_size; #ifdef UNIV_DEBUG @@ -258,11 +267,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) #endif /* UNIV_DEBUG */ /* The undo log segment will not be reused */ - - if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) { - ib::fatal() << "undo->id is " << undo->id; - } - + ut_a(undo->id < TRX_RSEG_N_SLOTS); trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr); MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED); @@ -272,30 +277,17 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) ut_ad(undo->size == flst_get_len( seg_header + TRX_UNDO_PAGE_LIST)); - byte* rseg_format = rseg_header + TRX_RSEG_FORMAT; - if (UNIV_UNLIKELY(mach_read_from_4(rseg_format))) { - /* This database must have been upgraded from - before MariaDB 10.3.5. */ - mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr); - /* Clear also possible garbage at the end of - the page. Old InnoDB versions did not initialize - unused parts of pages. */ - ut_ad(page_offset(rseg_header) == TRX_RSEG); - byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8; - ulint len = UNIV_PAGE_SIZE - - (FIL_PAGE_DATA_END - + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8); - memset(b, 0, len); - mlog_log_string(b, len, mtr); - } mlog_write_ulint( rseg_header + TRX_RSEG_HISTORY_SIZE, hist_size + undo->size, MLOG_4BYTES, mtr); - mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID, - trx_sys.get_max_trx_id(), mtr); } + /* This field now also serves as an identifier for the latest + binlog and WSREP XID information. 
*/ + mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID, + trx_sys.get_max_trx_id(), mtr); + /* Before any transaction-generating background threads or the purge have been started, recv_recovery_rollback_active() can start transactions in row_merge_drop_temp_indexes() and @@ -320,6 +312,19 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr) || trx->internal) && srv_fast_shutdown)); +#ifdef WITH_WSREP + if (wsrep_is_wsrep_xid(trx->xid)) { + trx_rseg_update_wsrep_checkpoint(rseg_header, trx->xid, mtr); + } +#endif + + if (trx->mysql_log_file_name && *trx->mysql_log_file_name) { + /* Update the latest MySQL binlog name and offset info + in rollback segment header if MySQL binlogging is on + or the database server is a MySQL replication save. */ + trx_rseg_update_binlog_offset(rseg_header, trx, mtr); + } + /* Add the log as the first in the history list */ flst_add_first(rseg_header + TRX_RSEG_HISTORY, undo_header + TRX_UNDO_HISTORY_NODE, mtr); diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc index 4b09a4cc492..45d260a4480 100644 --- a/storage/innobase/trx/trx0rseg.cc +++ b/storage/innobase/trx/trx0rseg.cc @@ -34,6 +34,214 @@ Created 3/26/1996 Heikki Tuuri #include <algorithm> +#ifdef WITH_WSREP + +#ifdef UNIV_DEBUG +static long long trx_sys_cur_xid_seqno = -1; +static unsigned char trx_sys_cur_xid_uuid[16]; + +/** Read WSREP XID seqno */ +static inline long long read_wsrep_xid_seqno(const XID* xid) +{ + long long seqno; + memcpy(&seqno, xid->data + 24, sizeof(long long)); + return seqno; +} + +/** Read WSREP XID UUID */ +static inline void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf) +{ + memcpy(buf, xid->data + 8, 16); +} + +#endif /* UNIV_DEBUG */ + +/** Update the WSREP XID information in rollback segment header. 
+@param[in,out] rseg_header rollback segment header +@param[in] xid WSREP XID +@param[in,out] mtr mini-transaction */ +void +trx_rseg_update_wsrep_checkpoint( + trx_rsegf_t* rseg_header, + const XID* xid, + mtr_t* mtr) +{ + ut_ad(xid->formatID == 1); + +#ifdef UNIV_DEBUG + /* Check that seqno is monotonically increasing */ + unsigned char xid_uuid[16]; + long long xid_seqno = read_wsrep_xid_seqno(xid); + read_wsrep_xid_uuid(xid, xid_uuid); + + if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) { + ut_ad(xid_seqno > trx_sys_cur_xid_seqno); + trx_sys_cur_xid_seqno = xid_seqno; + } else { + memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16); + } + trx_sys_cur_xid_seqno = xid_seqno; +#endif /* UNIV_DEBUG */ + + mlog_write_ulint(TRX_RSEG_WSREP_XID_FORMAT + rseg_header, + uint32_t(xid->formatID), + MLOG_4BYTES, mtr); + + mlog_write_ulint(TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header, + uint32_t(xid->gtrid_length), + MLOG_4BYTES, mtr); + + mlog_write_ulint(TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header, + uint32_t(xid->bqual_length), + MLOG_4BYTES, mtr); + + mlog_write_string(TRX_RSEG_WSREP_XID_DATA + rseg_header, + reinterpret_cast<const byte*>(xid->data), + XIDDATASIZE, mtr); +} + +/** Update WSREP checkpoint XID in first rollback segment header. +@param[in] xid WSREP XID */ +void trx_rseg_update_wsrep_checkpoint(const XID* xid) +{ + mtr_t mtr; + mtr.start(); + + const trx_rseg_t* rseg = trx_sys.rseg_array[0]; + + trx_rsegf_t* rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, + &mtr); + if (UNIV_UNLIKELY(mach_read_from_4(rseg_header + TRX_RSEG_FORMAT))) { + trx_rseg_format_upgrade(rseg_header, &mtr); + } + + mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID, + trx_sys.get_max_trx_id(), &mtr); + trx_rseg_update_wsrep_checkpoint(rseg_header, xid, &mtr); + mtr.commit(); +} + +/** Read the WSREP XID information in rollback segment header. 
+@param[in] rseg_header Rollback segment header +@param[out] xid Transaction XID +@return whether the WSREP XID was present */ +bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid) +{ + xid.formatID = (int)mach_read_from_4( + TRX_RSEG_WSREP_XID_FORMAT + rseg_header); + + if (xid.formatID == 0) { + memset(&xid, 0, sizeof(xid)); + long long seqno= -1; + memcpy(xid.data + 24, &seqno, sizeof(long long)); + xid.formatID = -1; + return false; + } + + xid.gtrid_length = (int)mach_read_from_4( + TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header); + + xid.bqual_length = (int)mach_read_from_4( + TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header); + + memcpy(xid.data, TRX_RSEG_WSREP_XID_DATA + rseg_header, XIDDATASIZE); + + return true; +} + +/** Read the WSREP XID from the TRX_SYS page (in case of upgrade). +@param[in] page TRX_SYS page +@param[out] xid WSREP XID (if present) +@return whether the WSREP XID is present */ +static bool trx_rseg_init_wsrep_xid(const page_t* page, XID& xid) +{ + if (mach_read_from_4(TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_MAGIC_N_FLD + + page) + != TRX_SYS_WSREP_XID_MAGIC_N) { + return false; + } + + xid.formatID = (int)mach_read_from_4( + TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_FORMAT + page); + xid.gtrid_length = (int)mach_read_from_4( + TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_GTRID_LEN + page); + xid.bqual_length = (int)mach_read_from_4( + TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_BQUAL_LEN + page); + memcpy(xid.data, + TRX_SYS + TRX_SYS_WSREP_XID_INFO + + TRX_SYS_WSREP_XID_DATA + page, XIDDATASIZE); + return true; +} + +/** Recover the latest WSREP checkpoint XID. 
+@param[out] xid WSREP XID +@return whether the WSREP XID was found */ +bool trx_rseg_read_wsrep_checkpoint(XID& xid) +{ + mtr_t mtr; + trx_id_t max_id = 0; + bool found = false; + + for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; + rseg_id++, mtr.commit()) { + mtr.start(); + const buf_block_t* sys = trx_sysf_get(&mtr, false); + if (rseg_id == 0) { + found = trx_rseg_init_wsrep_xid(sys->frame, xid); + } + + const uint32_t page_no = trx_sysf_rseg_get_page_no( + sys, rseg_id); + + if (page_no == FIL_NULL) { + continue; + } + + const trx_rsegf_t* rseg_header = trx_rsegf_get_new( + trx_sysf_rseg_get_space(sys, rseg_id), page_no, &mtr); + + if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT)) { + continue; + } + + trx_id_t id = mach_read_from_8(rseg_header + + TRX_RSEG_MAX_TRX_ID); + + if (id < max_id) { + continue; + } + + max_id = id; + found = trx_rseg_read_wsrep_checkpoint(rseg_header, xid) + || found; + } + + return found; +} +#endif /* WITH_WSREP */ + +/** Upgrade a rollback segment header page to MariaDB 10.3 format. +@param[in,out] rseg_header rollback segment header page +@param[in,out] mtr mini-transaction */ +void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr) +{ + ut_ad(page_offset(rseg_header) == TRX_RSEG); + byte* rseg_format = TRX_RSEG_FORMAT + rseg_header; + mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr); + /* Clear also possible garbage at the end of the page. Old + InnoDB versions did not initialize unused parts of pages. */ + byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8; + ulint len = UNIV_PAGE_SIZE + - (FIL_PAGE_DATA_END + + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8); + memset(b, 0, len); + mlog_log_string(b, len, mtr); +} + /** Creates a rollback segment header. This function is called only when a new rollback segment is created in the database. @@ -192,19 +400,51 @@ trx_undo_lists_init(trx_rseg_t* rseg, trx_id_t& max_trx_id, /** Restore the state of a persistent rollback segment. 
@param[in,out] rseg persistent rollback segment @param[in,out] max_trx_id maximum observed transaction identifier +@param[in,out] max_rseg_trx_id maximum observed TRX_RSEG_MAX_TRX_ID @param[in,out] mtr mini-transaction */ static void -trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr) +trx_rseg_mem_restore( + trx_rseg_t* rseg, + trx_id_t& max_trx_id, + trx_id_t& max_rseg_trx_id, + mtr_t* mtr) { - const trx_rsegf_t* rseg_header = trx_rsegf_get_new( + trx_rsegf_t* rseg_header = trx_rsegf_get_new( rseg->space, rseg->page_no, mtr); + if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT) == 0) { trx_id_t id = mach_read_from_8(rseg_header + TRX_RSEG_MAX_TRX_ID); + if (id > max_trx_id) { max_trx_id = id; } + + if (id > max_rseg_trx_id) { + max_rseg_trx_id = id; + + if (rseg_header[TRX_RSEG_BINLOG_NAME]) { + memcpy(trx_sys.recovered_binlog_filename, + rseg_header + TRX_RSEG_BINLOG_NAME, + TRX_RSEG_BINLOG_NAME_LEN); + trx_sys.recovered_binlog_offset = mach_read_from_8( + rseg_header + + TRX_RSEG_BINLOG_OFFSET); + } + +#ifdef WITH_WSREP + trx_rseg_read_wsrep_checkpoint( + rseg_header, trx_sys.recovered_wsrep_xid); +#endif + } + } + + if (srv_operation == SRV_OPERATION_RESTORE) { + /* mariabackup --prepare only deals with + the redo log and the data files, not with + transactions or the data dictionary. */ + return; } /* Initialize the undo log lists according to the rseg header */ @@ -252,21 +492,55 @@ trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr) } } +/** Read binlog metadata from the TRX_SYS page, in case we are upgrading +from MySQL or a MariaDB version older than 10.3.5. 
*/ +static void trx_rseg_init_binlog_info(const page_t* page) +{ + if (mach_read_from_4(TRX_SYS + TRX_SYS_MYSQL_LOG_INFO + + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD + + page) + == TRX_SYS_MYSQL_LOG_MAGIC_N) { + memcpy(trx_sys.recovered_binlog_filename, + TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME + + TRX_SYS + page, TRX_SYS_MYSQL_LOG_NAME_LEN); + trx_sys.recovered_binlog_offset = mach_read_from_8( + TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET + + TRX_SYS + page); + } + +#ifdef WITH_WSREP + trx_rseg_init_wsrep_xid(page, trx_sys.recovered_wsrep_xid); +#endif +} + /** Initialize the rollback segments in memory at database startup. */ void trx_rseg_array_init() { - trx_id_t max_trx_id = 0; + trx_id_t max_trx_id = 0, max_rseg_trx_id = 0; + + *trx_sys.recovered_binlog_filename = '\0'; + trx_sys.recovered_binlog_offset = -1; +#ifdef WITH_WSREP + memset(&trx_sys.recovered_wsrep_xid, 0, + sizeof trx_sys.recovered_wsrep_xid); + trx_sys.recovered_wsrep_xid.formatID = -1; +#endif for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) { mtr_t mtr; mtr.start(); if (const buf_block_t* sys = trx_sysf_get(&mtr, false)) { if (rseg_id == 0) { + /* In case this is an upgrade from + before MariaDB 10.3.5, fetch the base + information from the TRX_SYS page. */ max_trx_id = mach_read_from_8( TRX_SYS + TRX_SYS_TRX_ID_STORE + sys->frame); + trx_rseg_init_binlog_info(sys->frame); } + const uint32_t page_no = trx_sysf_rseg_get_page_no( sys, rseg_id); if (page_no != FIL_NULL) { @@ -278,7 +552,9 @@ trx_rseg_array_init() ut_ad(rseg->id == rseg_id); ut_ad(!trx_sys.rseg_array[rseg_id]); trx_sys.rseg_array[rseg_id] = rseg; - trx_rseg_mem_restore(rseg, max_trx_id, &mtr); + trx_rseg_mem_restore( + rseg, max_trx_id, max_rseg_trx_id, + &mtr); } } @@ -404,3 +680,34 @@ trx_rseg_get_n_undo_tablespaces( return ulint(end - space_ids); } + +/** Update the offset information about the end of the binlog entry +which corresponds to the transaction just being committed. 
+In a replication slave, this updates the master binlog position +up to which replication has proceeded. +@param[in,out] rseg_header rollback segment header +@param[in] trx committing transaction +@param[in,out] mtr mini-transaction */ +void +trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr) +{ + DBUG_LOG("trx", "trx_mysql_binlog_offset: " << trx->mysql_log_offset); + + const size_t len = strlen(trx->mysql_log_file_name) + 1; + + ut_ad(len > 1); + + if (UNIV_UNLIKELY(len > TRX_RSEG_BINLOG_NAME_LEN)) { + return; + } + + mlog_write_ull(rseg_header + TRX_RSEG_BINLOG_OFFSET, + trx->mysql_log_offset, mtr); + byte* p = rseg_header + TRX_RSEG_BINLOG_NAME; + const byte* binlog_name = reinterpret_cast<const byte*> + (trx->mysql_log_file_name); + + if (memcmp(binlog_name, p, len)) { + mlog_write_string(p, binlog_name, len, mtr); + } +} diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index 1b60e2bd121..13ebe27f539 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -88,207 +88,21 @@ ReadView::check_trx_id_sanity( uint trx_rseg_n_slots_debug = 0; #endif - -/*****************************************************************//** -Updates the offset information about the end of the MySQL binlog entry -which corresponds to the transaction just being committed. In a MySQL -replication slave updates the latest master binlog position up to which -replication has proceeded. 
*/ -void -trx_sys_update_mysql_binlog_offset( -/*===============================*/ - const char* file_name,/*!< in: MySQL log file name */ - int64_t offset, /*!< in: position in that log file */ - buf_block_t* sys_header, /*!< in,out: trx sys header */ - mtr_t* mtr) /*!< in,out: mini-transaction */ -{ - DBUG_PRINT("InnoDB",("trx_mysql_binlog_offset: %lld", (longlong) offset)); - - const size_t len = strlen(file_name) + 1; - - if (len > TRX_SYS_MYSQL_LOG_NAME_LEN) { - - /* We cannot fit the name to the 512 bytes we have reserved */ - - return; - } - - byte* p = TRX_SYS + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD - + TRX_SYS_MYSQL_LOG_INFO + sys_header->frame; - - if (mach_read_from_4(p) != TRX_SYS_MYSQL_LOG_MAGIC_N) { - mlog_write_ulint(p, - TRX_SYS_MYSQL_LOG_MAGIC_N, - MLOG_4BYTES, mtr); - } - - p = TRX_SYS + TRX_SYS_MYSQL_LOG_NAME + TRX_SYS_MYSQL_LOG_INFO - + sys_header->frame; - - if (memcmp(file_name, p, len)) { - mlog_write_string(p, - reinterpret_cast<const byte*>(file_name), - len, mtr); - } - - mlog_write_ull(TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET - + TRX_SYS + sys_header->frame, offset, mtr); -} - /** Display the MySQL binlog offset info if it is present in the trx system header. 
*/ void trx_sys_print_mysql_binlog_offset() { - mtr_t mtr; - - mtr.start(); - - const buf_block_t* block = trx_sysf_get(&mtr, false); - - if (block - && mach_read_from_4(TRX_SYS + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD - + block->frame) - == TRX_SYS_MYSQL_LOG_MAGIC_N) { - ib::info() << "Last binlog file '" - << TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME - + TRX_SYS + block->frame - << "', position " - << mach_read_from_8(TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET - + TRX_SYS + block->frame); - } - - mtr.commit(); -} - -#ifdef WITH_WSREP - -#ifdef UNIV_DEBUG -static long long trx_sys_cur_xid_seqno = -1; -static unsigned char trx_sys_cur_xid_uuid[16]; - -/** Read WSREP XID seqno */ -static inline long long read_wsrep_xid_seqno(const XID* xid) -{ - long long seqno; - memcpy(&seqno, xid->data + 24, sizeof(long long)); - return seqno; -} - -/** Read WSREP XID UUID */ -static inline void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf) -{ - memcpy(buf, xid->data + 8, 16); -} - -#endif /* UNIV_DEBUG */ - -/** Update WSREP XID info in the TRX_SYS page. 
-@param[in] xid Transaction XID -@param[in,out] sys_header TRX_SYS page -@param[in,out] mtr mini-transaction */ -UNIV_INTERN -void -trx_sys_update_wsrep_checkpoint( - const XID* xid, - buf_block_t* sys_header, - mtr_t* mtr) -{ - ut_ad(xid->formatID == 1); - ut_ad(wsrep_is_wsrep_xid(xid)); - - byte* magic = TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_MAGIC_N_FLD - + sys_header->frame; - - if (mach_read_from_4(magic) != TRX_SYS_WSREP_XID_MAGIC_N) { - mlog_write_ulint(magic, TRX_SYS_WSREP_XID_MAGIC_N, - MLOG_4BYTES, mtr); -#ifdef UNIV_DEBUG - } else { - /* Check that seqno is monotonically increasing */ - unsigned char xid_uuid[16]; - long long xid_seqno = read_wsrep_xid_seqno(xid); - read_wsrep_xid_uuid(xid, xid_uuid); - - if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) { - ut_ad(xid_seqno > trx_sys_cur_xid_seqno); - trx_sys_cur_xid_seqno = xid_seqno; - } else { - memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16); - } - - trx_sys_cur_xid_seqno = xid_seqno; -#endif /* UNIV_DEBUG */ - } - - mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_FORMAT + sys_header->frame, - uint32_t(xid->formatID), - MLOG_4BYTES, mtr); - mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_GTRID_LEN + sys_header->frame, - uint32_t(xid->gtrid_length), - MLOG_4BYTES, mtr); - mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_BQUAL_LEN + sys_header->frame, - uint32_t(xid->bqual_length), - MLOG_4BYTES, mtr); - mlog_write_string(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_DATA + sys_header->frame, - reinterpret_cast<const byte*>(xid->data), - XIDDATASIZE, mtr); -} - -/** Read WSREP checkpoint XID from sys header. 
-@param[out] xid WSREP XID -@return whether the checkpoint was present */ -UNIV_INTERN -bool -trx_sys_read_wsrep_checkpoint(XID* xid) -{ - mtr_t mtr; - - ut_ad(xid); - - mtr.start(); - - const buf_block_t* block = trx_sysf_get(&mtr, false); - - if (!block || - mach_read_from_4(TRX_SYS + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_MAGIC_N_FLD + block->frame) - != TRX_SYS_WSREP_XID_MAGIC_N) { - memset(xid, 0, sizeof(*xid)); - long long seqno= -1; - memcpy(xid->data + 24, &seqno, sizeof(long long)); - xid->formatID = -1; - mtr.commit(); - return false; + if (!*trx_sys.recovered_binlog_filename) { + return; } - xid->formatID = (int)mach_read_from_4( - TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT - + block->frame); - xid->gtrid_length = (int)mach_read_from_4( - TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN - + block->frame); - xid->bqual_length = (int)mach_read_from_4( - TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN - + block->frame); - memcpy(xid->data, - TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA - + block->frame, - XIDDATASIZE); - - mtr.commit(); - return true; + ib::info() << "Last binlog file '" + << trx_sys.recovered_binlog_filename + << "', position " + << trx_sys.recovered_binlog_offset; } -#endif /* WITH_WSREP */ - /** Find an available rollback segment. @param[in] sys_header @return an unallocated rollback segment slot in the TRX_SYS header diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 7b104653cf9..0263b42812a 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -885,6 +885,14 @@ trx_lists_init_at_db_start() ut_ad(!srv_was_started); ut_ad(!purge_sys); + if (srv_operation == SRV_OPERATION_RESTORE) { + /* mariabackup --prepare only deals with + the redo log and the data files, not with + transactions or the data dictionary. 
*/ + trx_rseg_array_init(); + return; + } + purge_sys = UT_NEW_NOKEY(purge_sys_t()); if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { @@ -1303,36 +1311,7 @@ trx_write_serialisation_history( MONITOR_INC(MONITOR_TRX_COMMIT_UNDO); -#ifdef WITH_WSREP - const bool update_wsrep = wsrep_is_wsrep_xid(trx->xid); -#endif - const bool update_binlog_pos = trx->mysql_log_file_name - && *trx->mysql_log_file_name; - if (!update_binlog_pos -#ifdef WITH_WSREP - && !update_wsrep -#endif - ) return; - - buf_block_t* block = trx_sysf_get(mtr); -#ifdef WITH_WSREP - if (update_wsrep) - trx_sys_update_wsrep_checkpoint(trx->xid, block, mtr); -#endif /* WITH_WSREP */ - - /* Update the latest MySQL binlog name and offset info - in trx sys header if MySQL binlogging is on or the database - server is a MySQL replication slave */ - - if (update_binlog_pos) { - - trx_sys_update_mysql_binlog_offset( - trx->mysql_log_file_name, - trx->mysql_log_offset, - block, mtr); - - trx->mysql_log_file_name = NULL; - } + trx->mysql_log_file_name = NULL; } /******************************************************************** |