summary | refs | log | tree | commit | diff
path: root/storage
diff options
context:
space:
mode:
author Marko Mäkelä <marko.makela@mariadb.com> 2018-02-20 15:10:03 +0200
committer Marko Mäkelä <marko.makela@mariadb.com> 2018-02-20 21:36:36 +0200
commit 947efe17ed8188ca4feef6deb0c2831a246b5c8f (patch)
tree 1df8380c4c59cfb9f119f89d3022b66edb5bd610 /storage
parent f6544353e0c84d14a333525ba26ebd30bf125615 (diff)
download mariadb-git-947efe17ed8188ca4feef6deb0c2831a246b5c8f.tar.gz
MDEV-15158 On commit, do not write to the TRX_SYS page
This is based on a prototype by Thirunarayanan Balathandayuthapani <thiru@mariadb.com>. Binlog and Galera write-set replication information was written into the TRX_SYS page on each commit. Instead of writing to the TRX_SYS page during normal operation, InnoDB can make use of the rollback segment header pages, which are already being written to during a commit. The following fields are added to the rollback segment header page: TRX_RSEG_BINLOG_OFFSET; TRX_RSEG_BINLOG_NAME (NUL-terminated; empty name = not present); TRX_RSEG_WSREP_XID_FORMAT (0 = not present; 1 = present); TRX_RSEG_WSREP_XID_GTRID_LEN; TRX_RSEG_WSREP_XID_BQUAL_LEN; TRX_RSEG_WSREP_XID_DATA. trx_sys_t: Introduce the fields recovered_binlog_filename, recovered_binlog_offset, recovered_wsrep_xid. To facilitate upgrade from older MySQL or MariaDB versions, we will still read the information in the TRX_SYS page. It will be overridden by the information that we find in the rollback segment header pages. Mariabackup --prepare will read the metadata from the rollback segment header pages via trx_rseg_array_init(). It will still not read any undo log pages or recover any transactions.
Diffstat (limited to 'storage')
-rw-r--r-- storage/innobase/handler/ha_innodb.cc 13
-rw-r--r-- storage/innobase/include/trx0rseg.h 68
-rw-r--r-- storage/innobase/include/trx0sys.h 36
-rw-r--r-- storage/innobase/srv/srv0start.cc 8
-rw-r--r-- storage/innobase/trx/trx0purge.cc 51
-rw-r--r-- storage/innobase/trx/trx0rseg.cc 315
-rw-r--r-- storage/innobase/trx/trx0sys.cc 198
-rw-r--r-- storage/innobase/trx/trx0trx.cc 39
8 files changed, 439 insertions, 289 deletions
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 9ab36a97fba..699e2899f57 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -111,10 +111,9 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "trx0purge.h"
#endif /* UNIV_DEBUG */
#include "trx0roll.h"
-#include "trx0sys.h"
+#include "trx0rseg.h"
#include "trx0trx.h"
#include "fil0pagecompress.h"
-#include "trx0xa.h"
#include "ut0mem.h"
#include "row0ext.h"
@@ -19679,12 +19678,8 @@ innobase_wsrep_set_checkpoint(
DBUG_ASSERT(hton == innodb_hton_ptr);
if (wsrep_is_wsrep_xid(xid)) {
- mtr_t mtr;
- mtr_start(&mtr);
- if (buf_block_t* sys_header = trx_sysf_get(&mtr)) {
- trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
- }
- mtr_commit(&mtr);
+
+ trx_rseg_update_wsrep_checkpoint(xid);
innobase_flush_logs(hton, false);
return 0;
} else {
@@ -19700,7 +19695,7 @@ innobase_wsrep_get_checkpoint(
XID* xid)
{
DBUG_ASSERT(hton == innodb_hton_ptr);
- trx_sys_read_wsrep_checkpoint(xid);
+ trx_rseg_read_wsrep_checkpoint(*xid);
return 0;
}
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 5642dd0d4a8..8aa91c13add 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -27,10 +27,8 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0rseg_h
#define trx0rseg_h
-#include "trx0types.h"
#include "trx0sys.h"
#include "fut0lst.h"
-#include <vector>
/** Gets a rollback segment header.
@param[in] space space where placed
@@ -226,6 +224,30 @@ struct trx_rseg_t {
/** Maximum transaction ID (valid only if TRX_RSEG_FORMAT is 0) */
#define TRX_RSEG_MAX_TRX_ID (TRX_RSEG_UNDO_SLOTS + TRX_RSEG_N_SLOTS \
* TRX_RSEG_SLOT_SIZE)
+
+/** 8 bytes offset within the binlog file */
+#define TRX_RSEG_BINLOG_OFFSET TRX_RSEG_MAX_TRX_ID + 8
+/** MySQL log file name, 512 bytes, including terminating NUL
+(valid only if TRX_RSEG_FORMAT is 0).
+If no binlog information is present, the first byte is NUL. */
+#define TRX_RSEG_BINLOG_NAME TRX_RSEG_MAX_TRX_ID + 16
+/** Maximum length of binlog file name, including terminating NUL, in bytes */
+#define TRX_RSEG_BINLOG_NAME_LEN 512
+
+#ifdef WITH_WSREP
+/** The offset to WSREP XID headers */
+#define TRX_RSEG_WSREP_XID_INFO TRX_RSEG_MAX_TRX_ID + 16 + 512
+
+/** WSREP XID format (1 if present and valid, 0 if not present) */
+#define TRX_RSEG_WSREP_XID_FORMAT TRX_RSEG_WSREP_XID_INFO
+/** WSREP XID GTRID length */
+#define TRX_RSEG_WSREP_XID_GTRID_LEN TRX_RSEG_WSREP_XID_INFO + 4
+/** WSREP XID bqual length */
+#define TRX_RSEG_WSREP_XID_BQUAL_LEN TRX_RSEG_WSREP_XID_INFO + 8
+/** WSREP XID data (XIDDATASIZE bytes) */
+#define TRX_RSEG_WSREP_XID_DATA TRX_RSEG_WSREP_XID_INFO + 12
+#endif /* WITH_WSREP*/
+
/*-------------------------------------------------------------*/
/** Read the page number of an undo log slot.
@@ -240,6 +262,48 @@ trx_rsegf_get_nth_undo(const trx_rsegf_t* rsegf, ulint n)
+ n * TRX_RSEG_SLOT_SIZE);
}
+#ifdef WITH_WSREP
+/** Update the WSREP XID information in rollback segment header.
+@param[in,out] rseg_header rollback segment header
+@param[in] xid WSREP XID
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_wsrep_checkpoint(
+ trx_rsegf_t* rseg_header,
+ const XID* xid,
+ mtr_t* mtr);
+
+/** Update WSREP checkpoint XID in first rollback segment header.
+@param[in] xid WSREP XID */
+void trx_rseg_update_wsrep_checkpoint(const XID* xid);
+
+/** Read the WSREP XID information in rollback segment header.
+@param[in] rseg_header Rollback segment header
+@param[out] xid Transaction XID
+@return whether the WSREP XID was present */
+bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid);
+
+/** Recover the latest WSREP checkpoint XID.
+@param[out] xid WSREP XID
+@return whether the WSREP XID was found */
+bool trx_rseg_read_wsrep_checkpoint(XID& xid);
+#endif /* WITH_WSREP */
+
+/** Upgrade a rollback segment header page to MariaDB 10.3 format.
+@param[in,out] rseg_header rollback segment header page
+@param[in,out] mtr mini-transaction */
+void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr);
+
+/** Update the offset information about the end of the binlog entry
+which corresponds to the transaction just being committed.
+In a replication slave, this updates the master binlog position
+up to which replication has proceeded.
+@param[in,out] rseg_header rollback segment header
+@param[in] trx committing transaction
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr);
+
#include "trx0rseg.ic"
#endif
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index fae9d25d601..1469d8b1dc7 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -141,26 +141,6 @@ trx_sys_update_mysql_binlog_offset(
system header. */
void
trx_sys_print_mysql_binlog_offset();
-#ifdef WITH_WSREP
-
-/** Update WSREP XID info in the TRX_SYS page.
-@param[in] xid Transaction XID
-@param[in,out] sys_header TRX_SYS page
-@param[in,out] mtr mini-transaction */
-UNIV_INTERN
-void
-trx_sys_update_wsrep_checkpoint(
- const XID* xid,
- buf_block_t* sys_header,
- mtr_t* mtr);
-
-/** Read WSREP checkpoint XID from sys header.
-@param[out] xid WSREP XID
-@return whether the checkpoint was present */
-UNIV_INTERN
-bool
-trx_sys_read_wsrep_checkpoint(XID* xid);
-#endif /* WITH_WSREP */
/** Create the rollback segments.
@return whether the creation succeeded */
@@ -235,7 +215,8 @@ trx_sysf_rseg_get_page_no(const buf_block_t* sys_header, ulint rseg_id)
+ sys_header->frame);
}
-/** Maximum length of MySQL binlog file name, in bytes. */
+/** Maximum length of MySQL binlog file name, in bytes.
+(Used before MariaDB 10.3.5.) */
#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
@@ -312,7 +293,7 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
*/
#ifdef WITH_WSREP
-/** The offset to WSREP XID headers */
+/** The offset to WSREP XID headers (used before MariaDB 10.3.5) */
#define TRX_SYS_WSREP_XID_INFO std::max(srv_page_size - 3500, 1596UL)
#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
@@ -856,7 +837,6 @@ public:
by any mutex, because it is read-only
during multi-threaded operation */
-
/**
Lock-free hash of in memory read-write transactions.
Works faster when it is on it's own cache line (tested).
@@ -865,6 +845,16 @@ public:
MY_ALIGNED(CACHE_LINE_SIZE) rw_trx_hash_t rw_trx_hash;
+#ifdef WITH_WSREP
+ /** Latest recovered XID during startup */
+ XID recovered_wsrep_xid;
+#endif
+ /** Latest recovered binlog offset */
+ int64_t recovered_binlog_offset;
+ /** Latest recovered binlog file name */
+ char recovered_binlog_filename[TRX_SYS_MYSQL_LOG_NAME_LEN];
+
+
/**
Constructor.
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 1fb306cec88..bef3fef9cde 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -2230,6 +2230,8 @@ files_checked:
if (err != DB_SUCCESS) {
return(srv_init_abort(err));
}
+ /* fall through */
+ case SRV_OPERATION_RESTORE:
/* This must precede
recv_apply_hashed_log_recs(true). */
trx_lists_init_at_db_start();
@@ -2237,12 +2239,6 @@ files_checked:
case SRV_OPERATION_RESTORE_DELTA:
case SRV_OPERATION_BACKUP:
ut_ad(!"wrong mariabackup mode");
- /* fall through */
- case SRV_OPERATION_RESTORE:
- /* mariabackup --prepare only deals with
- the redo log and the data files, not with
- transactions or the data dictionary. */
- break;
}
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 34f72929d03..feb283429a5 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -44,6 +44,7 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0roll.h"
#include "trx0rseg.h"
#include "trx0trx.h"
+#include <mysql/service_wsrep.h>
/** Maximum allowable purge history length. <=0 means 'infinite'. */
ulong srv_max_purge_lag = 0;
@@ -239,6 +240,8 @@ Remove the undo log segment from the rseg slot if it is too big for reuse.
void
trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
{
+ DBUG_PRINT("trx", ("commit(" TRX_ID_FMT "," TRX_ID_FMT ")",
+ trx->id, trx->no));
ut_ad(undo == trx->rsegs.m_redo.undo
|| undo == trx->rsegs.m_redo.old_insert);
trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
@@ -251,6 +254,12 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1);
+ if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG_FORMAT + rseg_header))) {
+ /* This database must have been upgraded from
+ before MariaDB 10.3.5. */
+ trx_rseg_format_upgrade(rseg_header, mtr);
+ }
+
if (undo->state != TRX_UNDO_CACHED) {
ulint hist_size;
#ifdef UNIV_DEBUG
@@ -258,11 +267,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
#endif /* UNIV_DEBUG */
/* The undo log segment will not be reused */
-
- if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
- ib::fatal() << "undo->id is " << undo->id;
- }
-
+ ut_a(undo->id < TRX_RSEG_N_SLOTS);
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
@@ -272,30 +277,17 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
ut_ad(undo->size == flst_get_len(
seg_header + TRX_UNDO_PAGE_LIST));
- byte* rseg_format = rseg_header + TRX_RSEG_FORMAT;
- if (UNIV_UNLIKELY(mach_read_from_4(rseg_format))) {
- /* This database must have been upgraded from
- before MariaDB 10.3.5. */
- mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr);
- /* Clear also possible garbage at the end of
- the page. Old InnoDB versions did not initialize
- unused parts of pages. */
- ut_ad(page_offset(rseg_header) == TRX_RSEG);
- byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8;
- ulint len = UNIV_PAGE_SIZE
- - (FIL_PAGE_DATA_END
- + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8);
- memset(b, 0, len);
- mlog_log_string(b, len, mtr);
- }
mlog_write_ulint(
rseg_header + TRX_RSEG_HISTORY_SIZE,
hist_size + undo->size, MLOG_4BYTES, mtr);
- mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID,
- trx_sys.get_max_trx_id(), mtr);
}
+ /* This field now also serves as an identifier for the latest
+ binlog and WSREP XID information. */
+ mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID,
+ trx_sys.get_max_trx_id(), mtr);
+
/* Before any transaction-generating background threads or the
purge have been started, recv_recovery_rollback_active() can
start transactions in row_merge_drop_temp_indexes() and
@@ -320,6 +312,19 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
|| trx->internal)
&& srv_fast_shutdown));
+#ifdef WITH_WSREP
+ if (wsrep_is_wsrep_xid(trx->xid)) {
+ trx_rseg_update_wsrep_checkpoint(rseg_header, trx->xid, mtr);
+ }
+#endif
+
+ if (trx->mysql_log_file_name && *trx->mysql_log_file_name) {
+ /* Update the latest MySQL binlog name and offset info
+ in rollback segment header if MySQL binlogging is on
+ or the database server is a MySQL replication slave. */
+ trx_rseg_update_binlog_offset(rseg_header, trx, mtr);
+ }
+
/* Add the log as the first in the history list */
flst_add_first(rseg_header + TRX_RSEG_HISTORY,
undo_header + TRX_UNDO_HISTORY_NODE, mtr);
diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc
index 4b09a4cc492..45d260a4480 100644
--- a/storage/innobase/trx/trx0rseg.cc
+++ b/storage/innobase/trx/trx0rseg.cc
@@ -34,6 +34,214 @@ Created 3/26/1996 Heikki Tuuri
#include <algorithm>
+#ifdef WITH_WSREP
+
+#ifdef UNIV_DEBUG
+static long long trx_sys_cur_xid_seqno = -1;
+static unsigned char trx_sys_cur_xid_uuid[16];
+
+/** Read WSREP XID seqno */
+static inline long long read_wsrep_xid_seqno(const XID* xid)
+{
+ long long seqno;
+ memcpy(&seqno, xid->data + 24, sizeof(long long));
+ return seqno;
+}
+
+/** Read WSREP XID UUID */
+static inline void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
+{
+ memcpy(buf, xid->data + 8, 16);
+}
+
+#endif /* UNIV_DEBUG */
+
+/** Update the WSREP XID information in rollback segment header.
+@param[in,out] rseg_header rollback segment header
+@param[in] xid WSREP XID
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_wsrep_checkpoint(
+ trx_rsegf_t* rseg_header,
+ const XID* xid,
+ mtr_t* mtr)
+{
+ ut_ad(xid->formatID == 1);
+
+#ifdef UNIV_DEBUG
+ /* Check that seqno is monotonically increasing */
+ unsigned char xid_uuid[16];
+ long long xid_seqno = read_wsrep_xid_seqno(xid);
+ read_wsrep_xid_uuid(xid, xid_uuid);
+
+ if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) {
+ ut_ad(xid_seqno > trx_sys_cur_xid_seqno);
+ trx_sys_cur_xid_seqno = xid_seqno;
+ } else {
+ memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16);
+ }
+ trx_sys_cur_xid_seqno = xid_seqno;
+#endif /* UNIV_DEBUG */
+
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_FORMAT + rseg_header,
+ uint32_t(xid->formatID),
+ MLOG_4BYTES, mtr);
+
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header,
+ uint32_t(xid->gtrid_length),
+ MLOG_4BYTES, mtr);
+
+ mlog_write_ulint(TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header,
+ uint32_t(xid->bqual_length),
+ MLOG_4BYTES, mtr);
+
+ mlog_write_string(TRX_RSEG_WSREP_XID_DATA + rseg_header,
+ reinterpret_cast<const byte*>(xid->data),
+ XIDDATASIZE, mtr);
+}
+
+/** Update WSREP checkpoint XID in first rollback segment header.
+@param[in] xid WSREP XID */
+void trx_rseg_update_wsrep_checkpoint(const XID* xid)
+{
+ mtr_t mtr;
+ mtr.start();
+
+ const trx_rseg_t* rseg = trx_sys.rseg_array[0];
+
+ trx_rsegf_t* rseg_header = trx_rsegf_get(rseg->space, rseg->page_no,
+ &mtr);
+ if (UNIV_UNLIKELY(mach_read_from_4(rseg_header + TRX_RSEG_FORMAT))) {
+ trx_rseg_format_upgrade(rseg_header, &mtr);
+ }
+
+ mlog_write_ull(rseg_header + TRX_RSEG_MAX_TRX_ID,
+ trx_sys.get_max_trx_id(), &mtr);
+ trx_rseg_update_wsrep_checkpoint(rseg_header, xid, &mtr);
+ mtr.commit();
+}
+
+/** Read the WSREP XID information in rollback segment header.
+@param[in] rseg_header Rollback segment header
+@param[out] xid Transaction XID
+@return whether the WSREP XID was present */
+bool trx_rseg_read_wsrep_checkpoint(const trx_rsegf_t* rseg_header, XID& xid)
+{
+ xid.formatID = (int)mach_read_from_4(
+ TRX_RSEG_WSREP_XID_FORMAT + rseg_header);
+
+ if (xid.formatID == 0) {
+ memset(&xid, 0, sizeof(xid));
+ long long seqno= -1;
+ memcpy(xid.data + 24, &seqno, sizeof(long long));
+ xid.formatID = -1;
+ return false;
+ }
+
+ xid.gtrid_length = (int)mach_read_from_4(
+ TRX_RSEG_WSREP_XID_GTRID_LEN + rseg_header);
+
+ xid.bqual_length = (int)mach_read_from_4(
+ TRX_RSEG_WSREP_XID_BQUAL_LEN + rseg_header);
+
+ memcpy(xid.data, TRX_RSEG_WSREP_XID_DATA + rseg_header, XIDDATASIZE);
+
+ return true;
+}
+
+/** Read the WSREP XID from the TRX_SYS page (in case of upgrade).
+@param[in] page TRX_SYS page
+@param[out] xid WSREP XID (if present)
+@return whether the WSREP XID is present */
+static bool trx_rseg_init_wsrep_xid(const page_t* page, XID& xid)
+{
+ if (mach_read_from_4(TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_MAGIC_N_FLD
+ + page)
+ != TRX_SYS_WSREP_XID_MAGIC_N) {
+ return false;
+ }
+
+ xid.formatID = (int)mach_read_from_4(
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_FORMAT + page);
+ xid.gtrid_length = (int)mach_read_from_4(
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_GTRID_LEN + page);
+ xid.bqual_length = (int)mach_read_from_4(
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_BQUAL_LEN + page);
+ memcpy(xid.data,
+ TRX_SYS + TRX_SYS_WSREP_XID_INFO
+ + TRX_SYS_WSREP_XID_DATA + page, XIDDATASIZE);
+ return true;
+}
+
+/** Recover the latest WSREP checkpoint XID.
+@param[out] xid WSREP XID
+@return whether the WSREP XID was found */
+bool trx_rseg_read_wsrep_checkpoint(XID& xid)
+{
+ mtr_t mtr;
+ trx_id_t max_id = 0;
+ bool found = false;
+
+ for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS;
+ rseg_id++, mtr.commit()) {
+ mtr.start();
+ const buf_block_t* sys = trx_sysf_get(&mtr, false);
+ if (rseg_id == 0) {
+ found = trx_rseg_init_wsrep_xid(sys->frame, xid);
+ }
+
+ const uint32_t page_no = trx_sysf_rseg_get_page_no(
+ sys, rseg_id);
+
+ if (page_no == FIL_NULL) {
+ continue;
+ }
+
+ const trx_rsegf_t* rseg_header = trx_rsegf_get_new(
+ trx_sysf_rseg_get_space(sys, rseg_id), page_no, &mtr);
+
+ if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT)) {
+ continue;
+ }
+
+ trx_id_t id = mach_read_from_8(rseg_header
+ + TRX_RSEG_MAX_TRX_ID);
+
+ if (id < max_id) {
+ continue;
+ }
+
+ max_id = id;
+ found = trx_rseg_read_wsrep_checkpoint(rseg_header, xid)
+ || found;
+ }
+
+ return found;
+}
+#endif /* WITH_WSREP */
+
+/** Upgrade a rollback segment header page to MariaDB 10.3 format.
+@param[in,out] rseg_header rollback segment header page
+@param[in,out] mtr mini-transaction */
+void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr)
+{
+ ut_ad(page_offset(rseg_header) == TRX_RSEG);
+ byte* rseg_format = TRX_RSEG_FORMAT + rseg_header;
+ mlog_write_ulint(rseg_format, 0, MLOG_4BYTES, mtr);
+ /* Clear also possible garbage at the end of the page. Old
+ InnoDB versions did not initialize unused parts of pages. */
+ byte* b = rseg_header + TRX_RSEG_MAX_TRX_ID + 8;
+ ulint len = UNIV_PAGE_SIZE
+ - (FIL_PAGE_DATA_END
+ + TRX_RSEG + TRX_RSEG_MAX_TRX_ID + 8);
+ memset(b, 0, len);
+ mlog_log_string(b, len, mtr);
+}
+
/** Creates a rollback segment header.
This function is called only when a new rollback segment is created in
the database.
@@ -192,19 +400,51 @@ trx_undo_lists_init(trx_rseg_t* rseg, trx_id_t& max_trx_id,
/** Restore the state of a persistent rollback segment.
@param[in,out] rseg persistent rollback segment
@param[in,out] max_trx_id maximum observed transaction identifier
+@param[in,out] max_rseg_trx_id maximum observed TRX_RSEG_MAX_TRX_ID
@param[in,out] mtr mini-transaction */
static
void
-trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr)
+trx_rseg_mem_restore(
+ trx_rseg_t* rseg,
+ trx_id_t& max_trx_id,
+ trx_id_t& max_rseg_trx_id,
+ mtr_t* mtr)
{
- const trx_rsegf_t* rseg_header = trx_rsegf_get_new(
+ trx_rsegf_t* rseg_header = trx_rsegf_get_new(
rseg->space, rseg->page_no, mtr);
+
if (mach_read_from_4(rseg_header + TRX_RSEG_FORMAT) == 0) {
trx_id_t id = mach_read_from_8(rseg_header
+ TRX_RSEG_MAX_TRX_ID);
+
if (id > max_trx_id) {
max_trx_id = id;
}
+
+ if (id > max_rseg_trx_id) {
+ max_rseg_trx_id = id;
+
+ if (rseg_header[TRX_RSEG_BINLOG_NAME]) {
+ memcpy(trx_sys.recovered_binlog_filename,
+ rseg_header + TRX_RSEG_BINLOG_NAME,
+ TRX_RSEG_BINLOG_NAME_LEN);
+ trx_sys.recovered_binlog_offset = mach_read_from_8(
+ rseg_header
+ + TRX_RSEG_BINLOG_OFFSET);
+ }
+
+#ifdef WITH_WSREP
+ trx_rseg_read_wsrep_checkpoint(
+ rseg_header, trx_sys.recovered_wsrep_xid);
+#endif
+ }
+ }
+
+ if (srv_operation == SRV_OPERATION_RESTORE) {
+ /* mariabackup --prepare only deals with
+ the redo log and the data files, not with
+ transactions or the data dictionary. */
+ return;
}
/* Initialize the undo log lists according to the rseg header */
@@ -252,21 +492,55 @@ trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr)
}
}
+/** Read binlog metadata from the TRX_SYS page, in case we are upgrading
+from MySQL or a MariaDB version older than 10.3.5. */
+static void trx_rseg_init_binlog_info(const page_t* page)
+{
+ if (mach_read_from_4(TRX_SYS + TRX_SYS_MYSQL_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
+ + page)
+ == TRX_SYS_MYSQL_LOG_MAGIC_N) {
+ memcpy(trx_sys.recovered_binlog_filename,
+ TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME
+ + TRX_SYS + page, TRX_SYS_MYSQL_LOG_NAME_LEN);
+ trx_sys.recovered_binlog_offset = mach_read_from_8(
+ TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET
+ + TRX_SYS + page);
+ }
+
+#ifdef WITH_WSREP
+ trx_rseg_init_wsrep_xid(page, trx_sys.recovered_wsrep_xid);
+#endif
+}
+
/** Initialize the rollback segments in memory at database startup. */
void
trx_rseg_array_init()
{
- trx_id_t max_trx_id = 0;
+ trx_id_t max_trx_id = 0, max_rseg_trx_id = 0;
+
+ *trx_sys.recovered_binlog_filename = '\0';
+ trx_sys.recovered_binlog_offset = -1;
+#ifdef WITH_WSREP
+ memset(&trx_sys.recovered_wsrep_xid, 0,
+ sizeof trx_sys.recovered_wsrep_xid);
+ trx_sys.recovered_wsrep_xid.formatID = -1;
+#endif
for (ulint rseg_id = 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) {
mtr_t mtr;
mtr.start();
if (const buf_block_t* sys = trx_sysf_get(&mtr, false)) {
if (rseg_id == 0) {
+ /* In case this is an upgrade from
+ before MariaDB 10.3.5, fetch the base
+ information from the TRX_SYS page. */
max_trx_id = mach_read_from_8(
TRX_SYS + TRX_SYS_TRX_ID_STORE
+ sys->frame);
+ trx_rseg_init_binlog_info(sys->frame);
}
+
const uint32_t page_no = trx_sysf_rseg_get_page_no(
sys, rseg_id);
if (page_no != FIL_NULL) {
@@ -278,7 +552,9 @@ trx_rseg_array_init()
ut_ad(rseg->id == rseg_id);
ut_ad(!trx_sys.rseg_array[rseg_id]);
trx_sys.rseg_array[rseg_id] = rseg;
- trx_rseg_mem_restore(rseg, max_trx_id, &mtr);
+ trx_rseg_mem_restore(
+ rseg, max_trx_id, max_rseg_trx_id,
+ &mtr);
}
}
@@ -404,3 +680,34 @@ trx_rseg_get_n_undo_tablespaces(
return ulint(end - space_ids);
}
+
+/** Update the offset information about the end of the binlog entry
+which corresponds to the transaction just being committed.
+In a replication slave, this updates the master binlog position
+up to which replication has proceeded.
+@param[in,out] rseg_header rollback segment header
+@param[in] trx committing transaction
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr)
+{
+ DBUG_LOG("trx", "trx_mysql_binlog_offset: " << trx->mysql_log_offset);
+
+ const size_t len = strlen(trx->mysql_log_file_name) + 1;
+
+ ut_ad(len > 1);
+
+ if (UNIV_UNLIKELY(len > TRX_RSEG_BINLOG_NAME_LEN)) {
+ return;
+ }
+
+ mlog_write_ull(rseg_header + TRX_RSEG_BINLOG_OFFSET,
+ trx->mysql_log_offset, mtr);
+ byte* p = rseg_header + TRX_RSEG_BINLOG_NAME;
+ const byte* binlog_name = reinterpret_cast<const byte*>
+ (trx->mysql_log_file_name);
+
+ if (memcmp(binlog_name, p, len)) {
+ mlog_write_string(p, binlog_name, len, mtr);
+ }
+}
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 1b60e2bd121..13ebe27f539 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -88,207 +88,21 @@ ReadView::check_trx_id_sanity(
uint trx_rseg_n_slots_debug = 0;
#endif
-
-/*****************************************************************//**
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
- const char* file_name,/*!< in: MySQL log file name */
- int64_t offset, /*!< in: position in that log file */
- buf_block_t* sys_header, /*!< in,out: trx sys header */
- mtr_t* mtr) /*!< in,out: mini-transaction */
-{
- DBUG_PRINT("InnoDB",("trx_mysql_binlog_offset: %lld", (longlong) offset));
-
- const size_t len = strlen(file_name) + 1;
-
- if (len > TRX_SYS_MYSQL_LOG_NAME_LEN) {
-
- /* We cannot fit the name to the 512 bytes we have reserved */
-
- return;
- }
-
- byte* p = TRX_SYS + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
- + TRX_SYS_MYSQL_LOG_INFO + sys_header->frame;
-
- if (mach_read_from_4(p) != TRX_SYS_MYSQL_LOG_MAGIC_N) {
- mlog_write_ulint(p,
- TRX_SYS_MYSQL_LOG_MAGIC_N,
- MLOG_4BYTES, mtr);
- }
-
- p = TRX_SYS + TRX_SYS_MYSQL_LOG_NAME + TRX_SYS_MYSQL_LOG_INFO
- + sys_header->frame;
-
- if (memcmp(file_name, p, len)) {
- mlog_write_string(p,
- reinterpret_cast<const byte*>(file_name),
- len, mtr);
- }
-
- mlog_write_ull(TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_OFFSET
- + TRX_SYS + sys_header->frame, offset, mtr);
-}
-
/** Display the MySQL binlog offset info if it is present in the trx
system header. */
void
trx_sys_print_mysql_binlog_offset()
{
- mtr_t mtr;
-
- mtr.start();
-
- const buf_block_t* block = trx_sysf_get(&mtr, false);
-
- if (block
- && mach_read_from_4(TRX_SYS + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
- + block->frame)
- == TRX_SYS_MYSQL_LOG_MAGIC_N) {
- ib::info() << "Last binlog file '"
- << TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME
- + TRX_SYS + block->frame
- << "', position "
- << mach_read_from_8(TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET
- + TRX_SYS + block->frame);
- }
-
- mtr.commit();
-}
-
-#ifdef WITH_WSREP
-
-#ifdef UNIV_DEBUG
-static long long trx_sys_cur_xid_seqno = -1;
-static unsigned char trx_sys_cur_xid_uuid[16];
-
-/** Read WSREP XID seqno */
-static inline long long read_wsrep_xid_seqno(const XID* xid)
-{
- long long seqno;
- memcpy(&seqno, xid->data + 24, sizeof(long long));
- return seqno;
-}
-
-/** Read WSREP XID UUID */
-static inline void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
-{
- memcpy(buf, xid->data + 8, 16);
-}
-
-#endif /* UNIV_DEBUG */
-
-/** Update WSREP XID info in the TRX_SYS page.
-@param[in] xid Transaction XID
-@param[in,out] sys_header TRX_SYS page
-@param[in,out] mtr mini-transaction */
-UNIV_INTERN
-void
-trx_sys_update_wsrep_checkpoint(
- const XID* xid,
- buf_block_t* sys_header,
- mtr_t* mtr)
-{
- ut_ad(xid->formatID == 1);
- ut_ad(wsrep_is_wsrep_xid(xid));
-
- byte* magic = TRX_SYS + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD
- + sys_header->frame;
-
- if (mach_read_from_4(magic) != TRX_SYS_WSREP_XID_MAGIC_N) {
- mlog_write_ulint(magic, TRX_SYS_WSREP_XID_MAGIC_N,
- MLOG_4BYTES, mtr);
-#ifdef UNIV_DEBUG
- } else {
- /* Check that seqno is monotonically increasing */
- unsigned char xid_uuid[16];
- long long xid_seqno = read_wsrep_xid_seqno(xid);
- read_wsrep_xid_uuid(xid, xid_uuid);
-
- if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) {
- ut_ad(xid_seqno > trx_sys_cur_xid_seqno);
- trx_sys_cur_xid_seqno = xid_seqno;
- } else {
- memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16);
- }
-
- trx_sys_cur_xid_seqno = xid_seqno;
-#endif /* UNIV_DEBUG */
- }
-
- mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_FORMAT + sys_header->frame,
- uint32_t(xid->formatID),
- MLOG_4BYTES, mtr);
- mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_GTRID_LEN + sys_header->frame,
- uint32_t(xid->gtrid_length),
- MLOG_4BYTES, mtr);
- mlog_write_ulint(TRX_SYS + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_BQUAL_LEN + sys_header->frame,
- uint32_t(xid->bqual_length),
- MLOG_4BYTES, mtr);
- mlog_write_string(TRX_SYS + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_DATA + sys_header->frame,
- reinterpret_cast<const byte*>(xid->data),
- XIDDATASIZE, mtr);
-}
-
-/** Read WSREP checkpoint XID from sys header.
-@param[out] xid WSREP XID
-@return whether the checkpoint was present */
-UNIV_INTERN
-bool
-trx_sys_read_wsrep_checkpoint(XID* xid)
-{
- mtr_t mtr;
-
- ut_ad(xid);
-
- mtr.start();
-
- const buf_block_t* block = trx_sysf_get(&mtr, false);
-
- if (!block ||
- mach_read_from_4(TRX_SYS + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD + block->frame)
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- memset(xid, 0, sizeof(*xid));
- long long seqno= -1;
- memcpy(xid->data + 24, &seqno, sizeof(long long));
- xid->formatID = -1;
- mtr.commit();
- return false;
+ if (!*trx_sys.recovered_binlog_filename) {
+ return;
}
- xid->formatID = (int)mach_read_from_4(
- TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT
- + block->frame);
- xid->gtrid_length = (int)mach_read_from_4(
- TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN
- + block->frame);
- xid->bqual_length = (int)mach_read_from_4(
- TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN
- + block->frame);
- memcpy(xid->data,
- TRX_SYS + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA
- + block->frame,
- XIDDATASIZE);
-
- mtr.commit();
- return true;
+ ib::info() << "Last binlog file '"
+ << trx_sys.recovered_binlog_filename
+ << "', position "
+ << trx_sys.recovered_binlog_offset;
}
-#endif /* WITH_WSREP */
-
/** Find an available rollback segment.
@param[in] sys_header
@return an unallocated rollback segment slot in the TRX_SYS header
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 7b104653cf9..0263b42812a 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -885,6 +885,14 @@ trx_lists_init_at_db_start()
ut_ad(!srv_was_started);
ut_ad(!purge_sys);
+ if (srv_operation == SRV_OPERATION_RESTORE) {
+ /* mariabackup --prepare only deals with
+ the redo log and the data files, not with
+ transactions or the data dictionary. */
+ trx_rseg_array_init();
+ return;
+ }
+
purge_sys = UT_NEW_NOKEY(purge_sys_t());
if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) {
@@ -1303,36 +1311,7 @@ trx_write_serialisation_history(
MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
-#ifdef WITH_WSREP
- const bool update_wsrep = wsrep_is_wsrep_xid(trx->xid);
-#endif
- const bool update_binlog_pos = trx->mysql_log_file_name
- && *trx->mysql_log_file_name;
- if (!update_binlog_pos
-#ifdef WITH_WSREP
- && !update_wsrep
-#endif
- ) return;
-
- buf_block_t* block = trx_sysf_get(mtr);
-#ifdef WITH_WSREP
- if (update_wsrep)
- trx_sys_update_wsrep_checkpoint(trx->xid, block, mtr);
-#endif /* WITH_WSREP */
-
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (update_binlog_pos) {
-
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- block, mtr);
-
- trx->mysql_log_file_name = NULL;
- }
+ trx->mysql_log_file_name = NULL;
}
/********************************************************************