summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marko Mäkelä <marko.makela@mariadb.com> 2020-04-07 16:43:46 +0300
committer Marko Mäkelä <marko.makela@mariadb.com> 2020-04-07 17:18:05 +0300
commit 1738c0f1bec2b67d1294e894a62de1c5c15d9361 (patch)
tree 5cd2cb375ff966a68034ba4865fce87b3a9c2ff8
parent d848fcad69d3e3a385290b67f809a96d87f40386 (diff)
download mariadb-git-1738c0f1bec2b67d1294e894a62de1c5c15d9361.tar.gz
MDEV-22169 Recovery fails after failing to insert into mlog_init
In a multi-batch recovery, we must ensure that INIT_PAGE and especially the MDEV-15528 FREE_PAGE records will be taken properly into account. Writing a FREE_PAGE record gives the server permission to omit a page write. If recovery insists on applying log to a page whose page flush has been omitted, then the consistency checks in the application of high-level redo log records (appending an undo log record, inserting or deleting an index record) will likely fail. mlog_init_t::add(): Return whether the state was changed. mlog_init_t::will_avoid_read(): Determine whether a page read will be avoided and whether older log records can be safely skipped. recv_sys_t::parse(): Even if store==STORE_NO, process the records INIT_PAGE and FREE_PAGE. While processing them, we can delete older redo log records for the page. If store!=STORE_NO, we can directly skip redo log records of other types if mlog_init indicates that the page will be freed or initialized at a later LSN. This fix was developed in cooperation with Thirunarayanan Balathandayuthapani.
-rw-r--r-- storage/innobase/log/log0recv.cc 52
1 files changed, 40 insertions, 12 deletions
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 3533cd3058b..cb1b8b31dcb 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -107,11 +107,11 @@ private:
/** @return start of the log records */
byte *begin() { return reinterpret_cast<byte*>(&len + 1); }
- /** @return start of the log records */
- const byte *begin() const { return const_cast<log_phys_t*>(this)->begin(); }
/** @return end of the log records */
byte *end() { byte *e= begin() + len; ut_ad(!*e); return e; }
public:
+ /** @return start of the log records */
+ const byte *begin() const { return const_cast<log_phys_t*>(this)->begin(); }
/** @return end of the log records */
const byte *end() const { return const_cast<log_phys_t*>(this)->end(); }
@@ -598,17 +598,19 @@ private:
public:
/** Record that a page will be initialized by the redo log.
@param[in] page_id page identifier
- @param[in] lsn log sequence number */
- void add(const page_id_t page_id, lsn_t lsn)
+ @param[in] lsn log sequence number
+ @return whether the state was changed */
+ bool add(const page_id_t page_id, lsn_t lsn)
{
ut_ad(mutex_own(&recv_sys.mutex));
const init init = { lsn, false };
std::pair<map::iterator, bool> p = inits.insert(
map::value_type(page_id, init));
ut_ad(!p.first->second.created);
- if (!p.second && p.first->second.lsn < init.lsn) {
- p.first->second = init;
- }
+ if (p.second) return true;
+ if (p.first->second.lsn >= init.lsn) return false;
+ p.first->second = init;
+ return true;
}
/** Get the last stored lsn of the page id and its respective
@@ -623,6 +625,17 @@ public:
return inits.find(page_id)->second;
}
+ /** Determine if a page will be initialized or freed after a time.
+ @param page_id page identifier
+ @param lsn log sequence number
+ @return whether page_id will be freed or initialized after lsn */
+ bool will_avoid_read(page_id_t page_id, lsn_t lsn) const
+ {
+ ut_ad(mutex_own(&recv_sys.mutex));
+ auto i= inits.find(page_id);
+ return i != inits.end() && i->second.lsn > lsn;
+ }
+
/** At the end of each recovery batch, reset the 'created' flags. */
void reset()
{
@@ -1938,7 +1951,7 @@ same_page:
if (got_page_op)
{
- ut_d(const page_id_t id(space_id, page_no));
+ const page_id_t id(space_id, page_no);
ut_d(if ((b & 0x70) == INIT_PAGE) freed.erase(id));
ut_ad(freed.find(id) == freed.end());
switch (b & 0x70) {
@@ -2059,16 +2072,31 @@ same_page:
ut_ad(modified.emplace(id).second || (b & 0x70) != INIT_PAGE);
}
#endif
+ const bool is_init= (b & 0x70) <= INIT_PAGE;
switch (store) {
- case STORE_NO:
- continue;
case STORE_IF_EXISTS:
if (!fil_space_get_size(space_id))
continue;
/* fall through */
case STORE_YES:
- add(page_id_t(space_id, page_no), start_lsn, end_lsn, recs,
- static_cast<size_t>(l + rlen - recs));
+ if (is_init || !mlog_init.will_avoid_read(id, start_lsn))
+ add(id, start_lsn, end_lsn, recs,
+ static_cast<size_t>(l + rlen - recs));
+ continue;
+ case STORE_NO:
+ if (!is_init)
+ continue;
+ map::iterator i= pages.find(id);
+ if (i == pages.end())
+ continue;
+ if ((*static_cast<const log_phys_t*>(*i->second.log.begin())->begin() &
+ 0x70) <= INIT_PAGE)
+ {
+ ut_ad(i->second.state == page_recv_t::RECV_WILL_NOT_READ);
+ continue;
+ }
+ pages.erase(i);
+ mlog_init.add(id, start_lsn);
}
}
#if 1 /* MDEV-14425 FIXME: this must be in the checkpoint file only! */