summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2020-02-06 09:00:19 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2020-02-06 09:00:19 +0200
commit6d214415c9547593fa0fd22b25fc4f6cd1c6cd65 (patch)
tree9bbe40d101015d6708ccfd1c249d24b7cc46c816
parentd0c8316bf5a372d8098cdfb9b8e3d4735f58e093 (diff)
downloadmariadb-git-6d214415c9547593fa0fd22b25fc4f6cd1c6cd65.tar.gz
MDEV-21351: Free processed recv_sys_t::blocks
Release memory as soon as redo log records are processed. Because the memory allocation and deallocation of parsed redo log records must be protected by recv_sys.mutex, it is better to avoid using a std::atomic field for bookkeeping. buf_page_t::access_time: Keep track of the recv_sys.pages record allocations. The most significant 16 bits will count allocated blocks (which were previously counted by buf_page_t::buf_fix_count in the debug version), and the least significant 16 bits indicate the number of allocated bytes in the block (which was previously managed in buf_block_t::modify_clock), which must be a positive number, up to innodb_page_size. The byte offset 65536 is represented as the value 0. recv_recover_page(): Let the caller erase the log. recv_validate_tablespace(): Acquire recv_sys_t::mutex.
-rw-r--r--storage/innobase/include/buf0buf.h14
-rw-r--r--storage/innobase/include/log0recv.h75
-rw-r--r--storage/innobase/log/log0recv.cc162
3 files changed, 142 insertions, 109 deletions
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index f9a9539bf9c..8d0176e8f4e 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2019, MariaDB Corporation.
+Copyright (c) 2013, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1568,7 +1568,17 @@ public:
unsigned access_time; /*!< time of first access, or
0 if the block was never accessed
in the buffer pool. Protected by
- block mutex */
+ block mutex for buf_page_in_file()
+ blocks.
+
+ For state==BUF_BLOCK_MEMORY
+ blocks, this field can be repurposed
+ for something else.
+
+ When this field counts log records
+ and bytes allocated for recv_sys.pages,
+ the field is protected by
+ recv_sys_t::mutex. */
# ifdef UNIV_DEBUG
ibool file_page_was_freed;
/*!< this is set to TRUE when
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 6587cb475e2..0c81a1c0f05 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -196,9 +196,9 @@ struct page_recv_t
tail= recs;
}
- /** Trim old log records for a page
+ /** Trim old log records for a page.
@param start_lsn oldest log sequence number to preserve
- @return whether the entire log was trimmed */
+ @return whether all the log for the page was trimmed */
inline bool trim(lsn_t start_lsn);
/** @return the last log snippet */
const log_rec_t* last() const { return tail; }
@@ -215,11 +215,8 @@ struct page_recv_t
iterator begin() { return head; }
iterator end() { return NULL; }
bool empty() const { ut_ad(!head == !tail); return !head; }
+ /** Clear and free the records; @see recv_sys_t::alloc() */
inline void clear();
-#ifdef UNIV_DEBUG
- /** Declare the records as freed; @see recv_sys_t::alloc() */
- inline void free() const;
-#endif
} log;
/** Ignore any earlier redo log records for this page. */
@@ -282,32 +279,32 @@ struct recv_sys_t{
/** the time when progress was last reported */
time_t progress_time;
- using map = std::map<const page_id_t, page_recv_t,
- std::less<const page_id_t>,
- ut_allocator
- <std::pair<const page_id_t, page_recv_t>>>;
- /** buffered records waiting to be applied to pages */
- map pages;
-
- /** Process a record that indicates that a tablespace is
- being shrunk in size.
- @param page_id first page identifier that is not in the file
- @param lsn log sequence number of the shrink operation */
- inline void trim(const page_id_t page_id, lsn_t lsn);
-
- /** Undo tablespaces for which truncate has been logged
- (indexed by id - srv_undo_space_id_start) */
- struct trunc {
- /** log sequence number of MLOG_FILE_CREATE2, or 0 if none */
- lsn_t lsn;
- /** truncated size of the tablespace, or 0 if not truncated */
- unsigned pages;
- } truncated_undo_spaces[127];
-
- recv_dblwr_t dblwr;
-
- /** Last added LSN to pages. */
- lsn_t last_stored_lsn;
+ using map = std::map<const page_id_t, page_recv_t,
+ std::less<const page_id_t>,
+ ut_allocator<std::pair<const page_id_t, page_recv_t>>>;
+ /** buffered records waiting to be applied to pages */
+ map pages;
+
+ /** Process a record that indicates that a tablespace size is being shrunk.
+ @param page_id first page that is not in the file
+ @param lsn log sequence number of the shrink operation */
+ inline void trim(const page_id_t page_id, lsn_t lsn);
+
+ /** Undo tablespaces for which truncate has been logged
+ (indexed by page_id_t::space() - srv_undo_space_id_start) */
+ struct trunc
+ {
+ /** log sequence number of MLOG_FILE_CREATE2, or 0 if none */
+ lsn_t lsn;
+ /** truncated size of the tablespace, or 0 if not truncated */
+ unsigned pages;
+ } truncated_undo_spaces[127];
+
+ /** The contents of the doublewrite buffer */
+ recv_dblwr_t dblwr;
+
+ /** Last added LSN to pages. */
+ lsn_t last_stored_lsn;
private:
/** Maximum number of buffer pool blocks to allocate for redo log records */
@@ -374,17 +371,9 @@ public:
@return pointer to len bytes of memory (never NULL) */
inline byte *alloc(size_t len, bool store_recv= false);
-#ifdef UNIV_DEBUG
-private:
- /** Find the buffer pool block that is storing a redo log record.
- @param[in] data pointer to buffer returned by alloc()
- @return redo list element */
- inline buf_block_t *find_block(const void *data) const;
-public:
- /** Declare a redo log record freed from a buffer pool block.
- @param[in] data pointer to buffer returned by alloc() */
- inline void free(const void *data) const;
-#endif
+ /** Free a redo log snippet.
+ @param data buffer returned by alloc() */
+ inline void free(const void *data);
/** @return the free length of the latest alloc() block, in bytes */
inline size_t get_free_len() const;
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 1d3be9ae6cc..d378638b154 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -152,17 +152,21 @@ struct recv_t : public log_rec_t
@param d log snippet
*/
void append(data_t *d) { ut_ad(!next); ut_ad(!d->next); next= d; }
-#ifdef UNIV_DEBUG
- /** Declare the record freed in the buffer pool */
- void free()
+ }* data;
+
+ /** Free the log snippet */
+ void free() const
+ {
+ data_t *d= data;
+ do
{
- data_t *recv_data= this;
- do
- recv_sys.free(recv_data);
- while ((recv_data= recv_data->next));
+ data_t *next= d->next;
+ recv_sys.free(d);
+ d= next;
}
-#endif
- }* data;
+ while (d);
+ recv_sys.free(this);
+ }
};
@@ -684,7 +688,9 @@ void recv_sys_t::close()
if (is_initialised()) {
dblwr.pages.clear();
- pages.clear();
+ ut_d(mutex_enter(&mutex));
+ clear();
+ ut_d(mutex_exit(&mutex));
if (flush_start) {
os_event_destroy(flush_start);
@@ -828,16 +834,14 @@ inline void recv_sys_t::clear()
ut_ad(mutex_own(&mutex));
apply_log_recs= false;
apply_batch_on= false;
+ ut_ad(!after_apply || !UT_LIST_GET_LAST(blocks));
pages.clear();
for (buf_block_t *block= UT_LIST_GET_LAST(blocks); block; )
{
buf_block_t *prev_block= UT_LIST_GET_PREV(unzip_LRU, block);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_MEMORY);
- /* Check buf_fix_count after applying all buffered redo log records */
- ut_ad(!after_apply || !block->page.buf_fix_count);
UT_LIST_REMOVE(blocks, block);
- ut_d(block->page.buf_fix_count= 0);
buf_block_free(block);
block= prev_block;
}
@@ -868,11 +872,13 @@ void recv_sys_t::debug_free()
inline size_t recv_sys_t::get_free_len() const
{
- if (UT_LIST_GET_LEN(blocks) == 0)
- return 0;
-
- return srv_page_size -
- static_cast<size_t>(UT_LIST_GET_FIRST(blocks)->modify_clock);
+ if (const buf_block_t* block= UT_LIST_GET_FIRST(blocks))
+ {
+ if (const size_t used= static_cast<uint16_t>(block->page.access_time))
+ return srv_page_size - used;
+ ut_ad(srv_page_size == 65536);
+ }
+ return 0;
}
inline byte* recv_sys_t::alloc(size_t len, bool store_recv)
@@ -886,41 +892,59 @@ inline byte* recv_sys_t::alloc(size_t len, bool store_recv)
{
create_block:
block= buf_block_alloc(nullptr);
- block->modify_clock= len;
+ block->page.access_time= 1U << 16 | static_cast<uint16_t>(len);
UT_LIST_ADD_FIRST(blocks, block);
+ UNIV_MEM_INVALID(block->frame, len);
+ UNIV_MEM_FREE(block->frame + len, srv_page_size - len);
return block->frame;
}
- size_t free_offset= static_cast<size_t>(block->modify_clock);
+ size_t free_offset= static_cast<uint16_t>(block->page.access_time);
+ if (UNIV_UNLIKELY(!free_offset))
+ {
+ ut_ad(srv_page_size == 65536);
+ goto create_block;
+ }
ut_ad(free_offset <= srv_page_size);
+ free_offset+= len;
- if (store_recv &&
- free_offset + len + sizeof(recv_t::data) + 1 > srv_page_size)
+ if (store_recv && free_offset + sizeof(recv_t::data) + 1 > srv_page_size)
goto create_block;
- if (free_offset + len > srv_page_size)
+ if (free_offset > srv_page_size)
goto create_block;
- block->modify_clock= free_offset + len;
- return block->frame + free_offset;
+
+ block->page.access_time= ((block->page.access_time >> 16) + 1) << 16 |
+ static_cast<uint16_t>(free_offset);
+ UNIV_MEM_ALLOC(block->frame + free_offset - len, len);
+ return block->frame + free_offset - len;
}
-#ifdef UNIV_DEBUG
-inline buf_block_t *recv_sys_t::find_block(const void* data) const
+
+/** Free a redo log snippet.
+@param data buffer returned by alloc() */
+inline void recv_sys_t::free(const void *data)
{
data= page_align(data);
+ ut_ad(mutex_own(&mutex));
for (buf_block_t *block= UT_LIST_GET_LAST(blocks);
block; block = UT_LIST_GET_PREV(unzip_LRU, block))
+ {
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_MEMORY);
+ ut_ad(block->page.access_time >= 1U << 16);
if (block->frame == data)
- return block;
+ {
+ if (!((block->page.access_time -= 1U << 16) >> 16))
+ {
+ UT_LIST_REMOVE(blocks, block);
+ buf_block_free(block);
+ }
+ return;
+ }
+ }
ut_ad(0);
- return nullptr;
}
-inline void recv_sys_t::free(const void *data) const
-{
- find_block(data)->unfix();
-}
-#endif
/** Read a log segment to log_sys.buf.
@param[in,out] start_lsn in: read area start,
@@ -1826,7 +1850,6 @@ inline void recv_sys_t::add(mlog_id_t type, const page_id_t page_id,
const size_t l= std::min(len, get_free_len() - sizeof(recv_t::data));
recv_t::data_t *d= new (alloc(sizeof(recv_t::data) + l))
recv_t::data_t(body, l);
- ut_d(find_block(d)->fix());
if (prev)
prev->append(d);
else
@@ -1840,30 +1863,32 @@ inline void recv_sys_t::add(mlog_id_t type, const page_id_t page_id,
}
}
-/** Trim old log records for a page
+/** Trim old log records for a page.
@param start_lsn oldest log sequence number to preserve
-@return whether the entire log was trimmed */
+@return whether all the log for the page was trimmed */
inline bool page_recv_t::recs_t::trim(lsn_t start_lsn)
{
- for (log_rec_t** prev= &head; *prev; *prev= (*prev)->next)
+ while (head)
{
- if ((*prev)->lsn >= start_lsn) return false;
- ut_d(static_cast<const recv_t*>(*prev)->data->free());
+ if (head->lsn >= start_lsn) return false;
+ log_rec_t *next= head->next;
+ static_cast<const recv_t*>(head)->free();
+ head= next;
}
+ tail= nullptr;
return true;
}
-#ifdef UNIV_DEBUG
-inline void page_recv_t::recs_t::free() const
-{
- for (const log_rec_t *l= head; l; l= l->next)
- static_cast<const recv_t*>(l)->data->free();
-}
-#endif
inline void page_recv_t::recs_t::clear()
{
- ut_d(free());
+ ut_ad(mutex_own(&recv_sys.mutex));
+ for (const log_rec_t *l= head; l; )
+ {
+ const log_rec_t *next= l->next;
+ static_cast<const recv_t*>(l)->free();
+ l= next;
+ }
head= tail= nullptr;
}
@@ -2016,8 +2041,6 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
end_lsn);
}
}
-
- ut_d(recv->data->free(););
}
#ifdef UNIV_ZIP_DEBUG
@@ -2055,7 +2078,6 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
ut_ad(p->second.is_being_processed());
ut_ad(!recv_sys.pages.empty());
- recv_sys.pages.erase(p);
if (recv_sys.report(now)) {
const ulint n = recv_sys.pages.size();
@@ -2071,12 +2093,12 @@ This function should only be called when innodb_force_recovery is set.
ATTRIBUTE_COLD void recv_sys_t::free_corrupted_page(page_id_t page_id)
{
mutex_enter(&mutex);
-#ifdef UNIV_DEBUG
- map::const_iterator p= pages.find(page_id);
+ map::iterator p= pages.find(page_id);
if (p != pages.end())
- p->second.log.free();
-#endif
- pages.erase(page_id);
+ {
+ p->second.log.clear();
+ pages.erase(p);
+ }
mutex_exit(&mutex);
}
@@ -2106,6 +2128,8 @@ void recv_recover_page(buf_page_t* bpage)
if (p != recv_sys.pages.end()
&& !p->second.is_being_processed()) {
recv_recover_page(block, mtr, p);
+ p->second.log.clear();
+ recv_sys.pages.erase(p);
goto func_exit;
}
}
@@ -2239,8 +2263,15 @@ void recv_apply_hashed_log_recs(bool last_batch)
} else {
mtr.commit();
recv_read_in_area(page_id);
+ break;
}
- break;
+ ignore:
+ {
+ recv_sys_t::map::iterator r = p++;
+ r->second.log.clear();
+ recv_sys.pages.erase(r);
+ }
+ continue;
case page_recv_t::RECV_WILL_NOT_READ:
mlog_init_t::init& i = mlog_init.last(page_id);
const lsn_t end_lsn = recs.log.last()->lsn;
@@ -2249,11 +2280,7 @@ void recv_apply_hashed_log_recs(bool last_batch)
<< page_id
<< " LSN " << end_lsn
<< " < " << i.lsn);
-ignore:
- recv_sys_t::map::iterator r = p++;
- ut_d(r->second.log.free());
- recv_sys.pages.erase(r);
- continue;
+ goto ignore;
}
fil_space_t* space = fil_space_acquire_for_io(
@@ -2311,6 +2338,8 @@ do_read:
mtr.x_latch_at_savepoint(0, block);
recv_recover_page(block, mtr, p, &i);
ut_ad(mtr.has_committed());
+ p->second.log.clear();
+ recv_sys.pages.erase(p);
}
space->release_for_io();
@@ -3311,6 +3340,8 @@ recv_validate_tablespace(bool rescan, bool& missing_tablespace)
{
dberr_t err = DB_SUCCESS;
+ mutex_enter(&recv_sys.mutex);
+
for (recv_sys_t::map::iterator p = recv_sys.pages.begin();
p != recv_sys.pages.end();) {
ut_ad(!p->second.log.empty());
@@ -3333,7 +3364,7 @@ next:
/* fall through */
case file_name_t::DELETED:
recv_sys_t::map::iterator r = p++;
- ut_d(r->second.log.free(););
+ r->second.log.clear();
recv_sys.pages.erase(r);
continue;
}
@@ -3341,6 +3372,8 @@ next:
}
if (err != DB_SUCCESS) {
+func_exit:
+ mutex_exit(&recv_sys.mutex);
return(err);
}
@@ -3375,7 +3408,8 @@ next:
missing_tablespace = false;
}
- return DB_SUCCESS;
+ err = DB_SUCCESS;
+ goto func_exit;
}
/** Check if all tablespaces were found for crash recovery.