Diffstat (limited to 'storage/innobase/include/buf0buf.h')
-rw-r--r--  storage/innobase/include/buf0buf.h | 619
1 file changed, 258 insertions(+), 361 deletions(-)
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index fce5702c171..20a76992b7d 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -73,22 +73,6 @@ extern my_bool buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing
buffer pool is not allowed. */
# endif /* UNIV_DEBUG */
-/** buf_page_t::state() values, distinguishing buf_page_t and buf_block_t */
-enum buf_page_state
-{
- /** available in buf_pool.free or buf_pool.watch */
- BUF_BLOCK_NOT_USED,
- /** allocated for something else than a file page */
- BUF_BLOCK_MEMORY,
- /** a previously allocated file page, in transit to NOT_USED */
- BUF_BLOCK_REMOVE_HASH,
- /** a buf_block_t that is also in buf_pool.LRU */
- BUF_BLOCK_FILE_PAGE,
- /** the buf_page_t of a ROW_FORMAT=COMPRESSED page
- whose uncompressed page frame has been evicted */
- BUF_BLOCK_ZIP_PAGE
-};
-
/** This structure defines information we will fetch from each buffer pool. It
will be used to print table IO stats */
struct buf_pool_info_t
@@ -170,12 +154,9 @@ operator<<(
#ifndef UNIV_INNOCHECKSUM
# define buf_pool_get_curr_size() srv_buf_pool_curr_size
-# define buf_page_alloc_descriptor() \
- static_cast<buf_page_t*>(ut_zalloc_nokey(sizeof(buf_page_t)))
-# define buf_page_free_descriptor(bpage) ut_free(bpage)
/** Allocate a buffer block.
-@return own: the allocated block, in state BUF_BLOCK_MEMORY */
+@return own: the allocated block, state()==MEMORY */
inline buf_block_t *buf_block_alloc();
/********************************************************************//**
Frees a buffer block which does not contain a file page. */
@@ -200,17 +181,14 @@ error-prone programming not to set a latch, and it should be used
with care. */
#define buf_page_get_with_no_latch(ID, SIZE, MTR) \
buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, MTR)
-/********************************************************************//**
-This is the general function used to get optimistic access to a database
-page.
-@return TRUE if success */
-ibool
-buf_page_optimistic_get(
-/*====================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: guessed block */
- ib_uint64_t modify_clock,/*!< in: modify clock value */
- mtr_t* mtr); /*!< in: mini-transaction */
+/** Try to acquire a page latch.
+@param rw_latch RW_S_LATCH or RW_X_LATCH
+@param block guessed block
+@param modify_clock expected value of block->modify_clock
+@param mtr mini-transaction
+@return whether the latch was acquired (the page is an allocated file page) */
+bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
+ uint64_t modify_clock, mtr_t *mtr);
/** Try to S-latch a page.
Suitable for use when holding the lock_sys latches (as it avoids deadlock).
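
The rewritten buf_page_optimistic_get() keeps the classic optimistic-latching
pattern. A minimal caller sketch follows; only the functions declared in this
header are real, while guessed_block, saved_clock and process() are
illustrative:

  /* Earlier, while the page was latched:
     saved_clock= buf_block_get_modify_clock(guessed_block); */
  mtr_t mtr;
  mtr.start();
  if (buf_page_optimistic_get(RW_S_LATCH, guessed_block, saved_clock, &mtr))
  {
    /* The guess held: the block is still an allocated file page with an
       unchanged modify clock, and it is now S-latched by the mtr. */
    process(guessed_block);
  }
  /* else: the page was evicted or modified in the meantime;
     fall back to a pessimistic lookup by page identifier. */
  mtr.commit();
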
@@ -222,15 +200,15 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr);
/** Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
-The page must be released with buf_page_release_zip().
+The page must be released with unfix().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size
-@return pointer to the block */
-buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size);
+@param page_id page identifier
+@param zip_size ROW_FORMAT=COMPRESSED page size in bytes
+@return pointer to the block, s-latched */
+buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size);
/** Get access to a database page. Buffered redo log may be applied.
@param[in] page_id page id
@@ -282,8 +260,8 @@ buf_page_get_low(
/** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer pool. This is one
-of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_get_gen).
+of the functions that perform a state transition NOT_USED => LRU on a block
+(the other is buf_page_get_low()).
@param[in,out] space space object
@param[in] offset offset of the tablespace
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@@ -305,22 +283,6 @@ buf_block_t*
buf_page_create_deferred(uint32_t space_id, ulint zip_size, mtr_t *mtr,
buf_block_t *free_block);
-/********************************************************************//**
-Releases a compressed-only page acquired with buf_page_get_zip(). */
-UNIV_INLINE
-void
-buf_page_release_zip(
-/*=================*/
- buf_page_t* bpage); /*!< in: buffer block */
-/********************************************************************//**
-Releases a latch, if specified. */
-UNIV_INLINE
-void
-buf_page_release_latch(
-/*=====================*/
- buf_block_t* block, /*!< in: buffer block */
- ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH,
- RW_NO_LATCH */
/** Move a block to the start of the LRU list. */
void buf_page_make_young(buf_page_t *bpage);
/** Mark the page status as FREED for the given tablespace and page number.
@@ -371,13 +333,6 @@ ib_uint64_t
buf_block_get_modify_clock(
/*=======================*/
buf_block_t* block); /*!< in: block */
-/*******************************************************************//**
-Increments the bufferfix count. */
-# define buf_block_buf_fix_inc(block) (block)->fix()
-
-/*******************************************************************//**
-Decrements the bufferfix count. */
-# define buf_block_buf_fix_dec(block) (block)->unfix()
#endif /* !UNIV_INNOCHECKSUM */
/** Check if a buffer is all zeroes.
@@ -516,19 +471,7 @@ void buf_pool_invalidate();
--------------------------- LOWER LEVEL ROUTINES -------------------------
=========================================================================*/
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets a pointer to the memory frame of a block.
-@return pointer to the frame */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((warn_unused_result));
-#else /* UNIV_DEBUG */
-# define buf_block_get_frame(block) (block)->frame
-#endif /* UNIV_DEBUG */
+#define buf_block_get_frame(block) (block)->page.frame
/*********************************************************************//**
Gets the compressed page descriptor corresponding to an uncompressed page
@@ -541,18 +484,8 @@ if applicable. */
/** Monitor the buffer page read/write activity, and increment corresponding
counter value in MONITOR_MODULE_BUF_PAGE.
@param bpage buffer page whose read or write was completed
-@param io_type BUF_IO_READ or BUF_IO_WRITE */
-ATTRIBUTE_COLD __attribute__((nonnull))
-void buf_page_monitor(const buf_page_t *bpage, buf_io_fix io_type);
-
-/** Complete a read request of a file page to buf_pool.
-@param bpage recently read page
-@param node data file
-@return whether the operation succeeded
-@retval DB_SUCCESS always when writing, or if a read page was OK
-@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
-@retval DB_DECRYPTION_FAILED if the page cannot be decrypted */
-dberr_t buf_page_read_complete(buf_page_t *bpage, const fil_node_t &node);
+@param read true=read, false=write */
+ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read);
/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
if needed.
@@ -622,6 +555,7 @@ class buf_page_t
{
friend buf_pool_t;
friend buf_block_t;
+
/** @name General fields */
/* @{ */
@@ -629,10 +563,9 @@ public: // FIXME: fix fil_iterate()
/** Page id. Protected by buf_pool.page_hash.lock_get() when
the page is in buf_pool.page_hash. */
page_id_t id_;
+ /** buf_pool.page_hash link; protected by buf_pool.page_hash.lock_get() */
+ buf_page_t *hash;
private:
- /** Count of how manyfold this block is currently bufferfixed. */
- Atomic_counter<uint32_t> buf_fix_count_;
-
/** log sequence number of the START of the log entry written of the
oldest modification to this block which has not yet been written
to the data file;
@@ -643,49 +576,64 @@ private:
(because id().space() is the temporary tablespace). */
Atomic_relaxed<lsn_t> oldest_modification_;
- /** type of pending I/O operation; protected by buf_pool.mutex
- if in_LRU_list */
- Atomic_relaxed<buf_io_fix> io_fix_;
- /** Block state. @see in_file().
- State transitions between in_file() states and to
- BUF_BLOCK_REMOVE_HASH are protected by buf_pool.page_hash.lock_get()
- when the block is in buf_pool.page_hash.
- Other transitions when in_LRU_list are protected by buf_pool.mutex. */
- buf_page_state state_;
-
public:
- /** buf_pool.page_hash link; protected by buf_pool.page_hash.lock_get() */
- buf_page_t *hash;
+ /** state() of unused block (in buf_pool.free list) */
+ static constexpr uint32_t NOT_USED= 0;
+ /** state() of block allocated as general-purpose memory */
+ static constexpr uint32_t MEMORY= 1;
+ /** state() of block that is being freed */
+ static constexpr uint32_t REMOVE_HASH= 2;
+ /** smallest state() of a buffer page that is freed in the tablespace */
+ static constexpr uint32_t FREED= 3;
+ /** smallest state() for a block that belongs to buf_pool.LRU */
+ static constexpr uint32_t UNFIXED= 1U << 29;
+ /** smallest state() of a block for which buffered changes may exist */
+ static constexpr uint32_t IBUF_EXIST= 2U << 29;
+ /** smallest state() of a (re)initialized page (no doublewrite needed) */
+ static constexpr uint32_t REINIT= 3U << 29;
+ /** smallest state() for an io-fixed block */
+ static constexpr uint32_t READ_FIX= 4U << 29;
+ /** smallest state() for a write-fixed block */
+ static constexpr uint32_t WRITE_FIX= 5U << 29;
+ /** smallest state() for a write-fixed block with buffered changes */
+ static constexpr uint32_t WRITE_FIX_IBUF= 6U << 29;
+ /** smallest state() for a write-fixed block (no doublewrite was used) */
+ static constexpr uint32_t WRITE_FIX_REINIT= 7U << 29;
+ /** buf_pool.LRU status mask in state() */
+ static constexpr uint32_t LRU_MASK= 7U << 29;
+
+ /** lock covering the contents of frame */
+ block_lock lock;
+ /** pointer to aligned, uncompressed page frame of innodb_page_size */
+ byte *frame;
/* @} */
- page_zip_des_t zip; /*!< compressed page; zip.data
- (but not the data it points to) is
- also protected by buf_pool.mutex;
- state == BUF_BLOCK_ZIP_PAGE and
- zip.data == NULL means an active
- buf_pool.watch */
+ /** ROW_FORMAT=COMPRESSED page; zip.data (but not the data it points to)
+ is also protected by buf_pool.mutex;
+ !frame && !zip.data means an active buf_pool.watch */
+ page_zip_des_t zip;
#ifdef UNIV_DEBUG
/** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */
bool in_zip_hash;
- /** whether this->LRU is in buf_pool.LRU (in_file() holds);
+ /** whether this->LRU is in buf_pool.LRU (in_file());
protected by buf_pool.mutex */
bool in_LRU_list;
- /** whether this is in buf_pool.page_hash (in_file() holds);
+ /** whether this is in buf_pool.page_hash (in_file());
protected by buf_pool.mutex */
bool in_page_hash;
- /** whether this->list is in buf_pool.free (state() == BUF_BLOCK_NOT_USED);
+ /** whether this->list is in buf_pool.free (state() == NOT_USED);
protected by buf_pool.flush_list_mutex */
bool in_free_list;
#endif /* UNIV_DEBUG */
/** list member in one of the lists of buf_pool; protected by
buf_pool.mutex or buf_pool.flush_list_mutex
- state() == BUF_BLOCK_NOT_USED: buf_pool.free or buf_pool.withdraw
+ state() == NOT_USED: buf_pool.free or buf_pool.withdraw
in_file() && oldest_modification():
buf_pool.flush_list (protected by buf_pool.flush_list_mutex)
The contents is undefined if in_file() && !oldest_modification(),
- or if state() is BUF_BLOCK_MEMORY or BUF_BLOCK_REMOVE_HASH. */
+ or if state() == MEMORY or state() == REMOVE_HASH. */
UT_LIST_NODE_T(buf_page_t) list;
/** @name LRU replacement algorithm fields.
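
Taken together, the constants above pack the whole descriptor status into one
32-bit word: the three most significant bits hold the LRU/io-fix status and
the low 29 bits hold the buffer-fix count. A standalone sketch of the
decoding — it mirrors buf_page_t::buf_fix_count() further down, but is not
the class itself:

  #include <cassert>
  #include <cstdint>

  static constexpr uint32_t FREED= 3, UNFIXED= 1U << 29,
    READ_FIX= 4U << 29, WRITE_FIX= 5U << 29, LRU_MASK= 7U << 29;

  /* Mirrors buf_page_t::buf_fix_count(): pages freed in the tablespace
     count their pins upward from FREED; other in-file pages carry the
     count in the bits below LRU_MASK. */
  inline uint32_t fix_count(uint32_t s)
  {
    assert(s >= FREED);
    return s < UNFIXED ? s - FREED : s & ~LRU_MASK;
  }

  int main()
  {
    assert(fix_count(UNFIXED + 2) == 2);   /* page in LRU, 2 pins */
    assert(fix_count(FREED + 1) == 1);     /* freed page, 1 pin */
    assert(fix_count(READ_FIX + 1) == 1);  /* io-fixed pages stay pinned */
    assert(READ_FIX + 1 < WRITE_FIX);      /* ...and this one is read-fixed */
  }
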
@@ -709,7 +657,7 @@ public:
0 if the block was never accessed
in the buffer pool.
- For state==BUF_BLOCK_MEMORY
+ For state() == MEMORY
blocks, this field can be repurposed
for something else.
@@ -717,88 +665,126 @@ public:
and bytes allocated for recv_sys.pages,
the field is protected by
recv_sys_t::mutex. */
- /** Change buffer entries for the page exist.
- Protected by io_fix()==BUF_IO_READ or by buf_block_t::lock. */
- bool ibuf_exist;
-
- /** Block initialization status. Can be modified while holding io_fix()
- or buf_block_t::lock X-latch */
- enum {
- /** the page was read normally and should be flushed normally */
- NORMAL = 0,
- /** the page was (re)initialized, and the doublewrite buffer can be
- skipped on the next flush */
- INIT_ON_FLUSH,
- /** the page was freed and need to be flushed.
- For page_compressed, page flush will punch a hole to free space.
- Else if innodb_immediate_scrub_data_uncompressed, the page will
- be overwritten with zeroes. */
- FREED
- } status;
-
- buf_page_t() : id_(0)
+ buf_page_t() : id_{0}
{
- static_assert(BUF_BLOCK_NOT_USED == 0, "compatibility");
+ static_assert(NOT_USED == 0, "compatibility");
memset((void*) this, 0, sizeof *this);
}
- /** Initialize some fields */
- void init()
+ buf_page_t(const buf_page_t &b) :
+ id_(b.id_), hash(b.hash),
+ oldest_modification_(b.oldest_modification_),
+ lock() /* not copied */,
+ frame(b.frame), zip(b.zip),
+#ifdef UNIV_DEBUG
+ in_zip_hash(b.in_zip_hash), in_LRU_list(b.in_LRU_list),
+ in_page_hash(b.in_page_hash), in_free_list(b.in_free_list),
+#endif /* UNIV_DEBUG */
+ list(b.list), LRU(b.LRU), old(b.old), freed_page_clock(b.freed_page_clock),
+ access_time(b.access_time)
{
- io_fix_= BUF_IO_NONE;
- buf_fix_count_= 0;
- old= 0;
- freed_page_clock= 0;
- access_time= 0;
+ lock.init();
+ }
+
+ /** Initialize some more fields */
+ void init(uint32_t state, page_id_t id)
+ {
+ ut_ad(state < REMOVE_HASH || state >= UNFIXED);
+ id_= id;
+ zip.fix= state;
oldest_modification_= 0;
- ibuf_exist= false;
- status= NORMAL;
+ lock.init();
ut_d(in_zip_hash= false);
ut_d(in_free_list= false);
ut_d(in_LRU_list= false);
ut_d(in_page_hash= false);
+ old= 0;
+ freed_page_clock= 0;
+ access_time= 0;
}
- /** Initialize some more fields */
- void init(buf_page_state state, page_id_t id, uint32_t buf_fix_count= 0)
+public:
+ const page_id_t &id() const { return id_; }
+ uint32_t state() const { return zip.fix; }
+ uint32_t buf_fix_count() const
{
- init();
- state_= state;
- id_= id;
- buf_fix_count_= buf_fix_count;
+ uint32_t f= state();
+ ut_ad(f >= FREED);
+ return f < UNFIXED ? (f - FREED) : (~LRU_MASK & f);
}
+ /** @return whether this block is read or write fixed;
+ read_complete() or write_complete() will always release
+ the io-fix before releasing U-lock or X-lock */
+ bool is_io_fixed() const
+ { const auto s= state(); ut_ad(s >= FREED); return s >= READ_FIX; }
+ /** @return whether this block is write fixed;
+ write_complete() will always release the write-fix before releasing U-lock */
+ bool is_write_fixed() const { return state() >= WRITE_FIX; }
+ /** @return whether this block is read fixed; this should never hold
+ when a thread is holding the block lock in any mode */
+ bool is_read_fixed() const { return is_io_fixed() && !is_write_fixed(); }
- /** Initialize some more fields */
- void init(page_id_t id, uint32_t buf_fix_count= 0)
+ /** @return if this belongs to buf_pool.unzip_LRU */
+ bool belongs_to_unzip_LRU() const
+ { return UNIV_LIKELY_NULL(zip.data) && frame; }
+
+ bool is_freed() const
+ { const auto s= state(); ut_ad(s >= FREED); return s < UNFIXED; }
+ bool is_ibuf_exist() const
{
- init();
- id_= id;
- buf_fix_count_= buf_fix_count;
- hash= nullptr;
+ const auto s= state();
+ ut_ad(s >= UNFIXED);
+ ut_ad(s < READ_FIX);
+ return (s & LRU_MASK) == IBUF_EXIST;
}
+ bool is_reinit() const { return !(~state() & REINIT); }
-public:
- const page_id_t &id() const { return id_; }
- buf_page_state state() const { return state_; }
- uint32_t buf_fix_count() const { return buf_fix_count_; }
- buf_io_fix io_fix() const { return io_fix_; }
- void io_unfix()
+ void set_reinit(uint32_t prev_state)
{
- ut_d(const auto old_io_fix= io_fix());
- ut_ad(old_io_fix == BUF_IO_READ || old_io_fix == BUF_IO_PIN);
- io_fix_= BUF_IO_NONE;
+ ut_ad(prev_state < READ_FIX);
+ ut_d(const auto s=) zip.fix.fetch_add(REINIT - prev_state);
+ ut_ad(s > prev_state);
+ ut_ad(s < prev_state + UNFIXED);
}
- /** @return if this belongs to buf_pool.unzip_LRU */
- bool belongs_to_unzip_LRU() const
+ void set_ibuf_exist()
+ {
+ ut_ad(lock.is_write_locked());
+ ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0));
+ const auto s= state();
+ ut_ad(s >= UNFIXED);
+ ut_ad(s < READ_FIX);
+ ut_ad(s < IBUF_EXIST || s >= REINIT);
+ zip.fix.fetch_add(IBUF_EXIST - (LRU_MASK & s));
+ }
+ void clear_ibuf_exist()
+ {
+ ut_ad(lock.is_write_locked());
+ ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0));
+ ut_d(const auto s=) zip.fix.fetch_sub(IBUF_EXIST - UNFIXED);
+ ut_ad(s >= IBUF_EXIST);
+ ut_ad(s < REINIT);
+ }
+
+ void read_unfix(uint32_t s)
{
- return zip.data && state() != BUF_BLOCK_ZIP_PAGE;
+ ut_ad(lock.is_write_locked());
+ ut_ad(s == UNFIXED + 1 || s == IBUF_EXIST + 1 || s == REINIT + 1);
+ ut_d(auto old_state=) zip.fix.fetch_add(s - READ_FIX);
+ ut_ad(old_state >= READ_FIX);
+ ut_ad(old_state < WRITE_FIX);
}
- inline void add_buf_fix_count(uint32_t count);
- inline void set_buf_fix_count(uint32_t count);
- inline void set_state(buf_page_state state);
- inline void set_io_fix(buf_io_fix io_fix);
+ void set_freed(uint32_t prev_state, uint32_t count= 0)
+ {
+ ut_ad(lock.is_write_locked());
+ ut_ad(prev_state >= UNFIXED);
+ ut_ad(prev_state < READ_FIX);
+ ut_d(auto s=) zip.fix.fetch_sub((prev_state & LRU_MASK) - FREED - count);
+ ut_ad(!((prev_state ^ s) & LRU_MASK));
+ }
+
+ inline void set_state(uint32_t s);
inline void set_corrupt_id();
/** @return the log sequence number of the oldest pending modification
@@ -818,15 +804,35 @@ public:
inline void set_oldest_modification(lsn_t lsn);
/** Clear oldest_modification after removing from buf_pool.flush_list */
inline void clear_oldest_modification();
+ /** Reset the oldest_modification when marking a persistent page freed */
+ void reset_oldest_modification()
+ {
+ ut_ad(oldest_modification() > 2);
+ oldest_modification_.store(1, std::memory_order_release);
+ }
+
+ /** Complete a read of a page.
+ @param node data file
+ @return whether the operation succeeded
+ @retval DB_PAGE_CORRUPTED if the checksum fails
+ @retval DB_DECRYPTION_FAILED if the page cannot be decrypted */
+ dberr_t read_complete(const fil_node_t &node);
+
/** Note that a block is no longer dirty, while not removing
it from buf_pool.flush_list */
- inline void clear_oldest_modification(bool temporary);
+ inline void write_complete(bool temporary);
+
+ /** Write a flushable page to a file. buf_pool.mutex must be held.
+ @param lru true=buf_pool.LRU; false=buf_pool.flush_list
+ @param space tablespace
+ @return whether the page was flushed and buf_pool.mutex was released */
+ inline bool flush(bool lru, fil_space_t *space);
/** Notify that a page in a temporary tablespace has been modified. */
void set_temp_modified()
{
ut_ad(fsp_is_system_temporary(id().space()));
- ut_ad(state() == BUF_BLOCK_FILE_PAGE);
+ ut_ad(in_file());
ut_ad(!oldest_modification());
oldest_modification_= 2;
}
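
For reference, the oldest_modification_ encoding that set_temp_modified(),
reset_oldest_modification() and the flush_list rule rely on can be summed up
by a hypothetical helper (not part of the patch; the values are taken from
the assertions in this header):

  /* 0:  clean; not in buf_pool.flush_list
     1:  dirty persistent page being detached from buf_pool.flush_list
         (stored by reset_oldest_modification() when marking it freed)
     2:  dirty page of the temporary tablespace; never in flush_list
     >2: start LSN of the oldest unwritten change; in flush_list */
  inline bool is_dirty(lsn_t om) { return om != 0; }
  inline bool in_flush_list(lsn_t om) { return om == 1 || om > 2; }
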
@@ -834,19 +840,35 @@ public:
/** Prepare to release a file page to buf_pool.free. */
void free_file_page()
{
- ut_ad(state() == BUF_BLOCK_REMOVE_HASH);
+ ut_ad((zip.fix.fetch_sub(REMOVE_HASH - MEMORY)) == REMOVE_HASH);
/* buf_LRU_block_free_non_file_page() asserts !oldest_modification() */
ut_d(oldest_modification_= 0;)
- set_corrupt_id();
- ut_d(set_state(BUF_BLOCK_MEMORY));
+ id_= page_id_t(~0ULL);
+ }
+
+ void fix_on_recovery()
+ {
+ ut_d(const auto f=) zip.fix.fetch_sub(READ_FIX - UNFIXED - 1);
+ ut_ad(f >= READ_FIX);
+ ut_ad(f < WRITE_FIX);
+ }
+
+ uint32_t fix(uint32_t count= 1)
+ {
+ ut_ad(count);
+ ut_ad(count < IBUF_EXIST);
+ uint32_t f= zip.fix.fetch_add(count);
+ ut_ad(f >= FREED);
+ ut_ad(!((f ^ (f + 1)) & LRU_MASK));
+ return f;
}
- void fix() { buf_fix_count_++; }
uint32_t unfix()
{
- uint32_t count= buf_fix_count_--;
- ut_ad(count != 0);
- return count - 1;
+ uint32_t f= zip.fix.fetch_sub(1);
+ ut_ad(f > FREED);
+ ut_ad(!((f ^ (f - 1)) & LRU_MASK));
+ return f - 1;
}
/** @return the physical size, in bytes */
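
Because the count and the status share one word, fix() and unfix() above are
single atomic additions whose debug assertions check that the count never
carries into the status bits. A standalone sketch under that assumption:

  #include <atomic>
  #include <cassert>
  #include <cstdint>

  static constexpr uint32_t UNFIXED= 1U << 29, LRU_MASK= 7U << 29;

  int main()
  {
    std::atomic<uint32_t> fix{UNFIXED};   /* stand-in for zip.fix */
    uint32_t f= fix.fetch_add(1);         /* like buf_page_t::fix() */
    assert(!((f ^ (f + 1)) & LRU_MASK));  /* no carry into status bits */
    f= fix.fetch_sub(1);                  /* like buf_page_t::unfix() */
    assert(!((f ^ (f - 1)) & LRU_MASK));  /* no borrow from them either */
    assert(fix.load() == UNFIXED);        /* back to clean and unpinned */
  }
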
@@ -872,27 +894,10 @@ public:
}
/** @return whether the block is mapped to a data file */
- bool in_file() const
- {
- switch (state_) {
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_FILE_PAGE:
- return true;
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- return false;
- }
-
- ut_error;
- return false;
- }
+ bool in_file() const { return state() >= FREED; }
/** @return whether the block is modified and ready for flushing */
inline bool ready_for_flush() const;
- /** @return whether the state can be changed to BUF_BLOCK_NOT_USED */
- bool ready_for_replace() const
- { return !oldest_modification() && can_relocate(); }
/** @return whether the block can be relocated in memory.
The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
inline bool can_relocate() const;
@@ -924,27 +929,18 @@ struct buf_block_t{
be the first field, so that
buf_pool.page_hash can point
to buf_page_t or buf_block_t */
- byte* frame; /*!< pointer to buffer frame which
- is of size srv_page_size, and
- aligned to an address divisible by
- srv_page_size */
- /** read-write lock covering frame */
- block_lock lock;
#ifdef UNIV_DEBUG
/** whether page.list is in buf_pool.withdraw
- ((state() == BUF_BLOCK_NOT_USED)) and the buffer pool is being shrunk;
+ ((state() == NOT_USED)) and the buffer pool is being shrunk;
protected by buf_pool.mutex */
bool in_withdraw_list;
/** whether unzip_LRU is in buf_pool.unzip_LRU
- (state() == BUF_BLOCK_FILE_PAGE and zip.data != nullptr);
+ (in_file() && frame && zip.data);
protected by buf_pool.mutex */
bool in_unzip_LRU_list;
#endif
- UT_LIST_NODE_T(buf_block_t) unzip_LRU;
- /*!< node of the decompressed LRU list;
- a block is in the unzip_LRU list
- if page.state() == BUF_BLOCK_FILE_PAGE
- and page.zip.data != NULL */
+ /** member of buf_pool.unzip_LRU (if belongs_to_unzip_LRU()) */
+ UT_LIST_NODE_T(buf_block_t) unzip_LRU;
/* @} */
/** @name Optimistic search field */
/* @{ */
@@ -983,17 +979,15 @@ struct buf_block_t{
These 5 fields may only be modified when:
we are holding the appropriate x-latch in btr_search_latches[], and
one of the following holds:
- (1) the block state is BUF_BLOCK_FILE_PAGE, and
- we are holding an s-latch or x-latch on buf_block_t::lock, or
- (2) buf_block_t::buf_fix_count == 0, or
- (3) the block state is BUF_BLOCK_REMOVE_HASH.
+ (1) in_file(), and we are holding lock in any mode, or
+ (2) !is_read_fixed()&&(state()>=UNFIXED||state()==REMOVE_HASH).
An exception to this is when we init or create a page
in the buffer pool in buf0buf.cc.
Another exception for buf_pool_t::clear_hash_index() is that
assigning block->index = NULL (and block->n_pointers = 0)
- is allowed whenever btr_search_own_all(RW_LOCK_X).
+ is allowed whenever all AHI latches are exclusively locked.
Another exception is that ha_insert_for_fold() may
decrement n_pointers without holding the appropriate latch
@@ -1002,8 +996,8 @@ struct buf_block_t{
This implies that the fields may be read without race
condition whenever any of the following hold:
- - the btr_search_latches[] s-latch or x-latch is being held, or
- - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
+ - the btr_search_sys.partition[].latch is being held, or
+ - state() == NOT_USED || state() == MEMORY,
and holding some latch prevents the state from changing to that.
Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
@@ -1017,9 +1011,7 @@ struct buf_block_t{
Atomic_counter<ulint>
n_pointers; /*!< used in debugging: the number of
pointers in the adaptive hash index
- pointing to this frame;
- protected by atomic memory access
- or btr_search_own_all(). */
+ pointing to this frame */
# define assert_block_ahi_empty(block) \
ut_a((block)->n_pointers == 0)
# define assert_block_ahi_empty_on_init(block) do { \
@@ -1054,13 +1046,7 @@ struct buf_block_t{
# define assert_block_ahi_valid(block) /* nothing */
#endif /* BTR_CUR_HASH_ADAPT */
void fix() { page.fix(); }
- uint32_t unfix()
- {
- ut_ad(page.buf_fix_count() || page.io_fix() != BUF_IO_NONE ||
- page.state() == BUF_BLOCK_ZIP_PAGE ||
- !lock.have_any());
- return page.unfix();
- }
+ uint32_t unfix() { return page.unfix(); }
/** @return the physical size, in bytes */
ulint physical_size() const { return page.physical_size(); }
@@ -1072,15 +1058,15 @@ struct buf_block_t{
/** Initialize the block.
@param page_id page identifier
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
- @param fix initial buf_fix_count() */
- void initialise(const page_id_t page_id, ulint zip_size, uint32_t fix= 0);
+ @param state initial state() */
+ void initialise(const page_id_t page_id, ulint zip_size, uint32_t state);
};
/**********************************************************************//**
Compute the hash fold value for blocks in buf_pool.zip_hash. */
/* @{ */
#define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift)
-#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
+#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->page.frame)
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
@@ -1276,7 +1262,7 @@ class buf_pool_t
size_t mem_size() const { return mem_pfx.m_size; }
/** Register the chunk */
- void reg() { map_reg->emplace(map::value_type(blocks->frame, this)); }
+ void reg() { map_reg->emplace(map::value_type(blocks->page.frame, this)); }
/** Allocate a chunk of buffer frames.
@param bytes requested size
@@ -1368,7 +1354,7 @@ public:
}
/** Determine whether a frame is intended to be withdrawn during resize().
- @param ptr pointer within a buf_block_t::frame
+ @param ptr pointer within a buf_page_t::frame
@return whether the frame will be withdrawn */
bool will_be_withdrawn(const byte *ptr) const
{
@@ -1381,8 +1367,8 @@ public:
for (const chunk_t *chunk= chunks + n_chunks_new,
* const echunk= chunks + n_chunks;
chunk != echunk; chunk++)
- if (ptr >= chunk->blocks->frame &&
- ptr < (chunk->blocks + chunk->size - 1)->frame + srv_page_size)
+ if (ptr >= chunk->blocks->page.frame &&
+ ptr < (chunk->blocks + chunk->size - 1)->page.frame + srv_page_size)
return true;
return false;
}
@@ -1486,17 +1472,11 @@ public:
buf_page_t *bpage= page_hash.get(page_id, chain);
if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)])
{
- ut_ad(bpage->state() == BUF_BLOCK_ZIP_PAGE);
ut_ad(!bpage->in_zip_hash);
ut_ad(!bpage->zip.data);
if (!allow_watch)
bpage= nullptr;
}
- else if (bpage)
- {
- ut_ad(page_id == bpage->id());
- ut_ad(bpage->in_file());
- }
return bpage;
}
@@ -1510,15 +1490,9 @@ public:
page_hash.lock_get(page_hash.cell_get(bpage.id().fold())).
is_locked());
#endif /* SAFE_MUTEX */
- if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)])
- {
- ut_ad(bpage.state() == BUF_BLOCK_ZIP_PAGE
- ? !!bpage.zip.data
- : bpage.state() == BUF_BLOCK_FILE_PAGE);
+ ut_ad(bpage.in_file());
+ if (&bpage < &watch[0] || &bpage >= &watch[array_elements(watch)])
return false;
- }
-
- ut_ad(bpage.state() == BUF_BLOCK_ZIP_PAGE);
ut_ad(!bpage.in_zip_hash);
ut_ad(!bpage.zip.data);
return true;
@@ -1818,7 +1792,7 @@ public:
}
// n_flush_LRU() + n_flush_list()
- // is approximately COUNT(io_fix()==BUF_IO_WRITE) in flush_list
+ // is approximately COUNT(is_write_fixed()) in flush_list
unsigned freed_page_clock;/*!< a sequence number used
to count the number of buffer
@@ -1991,6 +1965,7 @@ inline buf_page_t *buf_pool_t::page_hash_table::get(const page_id_t id,
for (buf_page_t *bpage= chain.first; bpage; bpage= bpage->hash)
{
ut_ad(bpage->in_page_hash);
+ ut_ad(bpage->in_file());
if (bpage->id() == id)
return bpage;
}
@@ -2012,50 +1987,12 @@ inline void page_hash_latch::lock()
}
#endif /* SUX_LOCK_GENERIC */
-inline void buf_page_t::add_buf_fix_count(uint32_t count)
-{
- mysql_mutex_assert_owner(&buf_pool.mutex);
- buf_fix_count_+= count;
-}
-
-inline void buf_page_t::set_buf_fix_count(uint32_t count)
-{
- mysql_mutex_assert_owner(&buf_pool.mutex);
- buf_fix_count_= count;
-}
-
-inline void buf_page_t::set_state(buf_page_state state)
-{
- mysql_mutex_assert_owner(&buf_pool.mutex);
-#ifdef UNIV_DEBUG
- switch (state) {
- case BUF_BLOCK_REMOVE_HASH:
- /* buf_pool_t::corrupted_evict() invokes set_corrupt_id()
- before buf_LRU_free_one_page(), so we cannot assert that
- we are holding the hash_lock. */
- break;
- case BUF_BLOCK_MEMORY:
- break;
- case BUF_BLOCK_NOT_USED:
- break;
- case BUF_BLOCK_ZIP_PAGE:
- if (this >= &buf_pool.watch[0] &&
- this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)])
- break;
- /* fall through */
- case BUF_BLOCK_FILE_PAGE:
- ut_ad(buf_pool.page_hash.lock_get(buf_pool.page_hash.cell_get(id_.fold())).
- is_write_locked());
- break;
- }
-#endif
- state_= state;
-}
-
-inline void buf_page_t::set_io_fix(buf_io_fix io_fix)
+inline void buf_page_t::set_state(uint32_t s)
{
mysql_mutex_assert_owner(&buf_pool.mutex);
- io_fix_= io_fix;
+ ut_ad(s <= REMOVE_HASH || s >= UNFIXED);
+ ut_ad(s <= READ_FIX);
+ zip.fix= s;
}
inline void buf_page_t::set_corrupt_id()
@@ -2072,17 +2009,12 @@ inline void buf_page_t::set_corrupt_id()
default:
ut_ad("block is dirty" == 0);
}
- switch (state()) {
- case BUF_BLOCK_REMOVE_HASH:
- break;
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_FILE_PAGE:
+ const auto f= state();
+ if (f != REMOVE_HASH)
+ {
+ ut_ad(f >= UNFIXED);
ut_ad(buf_pool.page_hash.lock_get(buf_pool.page_hash.cell_get(id_.fold())).
is_write_locked());
- break;
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_MEMORY:
- ut_ad("invalid state" == 0);
}
#endif
id_= page_id_t(~0ULL);
@@ -2101,9 +2033,8 @@ inline void buf_page_t::set_oldest_modification(lsn_t lsn)
inline void buf_page_t::clear_oldest_modification()
{
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
- ut_d(const auto state= state_);
- ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_ZIP_PAGE ||
- state == BUF_BLOCK_REMOVE_HASH);
+ ut_d(const auto s= state());
+ ut_ad(s >= REMOVE_HASH);
ut_ad(oldest_modification());
ut_ad(!list.prev);
ut_ad(!list.next);
@@ -2113,36 +2044,15 @@ inline void buf_page_t::clear_oldest_modification()
oldest_modification_.store(0, std::memory_order_release);
}
-/** Note that a block is no longer dirty, while not removing
-it from buf_pool.flush_list */
-inline void buf_page_t::clear_oldest_modification(bool temporary)
-{
- ut_ad(temporary == fsp_is_system_temporary(id().space()));
- if (temporary)
- {
- ut_ad(oldest_modification() == 2);
- oldest_modification_= 0;
- }
- else
- {
- /* We use release memory order to guarantee that callers of
- oldest_modification_acquire() will observe the block as
- being detached from buf_pool.flush_list, after reading the value 0. */
- ut_ad(oldest_modification() > 2);
- oldest_modification_.store(1, std::memory_order_release);
- }
-}
-
/** @return whether the block is modified and ready for flushing */
inline bool buf_page_t::ready_for_flush() const
{
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(in_LRU_list);
- ut_a(in_file());
- ut_ad(fsp_is_system_temporary(id().space())
- ? oldest_modification() == 2
- : oldest_modification() > 2);
- return io_fix_ == BUF_IO_NONE;
+ const auto s= state();
+ ut_a(s >= FREED);
+ ut_ad(!fsp_is_system_temporary(id().space()) || oldest_modification() == 2);
+ return s < READ_FIX;
}
/** @return whether the block can be relocated in memory.
@@ -2150,9 +2060,11 @@ The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
inline bool buf_page_t::can_relocate() const
{
mysql_mutex_assert_owner(&buf_pool.mutex);
- ut_ad(in_file());
+ const auto f= state();
+ ut_ad(f >= FREED);
ut_ad(in_LRU_list);
- return io_fix_ == BUF_IO_NONE && !buf_fix_count_;
+ return (f == FREED || (f < READ_FIX && !(f & ~LRU_MASK))) &&
+ !lock.is_locked_or_waiting();
}
/** @return whether the block has been flagged old in buf_pool.LRU */
@@ -2213,41 +2125,26 @@ inline void buf_page_t::set_old(bool old)
/**********************************************************************
Let us list the consistency conditions for different control block states.
-NOT_USED: is in free list, not in LRU list, not in flush list, nor
- page hash table
-MEMORY: is not in free list, LRU list, or flush list, nor page
- hash table
-FILE_PAGE: space and offset are defined, is in page hash table
- if io_fix == BUF_IO_WRITE,
- buf_pool.n_flush_LRU() || buf_pool.n_flush_list()
-
- (1) if buf_fix_count == 0, then
- is in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- is x-locked,
- if and only if io_fix == BUF_IO_READ
- is s-locked,
- if and only if io_fix == BUF_IO_WRITE
-
- (2) if buf_fix_count > 0, then
- is not in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- if io_fix == BUF_IO_READ,
- is x-locked
- if io_fix == BUF_IO_WRITE,
- is s-locked
+NOT_USED: is in free list, not LRU, not flush_list, nor page_hash
+MEMORY: is not in any of free, LRU, flush_list, page_hash
+in_file(): is not in free list, is in LRU list, id() is defined,
+ is in page_hash (not necessarily if is_read_fixed())
+
+ is in buf_pool.flush_list, if and only
+ if oldest_modification == 1 || oldest_modification > 2
+
+ (1) if is_write_fixed(): is u-locked
+ (2) if is_read_fixed(): is x-locked
State transitions:
NOT_USED => MEMORY
-MEMORY => FILE_PAGE
MEMORY => NOT_USED
-FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
- (1) buf_fix_count == 0,
- (2) oldest_modification == 0, and
- (3) io_fix == 0.
+MEMORY => UNFIXED
+UNFIXED => in_file()
+in_file() => UNFIXED or FREED
+UNFIXED or FREED => REMOVE_HASH
+REMOVE_HASH => NOT_USED (if and only if !oldest_modification())
*/
/** Select from where to start a scan. If we have scanned
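
Read end-to-end, the transitions above admit, for example, the following
lifecycle of a single page (state values from buf_page_t; the commentary is
illustrative, not a trace of actual code):

  /* NOT_USED (0)        in buf_pool.free
     => MEMORY (1)       allocated, not yet a file page
     => UNFIXED + n      assigned an id(), joins buf_pool.LRU and page_hash
     => READ_FIX + n     io-fixed while the page contents are being read
     => UNFIXED + n      read completes; the n buffer-fixes remain
     => FREED + n        the page is freed in the tablespace
     => REMOVE_HASH (2)  evicted; removed from buf_pool.page_hash
     => NOT_USED (0)     free_file_page(); back to buf_pool.free */
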