Diffstat (limited to 'storage/innobase/buf/buf0lru.c')
-rw-r--r--  storage/innobase/buf/buf0lru.c  1052
1 file changed, 1052 insertions(+), 0 deletions(-)
diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
new file mode 100644
index 00000000000..18c4f8c10fb
--- /dev/null
+++ b/storage/innobase/buf/buf0lru.c
@@ -0,0 +1,1052 @@
+/******************************************************
+The database buffer replacement algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0lru.h"
+
+#ifdef UNIV_NONINL
+#include "buf0lru.ic"
+#endif
+
+#include "srv0srv.h" /* Needed to get srv_print_innodb_monitor */
+
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "ut0rnd.h"
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "hash0hash.h"
+#include "os0sync.h"
+#include "fil0fil.h"
+#include "btr0btr.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0rea.h"
+#include "btr0sea.h"
+#include "os0file.h"
+#include "log0recv.h"
+
+/* The number of blocks from the LRU_old pointer onward, including the block
+pointed to, must be 3/8 of the whole LRU list length, except that the
+tolerance defined below is allowed. Note that the tolerance must be small
+enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the
+LRU_old pointer is not allowed to point to either end of the LRU list. */
+
+#define BUF_LRU_OLD_TOLERANCE 20
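+
+/* For illustration, assuming BUF_LRU_OLD_MIN_LEN (from buf0lru.h) is 80:
+3/8 of such a list is 30 blocks, so with the tolerance of 20 the old segment
+may hold between 10 and 50 blocks, which keeps the LRU_old pointer at least
+10 blocks from the tail and at least 30 blocks from the head of the list. */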
+
+/* The whole LRU list length is divided by this number to determine an
+initial segment in buf_LRU_get_recent_limit */
+
+#define BUF_LRU_INITIAL_RATIO 8
+
+/* If we switch on the InnoDB monitor because there are too few available
+frames in the buffer pool, we set this to TRUE */
+ibool buf_lru_switched_on_innodb_mon = FALSE;
+
+/**********************************************************************
+Takes a block out of the LRU list and page hash table and sets the block
+state to BUF_BLOCK_REMOVE_HASH. */
+static
+void
+buf_LRU_block_remove_hashed_page(
+/*=============================*/
+ buf_block_t* block); /* in: block, must contain a file page and
+ be in a state where it can be freed; there
+ may or may not be a hash index to the page */
+/**********************************************************************
+Puts a file page which has no hash index to the free list. */
+static
+void
+buf_LRU_block_free_hashed_page(
+/*===========================*/
+ buf_block_t* block); /* in: block, must contain a file page and
+ be in a state where it can be freed */
+
+/**********************************************************************
+Invalidates all pages belonging to a given tablespace when we are deleting
+the data file(s) of that tablespace. */
+
+void
+buf_LRU_invalidate_tablespace(
+/*==========================*/
+ ulint id) /* in: space id */
+{
+ buf_block_t* block;
+ ulint page_no;
+ ibool all_freed;
+
+scan_again:
+ mutex_enter(&(buf_pool->mutex));
+
+ all_freed = TRUE;
+
+ block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+ while (block != NULL) {
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
+ if (block->space == id
+ && (block->buf_fix_count > 0 || block->io_fix != 0)) {
+
+ /* We cannot remove this page during this scan yet;
+ maybe the system is currently reading it in, or
+ flushing the modifications to the file */
+
+ all_freed = FALSE;
+
+ goto next_page;
+ }
+
+ if (block->space == id) {
+#ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+ printf(
+ "Dropping space %lu page %lu\n",
+ (ulong) block->space,
+ (ulong) block->offset);
+ }
+#endif
+ if (block->is_hashed) {
+ page_no = block->offset;
+
+ mutex_exit(&(buf_pool->mutex));
+
+ /* Note that the following call will acquire
+ an S-latch on the page */
+
+ btr_search_drop_page_hash_when_freed(id,
+ page_no);
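+
+				/* Since the buffer pool mutex was released
+				above, the LRU list may have changed under
+				us: restart the scan from the tail */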
+ goto scan_again;
+ }
+
+ if (0 != ut_dulint_cmp(block->oldest_modification,
+ ut_dulint_zero)) {
+
+ /* Remove from the flush list of modified
+ blocks */
+ block->oldest_modification = ut_dulint_zero;
+
+ UT_LIST_REMOVE(flush_list,
+ buf_pool->flush_list, block);
+ }
+
+ /* Remove from the LRU list */
+ buf_LRU_block_remove_hashed_page(block);
+ buf_LRU_block_free_hashed_page(block);
+ }
+next_page:
+ block = UT_LIST_GET_PREV(LRU, block);
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+
+ if (!all_freed) {
+ os_thread_sleep(20000);
+
+ goto scan_again;
+ }
+}
+
+/**********************************************************************
+Gets the minimum LRU_position field for the blocks in an initial segment
+(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
+guaranteed to be precise, because the ulint_clock may wrap around. */
+
+ulint
+buf_LRU_get_recent_limit(void)
+/*==========================*/
+ /* out: the limit; zero if could not determine it */
+{
+ buf_block_t* block;
+ ulint len;
+ ulint limit;
+
+ mutex_enter(&(buf_pool->mutex));
+
+ len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+ if (len < BUF_LRU_OLD_MIN_LEN) {
+ /* The LRU list is too short to do read-ahead */
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(0);
+ }
+
+ block = UT_LIST_GET_FIRST(buf_pool->LRU);
+
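+	/* The first block is the most recently used one and thus carries
+	(apart from clock wrap-around) the largest LRU_position value;
+	subtracting len / BUF_LRU_INITIAL_RATIO approximates the position
+	of the block one-eighth of the way down the list, assuming the
+	position values are roughly consecutive */
+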
+ limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(limit);
+}
+
+/**********************************************************************
+Look for a replaceable block from the end of the LRU list and put it to
+the free list if found. */
+
+ibool
+buf_LRU_search_and_free_block(
+/*==========================*/
+ /* out: TRUE if freed */
+ ulint n_iterations) /* in: how many times this has been called
+ repeatedly without result: a high value means
+ that we should search farther; if value is
+ k < 10, then we only search k/10 * [number
+ of pages in the buffer pool] from the end
+ of the LRU list */
+{
+ buf_block_t* block;
+ ulint distance = 0;
+ ibool freed;
+
+ mutex_enter(&(buf_pool->mutex));
+
+ freed = FALSE;
+ block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+ while (block != NULL) {
+ ut_a(block->in_LRU_list);
+ if (buf_flush_ready_for_replace(block)) {
+
+ if (buf_debug_prints) {
+ fprintf(stderr,
+ "Putting space %lu page %lu to free list\n",
+ (ulong) block->space,
+ (ulong) block->offset);
+ }
+
+ buf_LRU_block_remove_hashed_page(block);
+
+ mutex_exit(&(buf_pool->mutex));
+
+ /* Remove possible adaptive hash index built on the
+ page; in the case of AWE the block may not have a
+ frame at all */
+
+ if (block->frame) {
+ btr_search_drop_page_hash_index(block->frame);
+ }
+ mutex_enter(&(buf_pool->mutex));
+
+ ut_a(block->buf_fix_count == 0);
+
+ buf_LRU_block_free_hashed_page(block);
+ freed = TRUE;
+
+ break;
+ }
+ block = UT_LIST_GET_PREV(LRU, block);
+ distance++;
+
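+		/* Example: on the first call (n_iterations == 1) we give up
+		after scanning about 100 blocks plus 10 % of the pool; once
+		n_iterations exceeds 10 this early exit is disabled and we
+		are prepared to scan the whole list */
+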
+ if (!freed && n_iterations <= 10
+ && distance > 100 + (n_iterations * buf_pool->curr_size)
+ / 10) {
+ buf_pool->LRU_flush_ended = 0;
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(FALSE);
+ }
+ }
+ if (buf_pool->LRU_flush_ended > 0) {
+ buf_pool->LRU_flush_ended--;
+ }
+ if (!freed) {
+ buf_pool->LRU_flush_ended = 0;
+ }
+ mutex_exit(&(buf_pool->mutex));
+
+ return(freed);
+}
+
+/**********************************************************************
+Tries to remove LRU flushed blocks from the end of the LRU list and put them
+to the free list. This is beneficial for the efficiency of the insert buffer
+operation, as flushed pages from non-unique non-clustered indexes are here
+taken out of the buffer pool, and their inserts redirected to the insert
+buffer. Otherwise, the flushed blocks could get modified again before read
+operations need new buffer blocks, and the i/o work done in flushing would be
+wasted. */
+
+void
+buf_LRU_try_free_flushed_blocks(void)
+/*=================================*/
+{
+ mutex_enter(&(buf_pool->mutex));
+
+ while (buf_pool->LRU_flush_ended > 0) {
+
+ mutex_exit(&(buf_pool->mutex));
+
+ buf_LRU_search_and_free_block(1);
+
+ mutex_enter(&(buf_pool->mutex));
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+}
+
+/**********************************************************************
+Returns TRUE if less than 15 % of the buffer pool is available. This can be
+used in heuristics to prevent huge transactions eating up the whole buffer
+pool for their locks. */
+
+ibool
+buf_LRU_buf_pool_running_out(void)
+/*==============================*/
+ /* out: TRUE if less than 15 % of buffer pool
+ left */
+{
+ ibool ret = FALSE;
+
+ mutex_enter(&(buf_pool->mutex));
+
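+	/* Blocks that are on neither the free list nor the LRU list have
+	been allocated for internal purposes such as lock heaps and the
+	adaptive hash index; max_size / 7 corresponds to the roughly 15 %
+	threshold mentioned above */
+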
+ if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 7) {
+
+ ret = TRUE;
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+
+ return(ret);
+}
+
+/**********************************************************************
+Returns a free block from buf_pool. The block is taken off the free list.
+If it is empty, blocks are moved from the end of the LRU list to the free
+list. */
+
+buf_block_t*
+buf_LRU_get_free_block(void)
+/*========================*/
+ /* out: the free control block; also if AWE is
+ used, it is guaranteed that the block has its
+ page mapped to a frame when we return */
+{
+ buf_block_t* block = NULL;
+ ibool freed;
+ ulint n_iterations = 1;
+ ibool mon_value_was = FALSE;
+ ibool started_monitor = FALSE;
+loop:
+ mutex_enter(&(buf_pool->mutex));
+
+ if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 10) {
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+" InnoDB: ERROR: over 9 / 10 of the buffer pool is occupied by\n"
+"InnoDB: lock heaps or the adaptive hash index! Check that your\n"
+"InnoDB: transactions do not set too many row locks.\n"
+"InnoDB: Your buffer pool size is %lu MB. Maybe you should make\n"
+"InnoDB: the buffer pool bigger?\n"
+"InnoDB: We intentionally generate a seg fault to print a stack trace\n"
+"InnoDB: on Linux!\n",
+ (ulong)(buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE)));
+
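+		/* Note: curr_size is a count of pages; with the usual 16 kB
+		UNIV_PAGE_SIZE, 1024 * 1024 / UNIV_PAGE_SIZE is 64 pages per
+		megabyte, so the division above yields the pool size in MB */
+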
+ ut_error;
+
+ } else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 5) {
+
+ if (!buf_lru_switched_on_innodb_mon) {
+
+ /* Over 80 % of the buffer pool is occupied by lock
+ heaps or the adaptive hash index. This may be a memory
+ leak! */
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: WARNING: over 4 / 5 of the buffer pool is occupied by\n"
+"InnoDB: lock heaps or the adaptive hash index! Check that your\n"
+"InnoDB: transactions do not set too many row locks.\n"
+"InnoDB: Your buffer pool size is %lu MB. Maybe you should make\n"
+"InnoDB: the buffer pool bigger?\n"
+"InnoDB: Starting the InnoDB Monitor to print diagnostics, including\n"
+"InnoDB: lock heap and hash index sizes.\n",
+ (ulong) (buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE)));
+
+ buf_lru_switched_on_innodb_mon = TRUE;
+ srv_print_innodb_monitor = TRUE;
+ os_event_set(srv_lock_timeout_thread_event);
+ }
+ } else if (buf_lru_switched_on_innodb_mon) {
+
+		/* Switch off the InnoDB Monitor; this is a simple way
+		to stop the monitor if the situation becomes less urgent,
+		but may also surprise a user who had switched the monitor
+		on manually! */
+
+ buf_lru_switched_on_innodb_mon = FALSE;
+ srv_print_innodb_monitor = FALSE;
+ }
+
+ /* If there is a block in the free list, take it */
+ if (UT_LIST_GET_LEN(buf_pool->free) > 0) {
+
+ block = UT_LIST_GET_FIRST(buf_pool->free);
+ ut_a(block->in_free_list);
+ UT_LIST_REMOVE(free, buf_pool->free, block);
+ block->in_free_list = FALSE;
+ ut_a(block->state != BUF_BLOCK_FILE_PAGE);
+ ut_a(!block->in_LRU_list);
+
+ if (srv_use_awe) {
+ if (block->frame) {
+ /* Remove from the list of mapped pages */
+
+ UT_LIST_REMOVE(awe_LRU_free_mapped,
+ buf_pool->awe_LRU_free_mapped, block);
+ } else {
+ /* We map the page to a frame; second param
+ FALSE below because we do not want it to be
+ added to the awe_LRU_free_mapped list */
+
+ buf_awe_map_page_to_frame(block, FALSE);
+ }
+ }
+
+ block->state = BUF_BLOCK_READY_FOR_USE;
+
+ mutex_exit(&(buf_pool->mutex));
+
+ if (started_monitor) {
+ srv_print_innodb_monitor = mon_value_was;
+ }
+
+ return(block);
+ }
+
+ /* If no block was in the free list, search from the end of the LRU
+ list and try to free a block there */
+
+ mutex_exit(&(buf_pool->mutex));
+
+ freed = buf_LRU_search_and_free_block(n_iterations);
+
+	if (freed) {
+ goto loop;
+ }
+
+ if (n_iterations > 30) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ "InnoDB: Warning: difficult to find free blocks from\n"
+ "InnoDB: the buffer pool (%lu search iterations)! Consider\n"
+ "InnoDB: increasing the buffer pool size.\n"
+ "InnoDB: It is also possible that in your Unix version\n"
+ "InnoDB: fsync is very slow, or completely frozen inside\n"
+ "InnoDB: the OS kernel. Then upgrading to a newer version\n"
+ "InnoDB: of your operating system may help. Look at the\n"
+ "InnoDB: number of fsyncs in diagnostic info below.\n"
+ "InnoDB: Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
+ "InnoDB: %lu OS file reads, %lu OS file writes, %lu OS fsyncs\n"
+ "InnoDB: Starting InnoDB Monitor to print further\n"
+ "InnoDB: diagnostics to the standard output.\n",
+ (ulong) n_iterations,
+ (ulong) fil_n_pending_log_flushes,
+ (ulong) fil_n_pending_tablespace_flushes,
+ (ulong) os_n_file_reads, (ulong) os_n_file_writes,
+ (ulong) os_n_fsyncs);
+
+ mon_value_was = srv_print_innodb_monitor;
+ started_monitor = TRUE;
+ srv_print_innodb_monitor = TRUE;
+ os_event_set(srv_lock_timeout_thread_event);
+ }
+
+ /* No free block was found: try to flush the LRU list */
+
+ buf_flush_free_margin();
+ ++srv_buf_pool_wait_free;
+
+ os_aio_simulated_wake_handler_threads();
+
+ mutex_enter(&(buf_pool->mutex));
+
+ if (buf_pool->LRU_flush_ended > 0) {
+ /* We have written pages in an LRU flush. To make the insert
+ buffer more efficient, we try to move these pages to the free
+ list. */
+
+ mutex_exit(&(buf_pool->mutex));
+
+ buf_LRU_try_free_flushed_blocks();
+ } else {
+ mutex_exit(&(buf_pool->mutex));
+ }
+
+ if (n_iterations > 10) {
+
+ os_thread_sleep(500000);
+ }
+
+ n_iterations++;
+
+ goto loop;
+}
+
+/***********************************************************************
+Moves the LRU_old pointer so that the length of the old blocks list
+is inside the allowed limits. */
+UNIV_INLINE
+void
+buf_LRU_old_adjust_len(void)
+/*========================*/
+{
+ ulint old_len;
+ ulint new_len;
+
+ ut_a(buf_pool->LRU_old);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(3 * (BUF_LRU_OLD_MIN_LEN / 8) > BUF_LRU_OLD_TOLERANCE + 5);
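+
+	/* With BUF_LRU_OLD_MIN_LEN assumed to be 80, the assertion above
+	checks that 3 * (80 / 8) = 30 > 20 + 5: even for the shortest list
+	in which LRU_old is maintained, the target old length exceeds the
+	tolerance by a clear margin, so the loop below cannot walk LRU_old
+	off either end of the list */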
+
+ for (;;) {
+ old_len = buf_pool->LRU_old_len;
+ new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+
+ ut_a(buf_pool->LRU_old->in_LRU_list);
+
+ /* Update the LRU_old pointer if necessary */
+
+ if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
+
+ buf_pool->LRU_old = UT_LIST_GET_PREV(LRU,
+ buf_pool->LRU_old);
+ (buf_pool->LRU_old)->old = TRUE;
+ buf_pool->LRU_old_len++;
+
+ } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
+
+ (buf_pool->LRU_old)->old = FALSE;
+ buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU,
+ buf_pool->LRU_old);
+ buf_pool->LRU_old_len--;
+ } else {
+ ut_a(buf_pool->LRU_old); /* Check that we did not
+ fall out of the LRU list */
+ return;
+ }
+ }
+}
+
+/***********************************************************************
+Initializes the old blocks pointer in the LRU list. This function should be
+called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
+static
+void
+buf_LRU_old_init(void)
+/*==================*/
+{
+ buf_block_t* block;
+
+ ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+
+ /* We first initialize all blocks in the LRU list as old and then use
+ the adjust function to move the LRU_old pointer to the right
+ position */
+
+ block = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+ while (block != NULL) {
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_a(block->in_LRU_list);
+ block->old = TRUE;
+ block = UT_LIST_GET_NEXT(LRU, block);
+ }
+
+ buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
+ buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
+
+ buf_LRU_old_adjust_len();
+}
+
+/**********************************************************************
+Removes a block from the LRU list. */
+UNIV_INLINE
+void
+buf_LRU_remove_block(
+/*=================*/
+ buf_block_t* block) /* in: control block */
+{
+ ut_ad(buf_pool);
+ ut_ad(block);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_a(block->in_LRU_list);
+
+ /* If the LRU_old pointer is defined and points to just this block,
+ move it backward one step */
+
+ if (block == buf_pool->LRU_old) {
+
+ /* Below: the previous block is guaranteed to exist, because
+ the LRU_old pointer is only allowed to differ by the
+ tolerance value from strict 3/8 of the LRU list length. */
+
+ buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, block);
+ (buf_pool->LRU_old)->old = TRUE;
+
+ buf_pool->LRU_old_len++;
+ ut_a(buf_pool->LRU_old);
+ }
+
+ /* Remove the block from the LRU list */
+ UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
+ block->in_LRU_list = FALSE;
+
+ if (srv_use_awe && block->frame) {
+ /* Remove from the list of mapped pages */
+
+ UT_LIST_REMOVE(awe_LRU_free_mapped,
+ buf_pool->awe_LRU_free_mapped, block);
+ }
+
+	/* If the LRU list is so short that LRU_old is not defined, return */
+ if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
+
+ buf_pool->LRU_old = NULL;
+
+ return;
+ }
+
+ ut_ad(buf_pool->LRU_old);
+
+ /* Update the LRU_old_len field if necessary */
+ if (block->old) {
+
+ buf_pool->LRU_old_len--;
+ }
+
+ /* Adjust the length of the old block list if necessary */
+ buf_LRU_old_adjust_len();
+}
+
+/**********************************************************************
+Adds a block to the LRU list end. */
+UNIV_INLINE
+void
+buf_LRU_add_block_to_end_low(
+/*=========================*/
+ buf_block_t* block) /* in: control block */
+{
+ buf_block_t* last_block;
+
+ ut_ad(buf_pool);
+ ut_ad(block);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
+ block->old = TRUE;
+
+ last_block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+ if (last_block) {
+ block->LRU_position = last_block->LRU_position;
+ } else {
+ block->LRU_position = buf_pool_clock_tic();
+ }
+
+ ut_a(!block->in_LRU_list);
+ UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
+ block->in_LRU_list = TRUE;
+
+ if (srv_use_awe && block->frame) {
+ /* Add to the list of mapped pages */
+
+ UT_LIST_ADD_LAST(awe_LRU_free_mapped,
+ buf_pool->awe_LRU_free_mapped, block);
+ }
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+
+ buf_pool->LRU_old_len++;
+ }
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+
+ ut_ad(buf_pool->LRU_old);
+
+ /* Adjust the length of the old block list if necessary */
+
+ buf_LRU_old_adjust_len();
+
+ } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
+
+ /* The LRU list is now long enough for LRU_old to become
+ defined: init it */
+
+ buf_LRU_old_init();
+ }
+}
+
+/**********************************************************************
+Adds a block to the LRU list. */
+UNIV_INLINE
+void
+buf_LRU_add_block_low(
+/*==================*/
+ buf_block_t* block, /* in: control block */
+ ibool old) /* in: TRUE if should be put to the old blocks
+ in the LRU list, else put to the start; if the
+ LRU list is very short, the block is added to
+ the start, regardless of this parameter */
+{
+ ulint cl;
+
+ ut_ad(buf_pool);
+ ut_ad(block);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_a(!block->in_LRU_list);
+
+ block->old = old;
+ cl = buf_pool_clock_tic();
+
+ if (srv_use_awe && block->frame) {
+ /* Add to the list of mapped pages; for simplicity we always
+ add to the start, even if the user would have set 'old'
+ TRUE */
+
+ UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
+ buf_pool->awe_LRU_free_mapped, block);
+ }
+
+ if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
+
+ UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
+
+ block->LRU_position = cl;
+ block->freed_page_clock = buf_pool->freed_page_clock;
+ } else {
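+		/* Insert at the LRU_old midpoint (about 3/8 of the list from
+		the tail) rather than at the head: a block added as 'old' ages
+		out quickly unless it is accessed again, and so does not push
+		the most recently used 5/8 of the list towards eviction */
+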
+ UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
+ block);
+ buf_pool->LRU_old_len++;
+
+ /* We copy the LRU position field of the previous block
+ to the new block */
+
+ block->LRU_position = (buf_pool->LRU_old)->LRU_position;
+ }
+
+ block->in_LRU_list = TRUE;
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+
+ ut_ad(buf_pool->LRU_old);
+
+ /* Adjust the length of the old block list if necessary */
+
+ buf_LRU_old_adjust_len();
+
+ } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
+
+ /* The LRU list is now long enough for LRU_old to become
+ defined: init it */
+
+ buf_LRU_old_init();
+ }
+}
+
+/**********************************************************************
+Adds a block to the LRU list. */
+
+void
+buf_LRU_add_block(
+/*==============*/
+ buf_block_t* block, /* in: control block */
+ ibool old) /* in: TRUE if should be put to the old
+ blocks in the LRU list, else put to the start;
+ if the LRU list is very short, the block is
+ added to the start, regardless of this
+ parameter */
+{
+ buf_LRU_add_block_low(block, old);
+}
+
+/**********************************************************************
+Moves a block to the start of the LRU list. */
+
+void
+buf_LRU_make_block_young(
+/*=====================*/
+ buf_block_t* block) /* in: control block */
+{
+ buf_LRU_remove_block(block);
+ buf_LRU_add_block_low(block, FALSE);
+}
+
+/**********************************************************************
+Moves a block to the end of the LRU list. */
+
+void
+buf_LRU_make_block_old(
+/*===================*/
+ buf_block_t* block) /* in: control block */
+{
+ buf_LRU_remove_block(block);
+ buf_LRU_add_block_to_end_low(block);
+}
+
+/**********************************************************************
+Puts a block back to the free list. */
+
+void
+buf_LRU_block_free_non_file_page(
+/*=============================*/
+ buf_block_t* block) /* in: block, must not contain a file page */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(block);
+
+ ut_a((block->state == BUF_BLOCK_MEMORY)
+ || (block->state == BUF_BLOCK_READY_FOR_USE));
+
+ ut_a(block->n_pointers == 0);
+ ut_a(!block->in_free_list);
+
+ block->state = BUF_BLOCK_NOT_USED;
+
+#ifdef UNIV_DEBUG
+ /* Wipe contents of page to reveal possible stale pointers to it */
+ memset(block->frame, '\0', UNIV_PAGE_SIZE);
+#endif
+ UT_LIST_ADD_FIRST(free, buf_pool->free, block);
+ block->in_free_list = TRUE;
+
+ if (srv_use_awe && block->frame) {
+ /* Add to the list of mapped pages */
+
+ UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
+ buf_pool->awe_LRU_free_mapped, block);
+ }
+}
+
+/**********************************************************************
+Takes a block out of the LRU list and page hash table and sets the block
+state to BUF_BLOCK_REMOVE_HASH. */
+static
+void
+buf_LRU_block_remove_hashed_page(
+/*=============================*/
+ buf_block_t* block) /* in: block, must contain a file page and
+ be in a state where it can be freed; there
+ may or may not be a hash index to the page */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(block);
+
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+ ut_a(block->io_fix == 0);
+ ut_a(block->buf_fix_count == 0);
+ ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0);
+
+ buf_LRU_remove_block(block);
+
+ buf_pool->freed_page_clock += 1;
+
+ /* Note that if AWE is enabled the block may not have a frame at all */
+
+ buf_block_modify_clock_inc(block);
+
+ if (block != buf_page_hash_get(block->space, block->offset)) {
+ fprintf(stderr,
+"InnoDB: Error: page %lu %lu not found from the hash table\n",
+ (ulong) block->space,
+ (ulong) block->offset);
+ if (buf_page_hash_get(block->space, block->offset)) {
+ fprintf(stderr,
+"InnoDB: From hash table we find block %p of %lu %lu which is not %p\n",
+ buf_page_hash_get(block->space, block->offset),
+ (ulong) buf_page_hash_get(block->space, block->offset)->space,
+ (ulong) buf_page_hash_get(block->space, block->offset)->offset,
+ block);
+ }
+
+#ifdef UNIV_DEBUG
+ buf_print();
+ buf_LRU_print();
+ buf_validate();
+ buf_LRU_validate();
+#endif
+ ut_a(0);
+ }
+
+ HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
+ buf_page_address_fold(block->space, block->offset),
+ block);
+
+ block->state = BUF_BLOCK_REMOVE_HASH;
+}
+
+/**********************************************************************
+Puts a file page which has no hash index to the free list. */
+static
+void
+buf_LRU_block_free_hashed_page(
+/*===========================*/
+ buf_block_t* block) /* in: block, must contain a file page and
+ be in a state where it can be freed */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_a(block->state == BUF_BLOCK_REMOVE_HASH);
+
+ block->state = BUF_BLOCK_MEMORY;
+
+ buf_LRU_block_free_non_file_page(block);
+}
+
+/**************************************************************************
+Validates the LRU list. */
+
+ibool
+buf_LRU_validate(void)
+/*==================*/
+{
+ buf_block_t* block;
+ ulint old_len;
+ ulint new_len;
+ ulint LRU_pos;
+
+ ut_ad(buf_pool);
+ mutex_enter(&(buf_pool->mutex));
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+
+ ut_a(buf_pool->LRU_old);
+ old_len = buf_pool->LRU_old_len;
+ new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+ ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
+ ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
+ }
+
+ UT_LIST_VALIDATE(LRU, buf_block_t, buf_pool->LRU);
+
+ block = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+ old_len = 0;
+
+ while (block != NULL) {
+
+ ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
+ if (block->old) {
+ old_len++;
+ }
+
+ if (buf_pool->LRU_old && (old_len == 1)) {
+ ut_a(buf_pool->LRU_old == block);
+ }
+
+ LRU_pos = block->LRU_position;
+
+ block = UT_LIST_GET_NEXT(LRU, block);
+
+ if (block) {
+ /* If the following assert fails, it may
+ not be an error: just the buf_pool clock
+ has wrapped around */
+ ut_a(LRU_pos >= block->LRU_position);
+ }
+ }
+
+ if (buf_pool->LRU_old) {
+ ut_a(buf_pool->LRU_old_len == old_len);
+ }
+
+ UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);
+
+ block = UT_LIST_GET_FIRST(buf_pool->free);
+
+ while (block != NULL) {
+ ut_a(block->state == BUF_BLOCK_NOT_USED);
+
+ block = UT_LIST_GET_NEXT(free, block);
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+ return(TRUE);
+}
+
+/**************************************************************************
+Prints the LRU list. */
+
+void
+buf_LRU_print(void)
+/*===============*/
+{
+ buf_block_t* block;
+ buf_frame_t* frame;
+ ulint len;
+
+ ut_ad(buf_pool);
+ mutex_enter(&(buf_pool->mutex));
+
+ fprintf(stderr, "Pool ulint clock %lu\n", (ulong) buf_pool->ulint_clock);
+
+ block = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+ len = 0;
+
+ while (block != NULL) {
+
+ fprintf(stderr, "BLOCK %lu ", (ulong) block->offset);
+
+ if (block->old) {
+ fputs("old ", stderr);
+ }
+
+ if (block->buf_fix_count) {
+ fprintf(stderr, "buffix count %lu ",
+ (ulong) block->buf_fix_count);
+ }
+
+ if (block->io_fix) {
+ fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix);
+ }
+
+ if (ut_dulint_cmp(block->oldest_modification,
+ ut_dulint_zero) > 0) {
+ fputs("modif. ", stderr);
+ }
+
+ frame = buf_block_get_frame(block);
+
+ fprintf(stderr, "LRU pos %lu type %lu index id %lu ",
+ (ulong) block->LRU_position,
+ (ulong) fil_page_get_type(frame),
+ (ulong) ut_dulint_get_low(btr_page_get_index_id(frame)));
+
+ block = UT_LIST_GET_NEXT(LRU, block);
+ if (++len == 10) {
+ len = 0;
+ putc('\n', stderr);
+ }
+ }
+
+ mutex_exit(&(buf_pool->mutex));
+}