Diffstat (limited to 'storage/innobase/buf')
-rw-r--r--	storage/innobase/buf/buf0buf.c	287
-rw-r--r--	storage/innobase/buf/buf0lru.c	235
-rw-r--r--	storage/innobase/buf/buf0rea.c	209
3 files changed, 342 insertions(+), 389 deletions(-)
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
index 0008fcb1271..d87abbd0ed9 100644
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -837,16 +837,35 @@ buf_chunk_not_freed(
 	block = chunk->blocks;

 	for (i = chunk->size; i--; block++) {
-		mutex_enter(&block->mutex);
-
-		if (buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
-		    && !buf_flush_ready_for_replace(&block->page)) {
+		ibool	ready;
+
+		switch (buf_block_get_state(block)) {
+		case BUF_BLOCK_ZIP_FREE:
+		case BUF_BLOCK_ZIP_PAGE:
+		case BUF_BLOCK_ZIP_DIRTY:
+			/* The uncompressed buffer pool should never
+			contain compressed block descriptors. */
+			ut_error;
+			break;
+		case BUF_BLOCK_NOT_USED:
+		case BUF_BLOCK_READY_FOR_USE:
+		case BUF_BLOCK_MEMORY:
+		case BUF_BLOCK_REMOVE_HASH:
+			/* Skip blocks that are not being used for
+			file pages. */
+			break;
+		case BUF_BLOCK_FILE_PAGE:
+			mutex_enter(&block->mutex);
+			ready = buf_flush_ready_for_replace(&block->page);
 			mutex_exit(&block->mutex);

-			return(block);
-		}
-
-		mutex_exit(&block->mutex);
+			if (!ready) {
+
+				return(block);
+			}
+
+			break;
+		}
 	}

 	return(NULL);
@@ -966,8 +985,6 @@ buf_pool_init(void)
 		buf_pool->no_flush[i] = os_event_create(NULL);
 	}

-	buf_pool->ulint_clock = 1;
-
 	/* 3. Initialize LRU fields
 	--------------------------- */
 	/* All fields are initialized by mem_zalloc(). */
@@ -1471,33 +1488,8 @@ buf_pool_resize(void)
 }

 /********************************************************************//**
-Moves the block to the start of the LRU list if there is a danger
-that the block would drift out of the buffer pool. */
-UNIV_INLINE
-void
-buf_block_make_young(
-/*=================*/
-	buf_page_t*	bpage)	/*!< in: block to make younger */
-{
-	ut_ad(!buf_pool_mutex_own());
-
-	/* Note that we read freed_page_clock's without holding any mutex:
-	this is allowed since the result is used only in heuristics */
-
-	if (buf_page_peek_if_too_old(bpage)) {
-
-		buf_pool_mutex_enter();
-		/* There has been freeing activity in the LRU list:
-		best to move to the head of the LRU list */
-
-		buf_LRU_make_block_young(bpage);
-		buf_pool_mutex_exit();
-	}
-}
-
-/********************************************************************//**
 Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from from slipping out of
+function can be used to prevent an important page from slipping out of
 the buffer pool. */
 UNIV_INTERN
 void
@@ -1515,6 +1507,36 @@ buf_page_make_young(
 }

 /********************************************************************//**
+Sets the time of the first access of a page and moves a page to the
+start of the buffer pool LRU list if it is too old. This high-level
+function can be used to prevent an important page from slipping
+out of the buffer pool. */
+static
+void
+buf_page_set_accessed_make_young(
+/*=============================*/
+	buf_page_t*	bpage,		/*!< in/out: buffer block of a
+					file page */
+	unsigned	access_time)	/*!< in: bpage->access_time
+					read under mutex protection,
+					or 0 if unknown */
+{
+	ut_ad(!buf_pool_mutex_own());
+	ut_a(buf_page_in_file(bpage));
+
+	if (buf_page_peek_if_too_old(bpage)) {
+		buf_pool_mutex_enter();
+		buf_LRU_make_block_young(bpage);
+		buf_pool_mutex_exit();
+	} else if (!access_time) {
+		ulint	time_ms = ut_time_ms();
+		buf_pool_mutex_enter();
+		buf_page_set_accessed(bpage, time_ms);
+		buf_pool_mutex_exit();
+	}
+}
+
+/********************************************************************//**
 Resets the check_index_page_at_flush field of a page if found in the buffer
 pool. */
 UNIV_INTERN
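The ordering in the helper above is the point of the rewrite: access_time is sampled under the block mutex, both mutexes are released, and only then is the buffer pool mutex taken to relink the block or stamp the time, so the two mutexes are never held together across the LRU operation. A self-contained sketch of that protocol, with pthread mutexes and invented names (page_t, peek_if_too_old, lru_make_young, now_ms) standing in for the real buf0buf declarations:

#include <pthread.h>
#include <stdio.h>
#include <time.h>

/* Stand-ins for buf_pool_mutex and the per-block mutex. */
static pthread_mutex_t pool_mutex  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t block_mutex = PTHREAD_MUTEX_INITIALIZER;

typedef struct {
	unsigned	access_time;	/* 0 = never accessed */
	int		old;		/* in the "old" LRU sublist? */
} page_t;

static unsigned now_ms(void)		/* ut_time_ms() stand-in */
{
	return (unsigned) (clock() / (CLOCKS_PER_SEC / 1000));
}

static int peek_if_too_old(const page_t* page)	/* heuristic, latch-free */
{
	return page->old;	/* the real check uses freed_page_clock */
}

static void lru_make_young(page_t* page)
{
	page->old = 0;		/* the real code relinks the LRU list */
}

static void set_accessed_make_young(page_t* page)
{
	unsigned	access_time;

	/* 1. Sample the first-access time under the block mutex only. */
	pthread_mutex_lock(&block_mutex);
	access_time = page->access_time;
	pthread_mutex_unlock(&block_mutex);

	/* 2. Take the pool mutex afterwards; the two are never nested. */
	if (peek_if_too_old(page)) {
		pthread_mutex_lock(&pool_mutex);
		lru_make_young(page);
		pthread_mutex_unlock(&pool_mutex);
	} else if (access_time == 0) {
		unsigned	t = now_ms();

		pthread_mutex_lock(&pool_mutex);
		page->access_time = t;	/* stamp the first access once */
		pthread_mutex_unlock(&pool_mutex);
	}
}

int main(void)
{
	page_t	page = { 0, 0 };

	set_accessed_make_young(&page);
	printf("first access at %u ms\n", page.access_time);
	return 0;
}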
@@ -1645,11 +1667,12 @@ buf_page_get_zip(
 	buf_page_t*	bpage;
 	mutex_t*	block_mutex;
 	ibool		must_read;
+	unsigned	access_time;

 #ifndef UNIV_LOG_DEBUG
 	ut_ad(!ibuf_inside());
 #endif
-	buf_pool->n_page_gets++;
+	buf_pool->stat.n_page_gets++;

 	for (;;) {
 		buf_pool_mutex_enter();
@@ -1712,14 +1735,13 @@ err_exit:
 got_block:
 	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
+	access_time = buf_page_is_accessed(bpage);

 	buf_pool_mutex_exit();

-	buf_page_set_accessed(bpage, TRUE);
-
 	mutex_exit(block_mutex);

-	buf_block_make_young(bpage);
+	buf_page_set_accessed_make_young(bpage, access_time);

 #ifdef UNIV_DEBUG_FILE_ACCESSES
 	ut_a(!bpage->file_page_was_freed);
#endif
@@ -1812,7 +1834,7 @@ buf_zip_decompress(
 	switch (fil_page_get_type(frame)) {
 	case FIL_PAGE_INDEX:
 		if (page_zip_decompress(&block->page.zip,
-					block->frame)) {
+					block->frame, TRUE)) {
 			return(TRUE);
 		}
@@ -2000,7 +2022,7 @@ buf_page_get_gen(
 	mtr_t*		mtr)	/*!< in: mini-transaction */
 {
 	buf_block_t*	block;
-	ibool		accessed;
+	unsigned	access_time;
 	ulint		fix_type;
 	ibool		must_read;
@@ -2016,7 +2038,7 @@
 #ifndef UNIV_LOG_DEBUG
 	ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
 #endif
-	buf_pool->n_page_gets++;
+	buf_pool->stat.n_page_gets++;
 loop:
 	block = guess;
 	buf_pool_mutex_enter();
@@ -2243,17 +2265,16 @@ wait_until_unfixed:
 	UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);

 	buf_block_buf_fix_inc(block, file, line);
-	buf_pool_mutex_exit();

-	/* Check if this is the first access to the page */
+	mutex_exit(&block->mutex);

-	accessed = buf_page_is_accessed(&block->page);
+	/* Check if this is the first access to the page */

-	buf_page_set_accessed(&block->page, TRUE);
+	access_time = buf_page_is_accessed(&block->page);

-	mutex_exit(&block->mutex);
+	buf_pool_mutex_exit();

-	buf_block_make_young(&block->page);
+	buf_page_set_accessed_make_young(&block->page, access_time);

 #ifdef UNIV_DEBUG_FILE_ACCESSES
 	ut_a(!block->page.file_page_was_freed);
@@ -2306,7 +2327,7 @@ wait_until_unfixed:
 	mtr_memo_push(mtr, block, fix_type);

-	if (!accessed) {
+	if (!access_time) {
 		/* In the case of a first access, try to apply linear
 		read-ahead */
@@ -2336,7 +2357,7 @@ buf_page_optimistic_get_func(
 	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mini-transaction */
 {
-	ibool		accessed;
+	unsigned	access_time;
 	ibool		success;
 	ulint		fix_type;
@@ -2353,14 +2374,16 @@ buf_page_optimistic_get_func(
 	}

 	buf_block_buf_fix_inc(block, file, line);
-	accessed = buf_page_is_accessed(&block->page);
-	buf_page_set_accessed(&block->page, TRUE);

 	mutex_exit(&block->mutex);

-	buf_block_make_young(&block->page);
+	/* Check if this is the first access to the page.
+	We do a dirty read on purpose, to avoid mutex contention.
+	This field is only used for heuristic purposes; it does not
+	affect correctness. */

-	/* Check if this is the first access to the page */
+	access_time = buf_page_is_accessed(&block->page);
+	buf_page_set_accessed_make_young(&block->page, access_time);

 	ut_ad(!ibuf_inside()
 	      || ibuf_page(buf_block_get_space(block),
@@ -2412,7 +2435,7 @@
 #ifdef UNIV_DEBUG_FILE_ACCESSES
 	ut_a(block->page.file_page_was_freed == FALSE);
 #endif
-	if (UNIV_UNLIKELY(!accessed)) {
+	if (UNIV_UNLIKELY(!access_time)) {
 		/* In the case of a first access, try to apply linear
 		read-ahead */
@@ -2425,7 +2448,7 @@
 	ut_a(ibuf_count_get(buf_block_get_space(block),
 			    buf_block_get_page_no(block)) == 0);
 #endif
-	buf_pool->n_page_gets++;
+	buf_pool->stat.n_page_gets++;

 	return(TRUE);
 }
@@ -2473,8 +2496,20 @@ buf_page_get_known_nowait(
 	mutex_exit(&block->mutex);

-	if (mode == BUF_MAKE_YOUNG) {
-		buf_block_make_young(&block->page);
+	if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
+		buf_pool_mutex_enter();
+		buf_LRU_make_block_young(&block->page);
+		buf_pool_mutex_exit();
+	} else if (!buf_page_is_accessed(&block->page)) {
+		/* Above, we do a dirty read on purpose, to avoid
+		mutex contention. The field buf_page_t::access_time
+		is only used for heuristic purposes. Writes to the
+		field must be protected by mutex, however. */
+		ulint	time_ms = ut_time_ms();
+
+		buf_pool_mutex_enter();
+		buf_page_set_accessed(&block->page, time_ms);
+		buf_pool_mutex_exit();
 	}

 	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
@@ -2513,7 +2548,7 @@
 	      || (ibuf_count_get(buf_block_get_space(block),
 				 buf_block_get_page_no(block)) == 0));
 #endif
-	buf_pool->n_page_gets++;
+	buf_pool->stat.n_page_gets++;

 	return(TRUE);
 }
@@ -2589,7 +2624,7 @@ buf_page_try_get_func(
 #endif /* UNIV_DEBUG_FILE_ACCESSES */
 	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

-	buf_pool->n_page_gets++;
+	buf_pool->stat.n_page_gets++;

 #ifdef UNIV_IBUF_COUNT_DEBUG
 	ut_a(ibuf_count_get(buf_block_get_space(block),
@@ -2608,10 +2643,10 @@ buf_page_init_low(
 	buf_page_t*	bpage)	/*!< in: block to init */
 {
 	bpage->flush_type = BUF_FLUSH_LRU;
-	bpage->accessed = FALSE;
 	bpage->io_fix = BUF_IO_NONE;
 	bpage->buf_fix_count = 0;
 	bpage->freed_page_clock = 0;
+	bpage->access_time = 0;
 	bpage->newest_modification = 0;
 	bpage->oldest_modification = 0;
 	HASH_INVALIDATE(bpage, hash);
@@ -2907,6 +2942,7 @@ buf_page_create(
 	buf_frame_t*	frame;
 	buf_block_t*	block;
 	buf_block_t*	free_block	= NULL;
+	ulint		time_ms		= ut_time_ms();

 	ut_ad(mtr);
 	ut_ad(space || !zip_size);
@@ -2953,7 +2989,7 @@
 	buf_LRU_add_block(&block->page, FALSE);

 	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
-	buf_pool->n_pages_created++;
+	buf_pool->stat.n_pages_created++;

 	if (zip_size) {
 		void*	data;
@@ -2990,12 +3026,12 @@
 		rw_lock_x_unlock(&block->lock);
 	}

+	buf_page_set_accessed(&block->page, time_ms);
+
 	buf_pool_mutex_exit();

 	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);

-	buf_page_set_accessed(&block->page, TRUE);
-
 	mutex_exit(&block->mutex);

 	/* Delete possible entries for the page from the insert buffer:
@@ -3201,7 +3237,7 @@ corrupt:
 		ut_ad(buf_pool->n_pend_reads > 0);
 		buf_pool->n_pend_reads--;
-		buf_pool->n_pages_read++;
+		buf_pool->stat.n_pages_read++;

 		if (uncompressed) {
 			rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
@@ -3221,7 +3257,7 @@ corrupt:
 					     BUF_IO_WRITE);
 		}

-		buf_pool->n_pages_written++;
+		buf_pool->stat.n_pages_written++;

 		break;
@@ -3251,7 +3287,32 @@
 void
 buf_pool_invalidate(void)
 /*=====================*/
 {
-	ibool	freed;
+	ibool		freed;
+	enum buf_flush	i;
+
+	buf_pool_mutex_enter();
+
+	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
+
+		/* As this function is called during startup and
+		during the redo application phase of recovery, InnoDB
+		is single threaded (apart from IO helper threads) at
+		this stage. No new write batch can be in initialization
+		stage at this point. */
+		ut_ad(buf_pool->init_flush[i] == FALSE);

+		/* However, it is possible that a write batch that has
+		been posted earlier is still not complete. For buffer
+		pool invalidation to proceed we must ensure there is NO
+		write activity happening. */
+		if (buf_pool->n_flush[i] > 0) {
+			buf_pool_mutex_exit();
+			buf_flush_wait_batch_end(i);
+			buf_pool_mutex_enter();
+		}
+	}
+
+	buf_pool_mutex_exit();

 	ut_ad(buf_all_freed());
@@ -3266,6 +3327,14 @@ buf_pool_invalidate(void)
 	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
 	ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);

+	buf_pool->freed_page_clock = 0;
+	buf_pool->LRU_old = NULL;
+	buf_pool->LRU_old_len = 0;
+	buf_pool->LRU_flush_ended = 0;
+
+	memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
+	buf_refresh_io_stats();
+
 	buf_pool_mutex_exit();
 }
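The loop added to buf_pool_invalidate() above only has to cope with batches posted before the single-threaded startup/recovery phase; nothing new can start while it runs. Reduced to a single-threaded sketch (the names are invented stand-ins for the buf0flu API, and the real code drops buf_pool_mutex around the wait):

#include <assert.h>
#include <stdio.h>

enum { FLUSH_LRU, FLUSH_LIST, FLUSH_SINGLE_PAGE, FLUSH_N_TYPES };

static int init_flush[FLUSH_N_TYPES];            /* batch being set up? */
static int n_flush[FLUSH_N_TYPES] = { 3, 0, 1 }; /* pages still in flight */

static void flush_wait_batch_end(int type)
{
	/* The real code sleeps on an event; here the i/o "finishes" at once. */
	n_flush[type] = 0;
}

static void wait_for_no_pending_writes(void)
{
	int	i;

	for (i = FLUSH_LRU; i < FLUSH_N_TYPES; i++) {
		/* Startup/recovery is single threaded apart from the i/o
		helpers, so no new batch can be in its setup phase... */
		assert(init_flush[i] == 0);	/* ut_ad() in the real code */

		/* ...but a batch posted earlier may still be draining. */
		if (n_flush[i] > 0) {
			flush_wait_batch_end(i);
		}
	}
}

int main(void)
{
	wait_for_no_pending_writes();
	printf("in flight: %d %d %d\n", n_flush[0], n_flush[1], n_flush[2]);
	return 0;
}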
@@ -3528,6 +3597,7 @@ buf_print(void)
 		"n pending decompressions %lu\n"
 		"n pending reads %lu\n"
 		"n pending flush LRU %lu list %lu single page %lu\n"
+		"pages made young %lu, not young %lu\n"
 		"pages read %lu, created %lu, written %lu\n",
 		(ulong) size,
 		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
@@ -3538,8 +3608,11 @@
 		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
 		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
 		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
-		(ulong) buf_pool->n_pages_read, buf_pool->n_pages_created,
-		(ulong) buf_pool->n_pages_written);
+		(ulong) buf_pool->stat.n_pages_made_young,
+		(ulong) buf_pool->stat.n_pages_not_made_young,
+		(ulong) buf_pool->stat.n_pages_read,
+		(ulong) buf_pool->stat.n_pages_created,
+		(ulong) buf_pool->stat.n_pages_written);

 	/* Count the number of blocks belonging to each index in the buffer */
@@ -3744,10 +3817,9 @@ buf_print_io(
 {
 	time_t	current_time;
 	double	time_elapsed;
-	ulint	size;
+	ulint	n_gets_diff;

 	ut_ad(buf_pool);
-	size = buf_pool->curr_size;

 	buf_pool_mutex_enter();
@@ -3755,12 +3827,14 @@
 		"Buffer pool size   %lu\n"
 		"Free buffers       %lu\n"
 		"Database pages     %lu\n"
+		"Old database pages %lu\n"
 		"Modified db pages  %lu\n"
 		"Pending reads %lu\n"
 		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
-		(ulong) size,
+		(ulong) buf_pool->curr_size,
 		(ulong) UT_LIST_GET_LEN(buf_pool->free),
 		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
+		(ulong) buf_pool->LRU_old_len,
 		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
 		(ulong) buf_pool->n_pend_reads,
 		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
@@ -3772,37 +3846,66 @@
 	current_time = time(NULL);
 	time_elapsed = 0.001 + difftime(current_time,
 					buf_pool->last_printout_time);
-	buf_pool->last_printout_time = current_time;

 	fprintf(file,
+		"Pages made young %lu, not young %lu\n"
+		"%.2f youngs/s, %.2f non-youngs/s\n"
 		"Pages read %lu, created %lu, written %lu\n"
 		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
-		(ulong) buf_pool->n_pages_read,
-		(ulong) buf_pool->n_pages_created,
-		(ulong) buf_pool->n_pages_written,
-		(buf_pool->n_pages_read - buf_pool->n_pages_read_old)
+		(ulong) buf_pool->stat.n_pages_made_young,
+		(ulong) buf_pool->stat.n_pages_not_made_young,
+		(buf_pool->stat.n_pages_made_young
+		 - buf_pool->old_stat.n_pages_made_young)
+		/ time_elapsed,
+		(buf_pool->stat.n_pages_not_made_young
+		 - buf_pool->old_stat.n_pages_not_made_young)
+		/ time_elapsed,
+		(ulong) buf_pool->stat.n_pages_read,
+		(ulong) buf_pool->stat.n_pages_created,
+		(ulong) buf_pool->stat.n_pages_written,
+		(buf_pool->stat.n_pages_read
+		 - buf_pool->old_stat.n_pages_read)
 		/ time_elapsed,
-		(buf_pool->n_pages_created - buf_pool->n_pages_created_old)
+		(buf_pool->stat.n_pages_created
+		 - buf_pool->old_stat.n_pages_created)
 		/ time_elapsed,
-		(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
+		(buf_pool->stat.n_pages_written
+		 - buf_pool->old_stat.n_pages_written)
 		/ time_elapsed);

-	if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
-		fprintf(file, "Buffer pool hit rate %lu / 1000\n",
+	n_gets_diff = buf_pool->stat.n_page_gets - buf_pool->old_stat.n_page_gets;
+
+	if (n_gets_diff) {
+		fprintf(file,
+			"Buffer pool hit rate %lu / 1000,"
+			" young-making rate %lu / 1000 not %lu / 1000\n",
+			(ulong)
+			(1000 - ((1000 * (buf_pool->stat.n_pages_read
+					  - buf_pool->old_stat.n_pages_read))
+				 / (buf_pool->stat.n_page_gets
+				    - buf_pool->old_stat.n_page_gets))),
+			(ulong)
+			(1000 * (buf_pool->stat.n_pages_made_young
+				 - buf_pool->old_stat.n_pages_made_young)
+			 / n_gets_diff),
 			(ulong)
-			(1000 - ((1000 * (buf_pool->n_pages_read
-					  - buf_pool->n_pages_read_old))
-				 / (buf_pool->n_page_gets
-				    - buf_pool->n_page_gets_old))));
+			(1000 * (buf_pool->stat.n_pages_not_made_young
+				 - buf_pool->old_stat.n_pages_not_made_young)
+			 / n_gets_diff));
 	} else {
 		fputs("No buffer pool page gets since the last printout\n",
 		      file);
 	}

-	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
-	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
-	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
-	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+	/* Statistics about read ahead algorithm */
+	fprintf(file, "Pages read ahead %.2f/s,"
+		" evicted without access %.2f/s\n",
+		(buf_pool->stat.n_ra_pages_read
+		 - buf_pool->old_stat.n_ra_pages_read)
+		/ time_elapsed,
+		(buf_pool->stat.n_ra_pages_evicted
+		 - buf_pool->old_stat.n_ra_pages_evicted)
+		/ time_elapsed);

 	/* Print some values to help us with visualizing what is
 	happening with LRU eviction. */
@@ -3814,6 +3917,7 @@
 		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
 		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

+	buf_refresh_io_stats();
 	buf_pool_mutex_exit();
 }
@@ -3825,10 +3929,7 @@ buf_refresh_io_stats(void)
 /*======================*/
 {
 	buf_pool->last_printout_time = time(NULL);
-	buf_pool->n_page_gets_old = buf_pool->n_page_gets;
-	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
-	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
-	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+	buf_pool->old_stat = buf_pool->stat;
 }

 /*********************************************************************//**
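All the figures printed above are interval deltas rather than lifetime totals: the hit rate approximates misses as delta n_pages_read over delta n_page_gets since the last printout, scaled to per-mille, and the young-making rates use the same denominator. A stand-alone rendering of the arithmetic, with a simplified struct in place of the buf_pool->stat / old_stat pair:

#include <stdio.h>

struct buf_stat {
	unsigned long	n_page_gets;
	unsigned long	n_pages_read;
	unsigned long	n_pages_made_young;
};

int main(void)
{
	struct buf_stat	old = { 100000, 1200, 300 };	/* last printout */
	struct buf_stat	cur = { 150000, 1700, 450 };	/* now */

	unsigned long	gets  = cur.n_page_gets - old.n_page_gets;
	unsigned long	reads = cur.n_pages_read - old.n_pages_read;
	unsigned long	young = cur.n_pages_made_young - old.n_pages_made_young;

	if (gets) {
		/* Per-mille scale, exactly as in buf_print_io(). */
		printf("hit rate %lu / 1000, young-making rate %lu / 1000\n",
		       1000 - (1000 * reads) / gets,
		       (1000 * young) / gets);	/* 990 and 3 here */
	}
	return 0;
}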
diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
index be53a5f5d9d..d3a79d62d3f 100644
--- a/storage/innobase/buf/buf0lru.c
+++ b/storage/innobase/buf/buf0lru.c
@@ -49,18 +49,22 @@ Created 11/5/1995 Heikki Tuuri
 #include "log0recv.h"
 #include "srv0srv.h"

-/** The number of blocks from the LRU_old pointer onward, including the block
-pointed to, must be 3/8 of the whole LRU list length, except that the
-tolerance defined below is allowed. Note that the tolerance must be small
-enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the
-LRU_old pointer is not allowed to point to either end of the LRU list. */
+/** The number of blocks from the LRU_old pointer onward, including
+the block pointed to, must be buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
+of the whole LRU list length, except that the tolerance defined below
+is allowed. Note that the tolerance must be small enough such that for
+even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not
+allowed to point to either end of the LRU list. */

 #define BUF_LRU_OLD_TOLERANCE	20

-/** The whole LRU list length is divided by this number to determine an
-initial segment in buf_LRU_get_recent_limit */
-
-#define BUF_LRU_INITIAL_RATIO	8
+/** The minimum amount of non-old blocks when the LRU_old list exists
+(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
+@see buf_LRU_old_adjust_len */
+#define BUF_LRU_NON_OLD_MIN_LEN	5
+#if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN
+# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
+#endif

 /** When dropping the search hash index entries before deleting an ibd
 file, we build a local array of pages belonging to that tablespace
@@ -107,6 +111,15 @@
 UNIV_INTERN buf_LRU_stat_t	buf_LRU_stat_sum;

 /* @} */

+/** @name Heuristics for detecting index scan @{ */
+/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
+"old" blocks. Protected by buf_pool_mutex. */
+UNIV_INTERN uint	buf_LRU_old_ratio;
+/** Move blocks to "new" LRU list only if the first access was at
+least this many milliseconds ago. Not protected by any mutex or latch. */
+UNIV_INTERN uint	buf_LRU_old_threshold_ms;
+/* @} */
+
 /******************************************************************//**
 Takes a block out of the LRU list and page hash table.
 If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
@@ -428,42 +441,6 @@ next_page:
 	}
 }

-/******************************************************************//**
-Gets the minimum LRU_position field for the blocks in an initial segment
-(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
-guaranteed to be precise, because the ulint_clock may wrap around.
-@return the limit; zero if could not determine it */
-UNIV_INTERN
-ulint
-buf_LRU_get_recent_limit(void)
-/*==========================*/
-{
-	const buf_page_t*	bpage;
-	ulint			len;
-	ulint			limit;
-
-	buf_pool_mutex_enter();
-
-	len = UT_LIST_GET_LEN(buf_pool->LRU);
-
-	if (len < BUF_LRU_OLD_MIN_LEN) {
-		/* The LRU list is too short to do read-ahead */
-
-		buf_pool_mutex_exit();
-
-		return(0);
-	}
-
-	bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
-
-	limit = buf_page_get_LRU_position(bpage);
-	len /= BUF_LRU_INITIAL_RATIO;
-
-	buf_pool_mutex_exit();
-
-	return(limit > len ? (limit - len) : 0);
-}
-
 /********************************************************************//**
 Insert a compressed block into buf_pool->zip_clean in the LRU order. */
 UNIV_INTERN
@@ -594,6 +571,7 @@ buf_LRU_free_from_common_LRU_list(
 	     bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {

 		enum buf_lru_free_block_status	freed;
+		unsigned			accessed;
 		mutex_t*			block_mutex
 			= buf_page_get_mutex(bpage);

 		ut_ad(bpage->in_LRU_list);

 		mutex_enter(block_mutex);
+		accessed = buf_page_is_accessed(bpage);
 		freed = buf_LRU_free_block(bpage, TRUE, NULL);
 		mutex_exit(block_mutex);

 		switch (freed) {
 		case BUF_LRU_FREED:
+			/* Keep track of pages that are evicted without
+			ever being accessed. This gives us a measure of
+			the effectiveness of readahead */
+			if (!accessed) {
+				++buf_pool->stat.n_ra_pages_evicted;
+			}
 			return(TRUE);

 		case BUF_LRU_NOT_FREED:
@@ -953,8 +938,10 @@ buf_LRU_old_adjust_len(void)
 	ut_a(buf_pool->LRU_old);
 	ut_ad(buf_pool_mutex_own());
-#if 3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5
-# error "3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5"
+	ut_ad(buf_LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
+	ut_ad(buf_LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
+#if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
+# error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)"
 #endif
 #ifdef UNIV_LRU_DEBUG
 	/* buf_pool->LRU_old must be the first item in the LRU list
@@ -966,34 +953,39 @@
 	      || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
 #endif /* UNIV_LRU_DEBUG */

+	old_len = buf_pool->LRU_old_len;
+	new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
+			 * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
+			 UT_LIST_GET_LEN(buf_pool->LRU)
+			 - (BUF_LRU_OLD_TOLERANCE
+			    + BUF_LRU_NON_OLD_MIN_LEN));
+
 	for (;;) {
-		old_len = buf_pool->LRU_old_len;
-		new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+		buf_page_t*	LRU_old = buf_pool->LRU_old;

-		ut_ad(buf_pool->LRU_old->in_LRU_list);
-		ut_a(buf_pool->LRU_old);
+		ut_a(LRU_old);
+		ut_ad(LRU_old->in_LRU_list);
 #ifdef UNIV_LRU_DEBUG
-		ut_a(buf_pool->LRU_old->old);
+		ut_a(LRU_old->old);
 #endif /* UNIV_LRU_DEBUG */

 		/* Update the LRU_old pointer if necessary */

-		if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
+		if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {

-			buf_pool->LRU_old = UT_LIST_GET_PREV(
-				LRU, buf_pool->LRU_old);
+			buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV(
+				LRU, LRU_old);
 #ifdef UNIV_LRU_DEBUG
-			ut_a(!buf_pool->LRU_old->old);
+			ut_a(!LRU_old->old);
 #endif /* UNIV_LRU_DEBUG */
-			buf_page_set_old(buf_pool->LRU_old, TRUE);
-			buf_pool->LRU_old_len++;
+			buf_page_set_old(LRU_old, TRUE);
+			old_len = ++buf_pool->LRU_old_len;

 		} else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {

-			buf_page_set_old(buf_pool->LRU_old, FALSE);
-			buf_pool->LRU_old = UT_LIST_GET_NEXT(
-				LRU, buf_pool->LRU_old);
-			buf_pool->LRU_old_len--;
+			buf_page_set_old(LRU_old, FALSE);
+			buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
+			old_len = --buf_pool->LRU_old_len;
 		} else {
 			return;
 		}
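The new_len expression above is the core of the resizing logic: the old sublist should be the configured fraction of the whole list, capped so that at least BUF_LRU_NON_OLD_MIN_LEN blocks plus the tolerance always stay in the new sublist. A small program evaluating the same formula (BUF_LRU_OLD_RATIO_DIV is defined in buf0lru.h outside this diff, so 1024 below is an assumed value):

#include <stdio.h>

enum {
	RATIO_DIV	= 1024,	/* assumed BUF_LRU_OLD_RATIO_DIV */
	OLD_TOLERANCE	= 20,	/* BUF_LRU_OLD_TOLERANCE */
	NON_OLD_MIN_LEN	= 5	/* BUF_LRU_NON_OLD_MIN_LEN */
};

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

/* Target "old" sublist length for a given total LRU length, mirroring
   the new_len computation in buf_LRU_old_adjust_len(). */
static unsigned long old_target_len(unsigned long lru_len, unsigned ratio)
{
	return min_ul(lru_len * ratio / RATIO_DIV,
		      lru_len - (OLD_TOLERANCE + NON_OLD_MIN_LEN));
}

int main(void)
{
	/* 378/1024 is roughly the 37% default. */
	printf("%lu\n", old_target_len(1000, 378));	/* 369 */
	/* On a short list the cap wins, keeping at least
	   NON_OLD_MIN_LEN + OLD_TOLERANCE blocks "new". */
	printf("%lu\n", old_target_len(80, 1024));	/* 55 */
	return 0;
}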
@@ -1021,6 +1013,7 @@ buf_LRU_old_init(void)
 	while (bpage != NULL) {
 		ut_ad(bpage->in_LRU_list);
+		ut_ad(buf_page_in_file(bpage));
 		buf_page_set_old(bpage, TRUE);
 		bpage = UT_LIST_GET_NEXT(LRU, bpage);
 	}
@@ -1075,16 +1068,19 @@ buf_LRU_remove_block(
 	if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) {

-		/* Below: the previous block is guaranteed to exist, because
-		the LRU_old pointer is only allowed to differ by the
-		tolerance value from strict 3/8 of the LRU list length. */
+		/* Below: the previous block is guaranteed to exist,
+		because the LRU_old pointer is only allowed to differ
+		by BUF_LRU_OLD_TOLERANCE from strict
+		buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
+		list length. */
+		buf_page_t*	prev_bpage = UT_LIST_GET_PREV(LRU, bpage);

-		buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, bpage);
-		ut_a(buf_pool->LRU_old);
+		ut_a(prev_bpage);
 #ifdef UNIV_LRU_DEBUG
-		ut_a(!buf_pool->LRU_old->old);
+		ut_a(!prev_bpage->old);
 #endif /* UNIV_LRU_DEBUG */
-		buf_page_set_old(buf_pool->LRU_old, TRUE);
+		buf_pool->LRU_old = prev_bpage;
+		buf_page_set_old(prev_bpage, TRUE);

 		buf_pool->LRU_old_len++;
 	}
@@ -1149,39 +1145,25 @@ buf_LRU_add_block_to_end_low(
 /*=========================*/
 	buf_page_t*	bpage)	/*!< in: control block */
 {
-	buf_page_t*	last_bpage;
-
 	ut_ad(buf_pool);
 	ut_ad(bpage);
 	ut_ad(buf_pool_mutex_own());

 	ut_a(buf_page_in_file(bpage));

-	last_bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-
-	if (last_bpage) {
-		bpage->LRU_position = last_bpage->LRU_position;
-	} else {
-		bpage->LRU_position = buf_pool_clock_tic();
-	}
-
 	ut_ad(!bpage->in_LRU_list);
 	UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
 	ut_d(bpage->in_LRU_list = TRUE);

 	buf_page_set_old(bpage, TRUE);

-	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-
-		buf_pool->LRU_old_len++;
-	}
-
 	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {

 		ut_ad(buf_pool->LRU_old);

 		/* Adjust the length of the old block list if necessary */

+		buf_pool->LRU_old_len++;
 		buf_LRU_old_adjust_len();

 	} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
@@ -1189,6 +1171,7 @@ buf_LRU_add_block_to_end_low(
 		/* The LRU list is now long enough for LRU_old to become
 		defined: init it */

+		buf_pool->LRU_old_len++;
 		buf_LRU_old_init();
 	}
@@ -1222,7 +1205,6 @@ buf_LRU_add_block_low(
 		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);

-		bpage->LRU_position = buf_pool_clock_tic();
 		bpage->freed_page_clock = buf_pool->freed_page_clock;
 	} else {
 #ifdef UNIV_LRU_DEBUG
@@ -1237,11 +1219,6 @@
 		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
 				     bpage);
 		buf_pool->LRU_old_len++;
-
-		/* We copy the LRU position field of the previous block
-		to the new block */
-
-		bpage->LRU_position = (buf_pool->LRU_old)->LRU_position;
 	}

 	ut_d(bpage->in_LRU_list = TRUE);
@@ -1295,6 +1272,12 @@ buf_LRU_make_block_young(
 /*=====================*/
 	buf_page_t*	bpage)	/*!< in: control block */
 {
+	ut_ad(buf_pool_mutex_own());
+
+	if (bpage->old) {
+		buf_pool->stat.n_pages_made_young++;
+	}
+
 	buf_LRU_remove_block(bpage);
 	buf_LRU_add_block_low(bpage, FALSE);
 }
@@ -1847,6 +1830,50 @@ buf_LRU_block_free_hashed_page(
 	buf_LRU_block_free_non_file_page(block);
 }

+/**********************************************************************//**
+Updates buf_LRU_old_ratio.
+@return updated old_pct */
+UNIV_INTERN
+uint
+buf_LRU_old_ratio_update(
+/*=====================*/
+	uint	old_pct,/*!< in: Reserve this percentage of
+			the buffer pool for "old" blocks. */
+	ibool	adjust)	/*!< in: TRUE=adjust the LRU list;
+			FALSE=just assign buf_LRU_old_ratio
+			during the initialization of InnoDB */
+{
+	uint	ratio;
+
+	ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
+	if (ratio < BUF_LRU_OLD_RATIO_MIN) {
+		ratio = BUF_LRU_OLD_RATIO_MIN;
+	} else if (ratio > BUF_LRU_OLD_RATIO_MAX) {
+		ratio = BUF_LRU_OLD_RATIO_MAX;
+	}
+
+	if (adjust) {
+		buf_pool_mutex_enter();
+
+		if (ratio != buf_LRU_old_ratio) {
+			buf_LRU_old_ratio = ratio;
+
+			if (UT_LIST_GET_LEN(buf_pool->LRU)
+			    >= BUF_LRU_OLD_MIN_LEN) {
+				buf_LRU_old_adjust_len();
+			}
+		}
+
+		buf_pool_mutex_exit();
+	} else {
+		buf_LRU_old_ratio = ratio;
+	}
+
+	/* the reverse of
+	ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */
+	return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5));
+}
+
 /********************************************************************//**
 Update the historical stats that we are collecting for LRU eviction
 policy at the end of each interval. */
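buf_LRU_old_ratio_update() converts the user-visible percentage into a fixed-point ratio, clamps it, and reports back the percentage actually in effect, which is what a caller such as the innodb_old_blocks_pct handler would see after clamping. The round trip in isolation (again with assumed values for the RATIO_* constants, which buf0lru.h defines outside this diff):

#include <stdio.h>

enum { RATIO_DIV = 1024, RATIO_MIN = 51, RATIO_MAX = 972 };

/* Mirrors buf_LRU_old_ratio_update(): percent -> fixed-point ratio,
   clamped, then converted back so the caller sees the effective pct. */
static unsigned old_ratio_update(unsigned old_pct)
{
	unsigned	ratio = old_pct * RATIO_DIV / 100;

	if (ratio < RATIO_MIN) {
		ratio = RATIO_MIN;
	} else if (ratio > RATIO_MAX) {
		ratio = RATIO_MAX;
	}

	/* the reverse of ratio = old_pct * RATIO_DIV / 100, rounded */
	return (unsigned) (ratio * 100 / (double) RATIO_DIV + 0.5);
}

int main(void)
{
	printf("%u %u %u\n",
	       old_ratio_update(37),	/* 37: round trip is exact */
	       old_ratio_update(1),	/* clamped up to RATIO_MIN: 5 */
	       old_ratio_update(100));	/* clamped down to RATIO_MAX: 95 */
	return 0;
}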
@@ -1896,7 +1923,6 @@ buf_LRU_validate(void)
 	buf_block_t*	block;
 	ulint		old_len;
 	ulint		new_len;
-	ulint		LRU_pos;

 	ut_ad(buf_pool);
 	buf_pool_mutex_enter();
@@ -1905,7 +1931,11 @@
 		ut_a(buf_pool->LRU_old);
 		old_len = buf_pool->LRU_old_len;
-		new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
+		new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
+				 * buf_LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
+				 UT_LIST_GET_LEN(buf_pool->LRU)
+				 - (BUF_LRU_OLD_TOLERANCE
+				    + BUF_LRU_NON_OLD_MIN_LEN));
 		ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
 		ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
 	}
@@ -1943,16 +1973,7 @@
 			ut_a(buf_pool->LRU_old == bpage);
 		}

-		LRU_pos = buf_page_get_LRU_position(bpage);
-
 		bpage = UT_LIST_GET_NEXT(LRU, bpage);
-
-		if (bpage) {
-			/* If the following assert fails, it may
-			not be an error: just the buf_pool clock
-			has wrapped around */
-			ut_a(LRU_pos >= buf_page_get_LRU_position(bpage));
-		}
 	}

 	if (buf_pool->LRU_old) {
@@ -2000,9 +2021,6 @@ buf_LRU_print(void)
 	ut_ad(buf_pool);
 	buf_pool_mutex_enter();

-	fprintf(stderr, "Pool ulint clock %lu\n",
-		(ulong) buf_pool->ulint_clock);
-
 	bpage = UT_LIST_GET_FIRST(buf_pool->LRU);

 	while (bpage != NULL) {
@@ -2033,18 +2051,16 @@
 			const byte*	frame;
 		case BUF_BLOCK_FILE_PAGE:
 			frame = buf_block_get_frame((buf_block_t*) bpage);
-			fprintf(stderr, "\nLRU pos %lu type %lu"
+			fprintf(stderr, "\ntype %lu"
 				" index id %lu\n",
-				(ulong) buf_page_get_LRU_position(bpage),
 				(ulong) fil_page_get_type(frame),
 				(ulong) ut_dulint_get_low(
 					btr_page_get_index_id(frame)));
 			break;
 		case BUF_BLOCK_ZIP_PAGE:
 			frame = bpage->zip.data;
-			fprintf(stderr, "\nLRU pos %lu type %lu size %lu"
+			fprintf(stderr, "\ntype %lu size %lu"
 				" index id %lu\n",
-				(ulong) buf_page_get_LRU_position(bpage),
 				(ulong) fil_page_get_type(frame),
 				(ulong) buf_page_get_zip_size(bpage),
 				(ulong) ut_dulint_get_low(
@@ -2052,8 +2068,7 @@
 			break;

 		default:
-			fprintf(stderr, "\nLRU pos %lu !state %lu!\n",
-				(ulong) buf_page_get_LRU_position(bpage),
+			fprintf(stderr, "\n!state %lu!\n",
 				(ulong) buf_page_get_state(bpage));
 			break;
 		}
diff --git a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
index 319d6b2a522..dd98ea17eb5 100644
--- a/storage/innobase/buf/buf0rea.c
+++ b/storage/innobase/buf/buf0rea.c
@@ -38,14 +38,6 @@ Created 11/5/1995 Heikki Tuuri
 #include "srv0start.h"
 #include "srv0srv.h"

-/** The size in blocks of the area where the random read-ahead algorithm counts
-the accessed pages when deciding whether to read-ahead */
-#define	BUF_READ_AHEAD_RANDOM_AREA	BUF_READ_AHEAD_AREA
-
-/** There must be at least this many pages in buf_pool in the area to start
-a random read-ahead */
-#define	BUF_READ_AHEAD_RANDOM_THRESHOLD	(1 + BUF_READ_AHEAD_RANDOM_AREA / 2)
-
 /** The linear read-ahead area size */
 #define	BUF_READ_AHEAD_LINEAR_AREA	BUF_READ_AHEAD_AREA
@@ -62,7 +54,8 @@
 flag is cleared and the x-lock released by an i/o-handler thread.
 @return 1 if a read request was queued, 0 if the page already resided
 in buf_pool, or if the page is in the doublewrite buffer blocks in
 which case it is never read into the pool, or if the tablespace does
-not exist or is being dropped */
+not exist or is being dropped
+@return 1 if read request is issued. 0 if it is not */
 static
 ulint
 buf_read_page_low(
@@ -165,174 +158,13 @@ buf_read_page_low(
 }

 /********************************************************************//**
-Applies a random read-ahead in buf_pool if there are at least a threshold
-value of accessed pages from the random read-ahead area. Does not read any
-page, not even the one at the position (space, offset), if the read-ahead
-mechanism is not activated. NOTE 1: the calling thread may own latches on
-pages: to avoid deadlocks this function must be written such that it cannot
-end up waiting for these latches! NOTE 2: the calling thread must want
-access to the page given: this rule is set to prevent unintended read-aheads
-performed by ibuf routines, a situation which could result in a deadlock if
-the OS does not support asynchronous i/o.
-@return number of page read requests issued; NOTE that if we read ibuf
-pages, it may happen that the page at the given page number does not
-get read even if we return a positive value! */
-static
-ulint
-buf_read_ahead_random(
-/*==================*/
-	ulint	space,	/*!< in: space id */
-	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
-	ulint	offset)	/*!< in: page number of a page which the current thread
-			wants to access */
-{
-	ib_int64_t	tablespace_version;
-	ulint		recent_blocks	= 0;
-	ulint		count;
-	ulint		LRU_recent_limit;
-	ulint		ibuf_mode;
-	ulint		low, high;
-	ulint		err;
-	ulint		i;
-	ulint		buf_read_ahead_random_area;
-
-	/* We have currently disabled random readahead */
-	return(0);
-
-	if (srv_startup_is_before_trx_rollback_phase) {
-		/* No read-ahead to avoid thread deadlocks */
-		return(0);
-	}
-
-	if (ibuf_bitmap_page(zip_size, offset)
-	    || trx_sys_hdr_page(space, offset)) {
-
-		/* If it is an ibuf bitmap page or trx sys hdr, we do
-		no read-ahead, as that could break the ibuf page access
-		order */
-
-		return(0);
-	}
-
-	/* Remember the tablespace version before we ask te tablespace size
-	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
-	do not try to read outside the bounds of the tablespace! */
-
-	tablespace_version = fil_space_get_version(space);
-
-	buf_read_ahead_random_area = BUF_READ_AHEAD_RANDOM_AREA;
-
-	low  = (offset / buf_read_ahead_random_area)
-		* buf_read_ahead_random_area;
-	high = (offset / buf_read_ahead_random_area + 1)
-		* buf_read_ahead_random_area;
-	if (high > fil_space_get_size(space)) {
-
-		high = fil_space_get_size(space);
-	}
-
-	/* Get the minimum LRU_position field value for an initial segment
-	of the LRU list, to determine which blocks have recently been added
-	to the start of the list. */
-
-	LRU_recent_limit = buf_LRU_get_recent_limit();
-
-	buf_pool_mutex_enter();
-
-	if (buf_pool->n_pend_reads
-	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-		buf_pool_mutex_exit();
-
-		return(0);
-	}
-
-	/* Count how many blocks in the area have been recently accessed,
-	that is, reside near the start of the LRU list. */
-
-	for (i = low; i < high; i++) {
-		const buf_page_t*	bpage = buf_page_hash_get(space, i);
-
-		if (bpage
-		    && buf_page_is_accessed(bpage)
-		    && (buf_page_get_LRU_position(bpage) > LRU_recent_limit)) {
-
-			recent_blocks++;
-
-			if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
-
-				buf_pool_mutex_exit();
-				goto read_ahead;
-			}
-		}
-	}
-
-	buf_pool_mutex_exit();
-	/* Do nothing */
-	return(0);
-
-read_ahead:
-	/* Read all the suitable blocks within the area */
-
-	if (ibuf_inside()) {
-		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
-	} else {
-		ibuf_mode = BUF_READ_ANY_PAGE;
-	}
-
-	count = 0;
-
-	for (i = low; i < high; i++) {
-		/* It is only sensible to do read-ahead in the non-sync aio
-		mode: hence FALSE as the first parameter */
-
-		if (!ibuf_bitmap_page(zip_size, i)) {
-			count += buf_read_page_low(
-				&err, FALSE,
-				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
-				space, zip_size, FALSE,
-				tablespace_version, i);
-			if (err == DB_TABLESPACE_DELETED) {
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					"  InnoDB: Warning: in random"
-					" readahead trying to access\n"
-					"InnoDB: tablespace %lu page %lu,\n"
-					"InnoDB: but the tablespace does not"
-					" exist or is just being dropped.\n",
-					(ulong) space, (ulong) i);
-			}
-		}
-	}
-
-	/* In simulated aio we wake the aio handler threads only after
-	queuing all aio requests, in native aio the following call does
-	nothing: */
-
-	os_aio_simulated_wake_handler_threads();
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints && (count > 0)) {
-		fprintf(stderr,
-			"Random read-ahead space %lu offset %lu pages %lu\n",
-			(ulong) space, (ulong) offset,
-			(ulong) count);
-	}
-#endif /* UNIV_DEBUG */
-
-	++srv_read_ahead_rnd;
-	return(count);
-}
-
-/********************************************************************//**
 High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread. Does a random read-ahead if it seems
-sensible.
-@return number of page read requests issued: this can be greater than
-1 if read-ahead occurred */
+released by the i/o-handler thread.
+@return TRUE if page has been read in, FALSE in case of failure */
 UNIV_INTERN
-ulint
+ibool
 buf_read_page(
 /*==========*/
 	ulint	space,	/*!< in: space id */
@@ -341,20 +173,17 @@ buf_read_page(
 {
 	ib_int64_t	tablespace_version;
 	ulint		count;
-	ulint		count2;
 	ulint		err;

 	tablespace_version = fil_space_get_version(space);

-	count = buf_read_ahead_random(space, zip_size, offset);
-
 	/* We do the i/o in the synchronous aio mode to save thread
 	switches: hence TRUE */

-	count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
-				   zip_size, FALSE,
-				   tablespace_version, offset);
-	srv_buf_pool_reads+= count2;
+	count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+				  zip_size, FALSE,
+				  tablespace_version, offset);
+	srv_buf_pool_reads += count;
 	if (err == DB_TABLESPACE_DELETED) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
@@ -371,14 +200,14 @@ buf_read_page(
 	/* Increment number of I/O operations used for LRU policy. */
 	buf_LRU_stat_inc_io();

-	return(count + count2);
+	return(count > 0);
 }

 /********************************************************************//**
 Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
 Does not read any page if the read-ahead mechanism is not activated. Note
-that the the algorithm looks at the 'natural' adjacent successor and
+that the algorithm looks at the 'natural' adjacent successor and
 predecessor of the page, which on the leaf level of a B-tree are the next
 and previous page in the chain of leaves. To know these, the page specified
 in (space, offset) must already be present in the buf_pool. Thus, the
@@ -498,9 +327,17 @@ buf_read_ahead_linear(
 			fail_count++;

 		} else if (pred_bpage) {
-			int res = (ut_ulint_cmp(
-				       buf_page_get_LRU_position(bpage),
-				       buf_page_get_LRU_position(pred_bpage)));
+			/* Note that buf_page_is_accessed() returns
+			the time of the first access. If some blocks
+			of the extent existed in the buffer pool at
+			the time of a linear access pattern, the first
+			access times may be nonmonotonic, even though
+			the latest access times were linear. The
+			threshold (srv_read_ahead_factor) should help
+			a little against this. */
+			int res = ut_ulint_cmp(
+				buf_page_is_accessed(bpage),
+				buf_page_is_accessed(pred_bpage));
 			/* Accesses not in the right order */
 			if (res != 0 && res != asc_or_desc) {
 				fail_count++;
@@ -643,7 +480,7 @@
 	LRU policy decision. */
 	buf_LRU_stat_inc_io();

-	++srv_read_ahead_seq;
+	buf_pool->stat.n_ra_pages_read += count;
 	return(count);
 }
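With LRU_position gone, the linear read-ahead trigger above now demands that first-access times be monotonic in the scan direction, which is why the added comment warns about pre-existing blocks making the times nonmonotonic. Reduced to its core, the test looks roughly like this (a condensed sketch; the real loop in buf_read_ahead_linear() also counts missing pages and honours the ibuf constraints):

#include <stdio.h>

/* Count pages whose first-access time is out of order for the given
   direction (+1 ascending, -1 descending); a page with time 0 was
   brought in by read-ahead and never touched, which also counts as
   a failure. */
static int count_disordered(const unsigned* access_time, int n,
			    int asc_or_desc)
{
	int	i;
	int	fail_count = 0;

	for (i = 1; i < n; i++) {
		if (access_time[i] == 0) {
			fail_count++;
		} else if (access_time[i - 1]) {
			int res = (access_time[i] < access_time[i - 1]) ? -1
				: (access_time[i] > access_time[i - 1]) ? 1
				: 0;

			if (res != 0 && res != asc_or_desc) {
				fail_count++;
			}
		}
	}
	return fail_count;
}

int main(void)
{
	/* First-access times (ms) of consecutive pages in an extent. */
	unsigned	times[] = { 100, 120, 130, 90, 140 };

	printf("%d\n", count_disordered(times, 5, 1));	/* 1: the 90 */
	return 0;
}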