author     unknown <monty@donna.mysql.com>   2001-02-17 14:19:19 +0200
committer  unknown <monty@donna.mysql.com>   2001-02-17 14:19:19 +0200
commit     2662b59306ef0cd495fa6e2edf7129e58a11393a (patch)
tree       bfe39951a73e906579ab819bf5198ad8f3a64a36 /innobase/buf
parent     66de55a56bdcf2f7a9c0c4f8e19b3e761475e202 (diff)
Added Innobase to source distribution
Docs/manual.texi:
Added Innobase documentation
configure.in:
Incremented version
include/my_base.h:
Added option for Innobase
myisam/mi_check.c:
cleanup
mysql-test/t/bdb.test:
cleanup
mysql-test/t/innobase.test:
Extended with new tests from bdb.test
mysql-test/t/merge.test:
Added test of SHOW CREATE
mysys/my_init.c:
Fix for UNIXWARE 7
scripts/mysql_install_db.sh:
Always write how to start mysqld
scripts/safe_mysqld.sh:
Fixed typo
sql/ha_innobase.cc:
Update to new version
sql/ha_innobase.h:
Update to new version
sql/handler.h:
Added 'update_table_comment()' and 'append_create_info()'
sql/sql_delete.cc:
Fixes for Innobase
sql/sql_select.cc:
Fixes for Innobase
sql/sql_show.cc:
Append create information (for MERGE tables)
sql/sql_update.cc:
Fixes for Innobase
Diffstat (limited to 'innobase/buf')
-rw-r--r--  innobase/buf/Makefile.am     24
-rw-r--r--  innobase/buf/buf0buf.c     1568
-rw-r--r--  innobase/buf/buf0flu.c      702
-rw-r--r--  innobase/buf/buf0lru.c      734
-rw-r--r--  innobase/buf/buf0rea.c      559
-rw-r--r--  innobase/buf/makefilewin     20
-rw-r--r--  innobase/buf/ts/makefile     20
-rw-r--r--  innobase/buf/ts/tsbuf.c     885
-rw-r--r--  innobase/buf/ts/tsos.c      185
9 files changed, 4697 insertions, 0 deletions
diff --git a/innobase/buf/Makefile.am b/innobase/buf/Makefile.am new file mode 100644 index 00000000000..b1463c2220e --- /dev/null +++ b/innobase/buf/Makefile.am @@ -0,0 +1,24 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# & Innobase Oy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include ../include/Makefile.i + +libs_LIBRARIES = libbuf.a + +libbuf_a_SOURCES = buf0buf.c buf0flu.c buf0lru.c buf0rea.c + +EXTRA_PROGRAMS = diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c new file mode 100644 index 00000000000..4ffda8772f3 --- /dev/null +++ b/innobase/buf/buf0buf.c @@ -0,0 +1,1568 @@ +/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License 2 + as published by the Free Software Foundation in June 1991. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License 2 + along with this program (in file COPYING); if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/****************************************************** +The database buffer buf_pool + +(c) 1995 Innobase Oy + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#include "buf0buf.h" + +#ifdef UNIV_NONINL +#include "buf0buf.ic" +#endif + +#include "mem0mem.h" +#include "btr0btr.h" +#include "fil0fil.h" +#include "lock0lock.h" +#include "btr0sea.h" +#include "ibuf0ibuf.h" +#include "dict0dict.h" +#include "log0recv.h" + +/* + IMPLEMENTATION OF THE BUFFER POOL + ================================= + +Performance improvement: +------------------------ +Thread scheduling in NT may be so slow that the OS wait mechanism should +not be used even in waiting for disk reads to complete. +Rather, we should put waiting query threads to the queue of +waiting jobs, and let the OS thread do something useful while the i/o +is processed. In this way we could remove most OS thread switches in +an i/o-intensive benchmark like TPC-C. + +A possibility is to put a user space thread library between the database +and NT. User space thread libraries might be very fast. + +SQL Server 7.0 can be configured to use 'fibers' which are lightweight +threads in NT. These should be studied. + + Buffer frames and blocks + ------------------------ +Following the terminology of Gray and Reuter, we call the memory +blocks where file pages are loaded buffer frames. For each buffer +frame there is a control block, or shortly, a block, in the buffer +control array. 
The control info which does not need to be stored +in the file along with the file page resides in the control block. + + Buffer pool struct + ------------------ +The buffer buf_pool contains a single mutex which protects all the +control data structures of the buf_pool. The content of a buffer frame is +protected by a separate read-write lock in its control block, though. +These locks can be locked and unlocked without owning the buf_pool mutex. +The OS events in the buf_pool struct can be waited for without owning the +buf_pool mutex. + +The buf_pool mutex is a hot-spot in main memory, causing a lot of +memory bus traffic on multiprocessor systems when processors +alternately access the mutex. On our Pentium, the mutex is accessed +maybe every 10 microseconds. We gave up the solution to have mutexes +for each control block, for instance, because it seemed to be +complicated. + +A solution to reduce mutex contention of the buf_pool mutex is to +create a separate mutex for the page hash table. On Pentium, +accessing the hash table takes 2 microseconds, about half +of the total buf_pool mutex hold time. + + Control blocks + -------------- + +The control block contains, for instance, the bufferfix count +which is incremented when a thread wants a file page to be fixed +in a buffer frame. The bufferfix operation does not lock the +contents of the frame, however. For this purpose, the control +block contains a read-write lock. + +The buffer frames have to be aligned so that the start memory +address of a frame is divisible by the universal page size, which +is a power of two. + +We intend to make the buffer buf_pool size on-line reconfigurable, +that is, the buf_pool size can be changed without closing the database. +Then the database administrator may adjust it to be bigger +at night, for example. The control block array must +contain enough control blocks for the maximum buffer buf_pool size +which is used in the particular database. +If the buf_pool size is cut, we exploit the virtual memory mechanism of +the OS, and just refrain from using frames at high addresses. Then the OS +can swap them to disk. + +The control blocks containing file pages are put to a hash table +according to the file address of the page. +We could speed up the access to an individual page by using +"pointer swizzling": we could replace the page references on +non-leaf index pages by direct pointers to the page, if it exists +in the buf_pool. We could make a separate hash table where we could +chain all the page references in non-leaf pages residing in the buf_pool, +using the page reference as the hash key, +and at the time of reading of a page update the pointers accordingly. +Drawbacks of this solution are added complexity and, +possibly, extra space required on non-leaf pages for memory pointers. +A simpler solution is just to speed up the hash table mechanism +in the database, using tables whose size is a power of 2. + + Lists of blocks + --------------- + +There are several lists of control blocks. The free list contains +blocks which are currently not used. + +The LRU-list contains all the blocks holding a file page +except those for which the bufferfix count is non-zero. +The pages are in the LRU list roughly in the order of the last +access to the page, so that the oldest pages are at the end of the +list. We also keep a pointer to near the end of the LRU list, +which we can use when we want to artificially age a page in the +buf_pool.
This is used if we know that some page is not needed +again for some time: we insert the block right after the pointer, +causing it to be replaced sooner than would normally be the case. +Currently this aging mechanism is used for the read-ahead mechanism +of pages, and it can also be used when there is a scan of a full +table which cannot fit in the memory. Putting the pages near the end +of the LRU list, we make sure that most of the buf_pool stays in the +main memory, undisturbed. + +The chain of modified blocks contains the blocks +holding file pages that have been modified in the memory +but not written to disk yet. The block with the oldest modification +which has not yet been written to disk is at the end of the chain. + + Loading a file page + ------------------- + +First, a victim block for replacement has to be found in the +buf_pool. It is taken from the free list or searched for from the +end of the LRU-list. An exclusive lock is reserved for the frame, +the io_fix field is set in the block fixing the block in buf_pool, +and the io-operation for loading the page is queued. The io-handler thread +releases the X-lock on the frame and resets the io_fix field +when the io operation completes. + +A thread may request the above operation using the buf_page_get- +function. It may then continue to request a lock on the frame. +The lock is granted when the io-handler releases the x-lock. + + Read-ahead + ---------- + +The read-ahead mechanism is intended to be intelligent and +isolated from the semantically higher levels of the database +index management. From the higher level we only need the +information if a file page has a natural successor or +predecessor page. On the leaf level of a B-tree index, +these are the next and previous pages in the natural +order of the pages. + +Let us first explain the read-ahead mechanism when the leaves +of a B-tree are scanned in an ascending or descending order. +When a page is referenced in the buf_pool for the first time, +the buffer manager checks if it is at the border of a so-called +linear read-ahead area. The tablespace is divided into these +areas of size 64 blocks, for example. So if the page is at the +border of such an area, the read-ahead mechanism checks if +all the other blocks in the area have been accessed in an +ascending or descending order. If this is the case, the system +looks at the natural successor or predecessor of the page, +checks if that is at the border of another area, and in this case +issues read-requests for all the pages in that area. Maybe +we could relax the condition that all the pages in the area +have to be accessed: if data is deleted from a table, there may +appear holes of unused pages in the area. + +A different read-ahead mechanism is used when there appears +to be a random access pattern to a file. +If a new page is referenced in the buf_pool, and several pages +of its random access area (for instance, 32 consecutive pages +in a tablespace) have recently been referenced, we may predict +that the whole area may be needed in the near future, and issue +the read requests for the whole area.
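
A minimal sketch of the linear read-ahead border arithmetic (the
variable names here are illustrative; the actual logic lives in
buf_read_ahead_linear() in buf0rea.c):

	area = 64;			(pages per linear area)
	low  = (offset / area) * area;	(first page of the area)
	high = low + area;		(one past the last page)

	if (offset == low || offset == high - 1) {
		the page is at an area border: check whether the other
		pages of the area were accessed in ascending or
		descending order, and if so, issue the read requests
		for the adjacent area
	}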
*/ + +buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */ + +ulint buf_dbg_counter = 0; /* This is used to insert validation + operations in execution in the + debug version */ +ibool buf_debug_prints = FALSE; /* If this is set TRUE, + the program prints info whenever + read-ahead or flush occurs */ + +/************************************************************************ +Initializes a buffer control block when the buf_pool is created. */ +static +void +buf_block_init( +/*===========*/ + buf_block_t* block, /* in: pointer to control block */ + byte* frame) /* in: pointer to buffer frame */ +{ + block->state = BUF_BLOCK_NOT_USED; + + block->frame = frame; + + block->modify_clock = ut_dulint_zero; + + rw_lock_create(&(block->lock)); + ut_ad(rw_lock_validate(&(block->lock))); + + rw_lock_create(&(block->read_lock)); + rw_lock_set_level(&(block->read_lock), SYNC_NO_ORDER_CHECK); + + rw_lock_create(&(block->debug_latch)); + rw_lock_set_level(&(block->debug_latch), SYNC_NO_ORDER_CHECK); +} + +/************************************************************************ +Creates a buffer buf_pool object. */ +static +buf_pool_t* +buf_pool_create( +/*============*/ + /* out, own: buf_pool object, NULL if not + enough memory */ + ulint max_size, /* in: maximum size of the buf_pool in + blocks */ + ulint curr_size) /* in: current size to use, must be <= + max_size, currently must be equal to + max_size */ +{ + byte* frame; + ulint i; + buf_block_t* block; + + ut_a(max_size == curr_size); + + buf_pool = mem_alloc(sizeof(buf_pool_t)); + + /* 1. Initialize general fields + ---------------------------- */ + mutex_create(&(buf_pool->mutex)); + mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL); + + mutex_enter(&(buf_pool->mutex)); + + buf_pool->frame_mem = ut_malloc(UNIV_PAGE_SIZE * (max_size + 1)); + + if (buf_pool->frame_mem == NULL) { + + return(NULL); + } + + buf_pool->blocks = ut_malloc(sizeof(buf_block_t) * max_size); + + if (buf_pool->blocks == NULL) { + + return(NULL); + } + + buf_pool->max_size = max_size; + buf_pool->curr_size = curr_size; + + /* Align pointer to the first frame */ + + frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE); + buf_pool->frame_zero = frame; + + /* Init block structs and assign frames for them */ + for (i = 0; i < max_size; i++) { + + block = buf_pool_get_nth_block(buf_pool, i); + buf_block_init(block, frame); + frame = frame + UNIV_PAGE_SIZE; + } + + buf_pool->page_hash = hash_create(2 * max_size); + + buf_pool->n_pend_reads = 0; + buf_pool->n_pages_read = 0; + buf_pool->n_pages_written = 0; + buf_pool->n_pages_created = 0; + + /* 2. Initialize flushing fields + ---------------------------- */ + UT_LIST_INIT(buf_pool->flush_list); + + for (i = BUF_FLUSH_LRU; i <= BUF_FLUSH_LIST; i++) { + buf_pool->n_flush[i] = 0; + buf_pool->no_flush[i] = os_event_create(NULL); + } + + buf_pool->LRU_flush_ended = 0; + + buf_pool->ulint_clock = 1; + buf_pool->freed_page_clock = 0; + + /* 3.
Initialize LRU fields + ---------------------------- */ + UT_LIST_INIT(buf_pool->LRU); + + buf_pool->LRU_old = NULL; + + /* Add control blocks to the free list */ + UT_LIST_INIT(buf_pool->free); + for (i = 0; i < curr_size; i++) { + + block = buf_pool_get_nth_block(buf_pool, i); + UT_LIST_ADD_FIRST(free, buf_pool->free, block); + } + + mutex_exit(&(buf_pool->mutex)); + + btr_search_sys_create(curr_size * UNIV_PAGE_SIZE / sizeof(void*) / 64); + + return(buf_pool); +} + +/************************************************************************ +Initializes the buffer buf_pool of the database. */ + +void +buf_pool_init( +/*==========*/ + ulint max_size, /* in: maximum size of the buf_pool in blocks */ + ulint curr_size) /* in: current size to use, must be <= + max_size */ +{ + ut_a(buf_pool == NULL); + + buf_pool_create(max_size, curr_size); + + ut_ad(buf_validate()); +} + +/************************************************************************ +Allocates a buffer block. */ +UNIV_INLINE +buf_block_t* +buf_block_alloc(void) +/*=================*/ + /* out, own: the allocated block */ +{ + buf_block_t* block; + + block = buf_LRU_get_free_block(); + + return(block); +} + +/************************************************************************ +Moves the block to the start of the LRU list if there is a danger +that the block would drift out of the buffer pool. */ +UNIV_INLINE +void +buf_block_make_young( +/*=================*/ + buf_block_t* block) /* in: block to make younger */ +{ + if (buf_pool->freed_page_clock >= block->freed_page_clock + + 1 + (buf_pool->curr_size / 1024)) { + + /* There has been freeing activity in the LRU list: + best to move to the head of the LRU list */ + + buf_LRU_make_block_young(block); + } +} + +/************************************************************************ +Moves a page to the start of the buffer pool LRU list. This high-level +function can be used to prevent an important page from slipping out of +the buffer pool. */ + +void +buf_page_make_young( +/*=================*/ + buf_frame_t* frame) /* in: buffer frame of a file page */ +{ + buf_block_t* block; + + mutex_enter(&(buf_pool->mutex)); + + block = buf_block_align(frame); + + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + + buf_LRU_make_block_young(block); + + mutex_exit(&(buf_pool->mutex)); +} + +/************************************************************************ +Frees a buffer block which does not contain a file page. */ +UNIV_INLINE +void +buf_block_free( +/*===========*/ + buf_block_t* block) /* in, own: block to be freed */ +{ + ut_ad(block->state != BUF_BLOCK_FILE_PAGE); + + mutex_enter(&(buf_pool->mutex)); + + buf_LRU_block_free_non_file_page(block); + + mutex_exit(&(buf_pool->mutex)); +} + +/************************************************************************* +Allocates a buffer frame. */ + +buf_frame_t* +buf_frame_alloc(void) +/*=================*/ + /* out: buffer frame */ +{ + return(buf_block_alloc()->frame); +} + +/************************************************************************* +Frees a buffer frame which does not contain a file page. */ + +void +buf_frame_free( +/*===========*/ + buf_frame_t* frame) /* in: buffer frame */ +{ + buf_block_free(buf_block_align(frame)); +} + +/************************************************************************ +Returns the buffer control block if the page can be found in the buffer +pool. NOTE that it is possible that the page is not yet read +from disk, though. This is a very low-level function: use with care!
*/ + +buf_block_t* +buf_page_peek_block( +/*================*/ + /* out: control block if found from page hash table, + otherwise NULL; NOTE that the page is not necessarily + yet read from disk! */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + buf_block_t* block; + + mutex_enter_fast(&(buf_pool->mutex)); + + block = buf_page_hash_get(space, offset); + + mutex_exit(&(buf_pool->mutex)); + + return(block); +} + +/************************************************************************ +Returns the current state of is_hashed of a page. FALSE if the page is +not in the pool. NOTE that this operation does not fix the page in the +pool if it is found there. */ + +ibool +buf_page_peek_if_search_hashed( +/*===========================*/ + /* out: TRUE if page hash index is built in search + system */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + buf_block_t* block; + ibool is_hashed; + + mutex_enter_fast(&(buf_pool->mutex)); + + block = buf_page_hash_get(space, offset); + + if (!block) { + is_hashed = FALSE; + } else { + is_hashed = block->is_hashed; + } + + mutex_exit(&(buf_pool->mutex)); + + return(is_hashed); +} + +/************************************************************************ +Returns TRUE if the page can be found in the buffer pool hash table. NOTE +that it is possible that the page is not yet read from disk, though. */ + +ibool +buf_page_peek( +/*==========*/ + /* out: TRUE if found from page hash table, + NOTE that the page is not necessarily yet read + from disk! */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + if (buf_page_peek_block(space, offset)) { + + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************ +This is the general function used to get access to a database page. 
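
A minimal usage sketch, assuming the usual buf_page_get macro which
invokes this function with mode BUF_GET:

	mtr_start(&mtr);

	frame = buf_page_get(space, offset, RW_S_LATCH, &mtr);

	(read the page contents through frame)

	mtr_commit(&mtr);	(releases the buffer fix and the latch)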
*/ + +buf_frame_t* +buf_page_get_gen( +/*=============*/ + /* out: pointer to the frame or NULL */ + ulint space, /* in: space id */ + ulint offset, /* in: page number */ + ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ + buf_frame_t* guess, /* in: guessed frame or NULL */ + ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, + BUF_GET_NO_LATCH, BUF_GET_NOWAIT */ +#ifdef UNIV_SYNC_DEBUG + char* file, /* in: file name */ + ulint line, /* in: line where called */ +#endif + mtr_t* mtr) /* in: mini-transaction */ +{ + buf_block_t* block; + ibool accessed; + ulint fix_type; + ibool success; + ibool must_read; + + ut_ad(mtr); + ut_ad((rw_latch == RW_S_LATCH) + || (rw_latch == RW_X_LATCH) + || (rw_latch == RW_NO_LATCH)); + ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH)); + ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL) + || (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT)); +#ifndef UNIV_LOG_DEBUG + ut_ad(!ibuf_inside() || ibuf_page(space, offset)); +#endif +loop: + mutex_enter_fast(&(buf_pool->mutex)); + + block = NULL; + + if (guess) { + block = buf_block_align(guess); + + if ((offset != block->offset) || (space != block->space) + || (block->state != BUF_BLOCK_FILE_PAGE)) { + + block = NULL; + } + } + + if (block == NULL) { + block = buf_page_hash_get(space, offset); + } + + if (block == NULL) { + /* Page not in buf_pool: needs to be read from file */ + + mutex_exit(&(buf_pool->mutex)); + + if (mode == BUF_GET_IF_IN_POOL) { + + return(NULL); + } + + buf_read_page(space, offset); + + #ifdef UNIV_DEBUG + buf_dbg_counter++; + + if (buf_dbg_counter % 37 == 0) { + ut_ad(buf_validate()); + } + #endif + goto loop; + } + + must_read = FALSE; + + if (block->io_fix == BUF_IO_READ) { + + must_read = TRUE; + + if (mode == BUF_GET_IF_IN_POOL) { + + /* The page is only being read to buffer */ + mutex_exit(&(buf_pool->mutex)); + + return(NULL); + } + } + +#ifdef UNIV_SYNC_DEBUG + buf_block_buf_fix_inc_debug(block, file, line); +#else + buf_block_buf_fix_inc(block); +#endif + buf_block_make_young(block); + + /* Check if this is the first access to the page */ + + accessed = block->accessed; + + block->accessed = TRUE; + + mutex_exit(&(buf_pool->mutex)); + +#ifdef UNIV_DEBUG + buf_dbg_counter++; + + if (buf_dbg_counter % 5771 == 0) { + ut_ad(buf_validate()); + } +#endif + ut_ad(block->buf_fix_count > 0); + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + + if (mode == BUF_GET_NOWAIT) { + if (rw_latch == RW_S_LATCH) { + success = rw_lock_s_lock_func_nowait(&(block->lock) + #ifdef UNIV_SYNC_DEBUG + ,file, line + #endif + ); + fix_type = MTR_MEMO_PAGE_S_FIX; + } else { + ut_ad(rw_latch == RW_X_LATCH); + success = rw_lock_x_lock_func_nowait(&(block->lock) + #ifdef UNIV_SYNC_DEBUG + ,file, line + #endif + ); + fix_type = MTR_MEMO_PAGE_X_FIX; + } + + if (!success) { + mutex_enter(&(buf_pool->mutex)); + + block->buf_fix_count--; +#ifdef UNIV_SYNC_DEBUG + rw_lock_s_unlock(&(block->debug_latch)); +#endif + mutex_exit(&(buf_pool->mutex)); + + return(NULL); + } + } else if (rw_latch == RW_NO_LATCH) { + + if (must_read) { + rw_lock_x_lock(&(block->read_lock)); + rw_lock_x_unlock(&(block->read_lock)); + } + + fix_type = MTR_MEMO_BUF_FIX; + } else if (rw_latch == RW_S_LATCH) { + + rw_lock_s_lock_func(&(block->lock) + #ifdef UNIV_SYNC_DEBUG + ,0, file, line + #endif + ); + fix_type = MTR_MEMO_PAGE_S_FIX; + } else { + rw_lock_x_lock_func(&(block->lock), 0 + #ifdef UNIV_SYNC_DEBUG + , file, line + #endif + ); + fix_type = MTR_MEMO_PAGE_X_FIX; + } + + mtr_memo_push(mtr, block, fix_type); + + 
if (!accessed) { + /* In the case of a first access, try to apply linear + read-ahead */ + + buf_read_ahead_linear(space, offset); + } + +#ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); +#endif + return(block->frame); +} + +/************************************************************************ +This is the general function used to get optimistic access to a database +page. */ + +ibool +buf_page_optimistic_get_func( +/*=========================*/ + /* out: TRUE if success */ + ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ + buf_frame_t* guess, /* in: guessed frame */ + dulint modify_clock,/* in: modify clock value if mode is + ..._GUESS_ON_CLOCK */ +#ifdef UNIV_SYNC_DEBUG + char* file, /* in: file name */ + ulint line, /* in: line where called */ +#endif + mtr_t* mtr) /* in: mini-transaction */ +{ + buf_block_t* block; + ibool accessed; + ibool success; + ulint fix_type; + + ut_ad(mtr && guess); + ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); + + block = buf_block_align(guess); + + mutex_enter(&(buf_pool->mutex)); + + if (block->state != BUF_BLOCK_FILE_PAGE) { + + mutex_exit(&(buf_pool->mutex)); + + return(FALSE); + } + +#ifdef UNIV_SYNC_DEBUG + buf_block_buf_fix_inc_debug(block, file, line); +#else + buf_block_buf_fix_inc(block); +#endif + buf_block_make_young(block); + + /* Check if this is the first access to the page */ + + accessed = block->accessed; + + block->accessed = TRUE; + + mutex_exit(&(buf_pool->mutex)); + + ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset)); + + if (rw_latch == RW_S_LATCH) { + success = rw_lock_s_lock_func_nowait(&(block->lock) + #ifdef UNIV_SYNC_DEBUG + , file, line + #endif + ); + fix_type = MTR_MEMO_PAGE_S_FIX; + } else { + success = rw_lock_x_lock_func_nowait(&(block->lock) + #ifdef UNIV_SYNC_DEBUG + , file, line + #endif + ); + fix_type = MTR_MEMO_PAGE_X_FIX; + } + + if (!success) { + mutex_enter(&(buf_pool->mutex)); + + block->buf_fix_count--; +#ifdef UNIV_SYNC_DEBUG + rw_lock_s_unlock(&(block->debug_latch)); +#endif + mutex_exit(&(buf_pool->mutex)); + + return(FALSE); + } + + if (!UT_DULINT_EQ(modify_clock, block->modify_clock)) { + + buf_page_dbg_add_level(block->frame, SYNC_NO_ORDER_CHECK); + + if (rw_latch == RW_S_LATCH) { + rw_lock_s_unlock(&(block->lock)); + } else { + rw_lock_x_unlock(&(block->lock)); + } + + mutex_enter(&(buf_pool->mutex)); + + block->buf_fix_count--; +#ifdef UNIV_SYNC_DEBUG + rw_lock_s_unlock(&(block->debug_latch)); +#endif + mutex_exit(&(buf_pool->mutex)); + + return(FALSE); + } + + mtr_memo_push(mtr, block, fix_type); + +#ifdef UNIV_DEBUG + buf_dbg_counter++; + + if (buf_dbg_counter % 5771 == 0) { + ut_ad(buf_validate()); + } +#endif + ut_ad(block->buf_fix_count > 0); + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + + if (!accessed) { + /* In the case of a first access, try to apply linear + read-ahead */ + + buf_read_ahead_linear(buf_frame_get_space_id(guess), + buf_frame_get_page_no(guess)); + } + +#ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); +#endif + return(TRUE); +} + +/************************************************************************ +This is used to get access to a known database page, when no waiting can be +done. 
*/ + +ibool +buf_page_get_known_nowait( +/*======================*/ + /* out: TRUE if success */ + ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ + buf_frame_t* guess, /* in: the known page frame */ + ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ +#ifdef UNIV_SYNC_DEBUG + char* file, /* in: file name */ + ulint line, /* in: line where called */ +#endif + mtr_t* mtr) /* in: mini-transaction */ +{ + buf_block_t* block; + ibool success; + ulint fix_type; + + ut_ad(mtr); + ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); + + block = buf_block_align(guess); + + mutex_enter(&(buf_pool->mutex)); + +#ifdef UNIV_SYNC_DEBUG + buf_block_buf_fix_inc_debug(block, file, line); +#else + buf_block_buf_fix_inc(block); +#endif + if (mode == BUF_MAKE_YOUNG) { + buf_block_make_young(block); + } + + mutex_exit(&(buf_pool->mutex)); + + ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); + + if (rw_latch == RW_S_LATCH) { + success = rw_lock_s_lock_func_nowait(&(block->lock) + #ifdef UNIV_SYNC_DEBUG + , file, line + #endif + ); + fix_type = MTR_MEMO_PAGE_S_FIX; + } else { + success = rw_lock_x_lock_func_nowait(&(block->lock) + #ifdef UNIV_SYNC_DEBUG + , file, line + #endif + ); + fix_type = MTR_MEMO_PAGE_X_FIX; + } + + if (!success) { + mutex_enter(&(buf_pool->mutex)); + + block->buf_fix_count--; +#ifdef UNIV_SYNC_DEBUG + rw_lock_s_unlock(&(block->debug_latch)); +#endif + mutex_exit(&(buf_pool->mutex)); + + return(FALSE); + } + + mtr_memo_push(mtr, block, fix_type); + +#ifdef UNIV_DEBUG + buf_dbg_counter++; + + if (buf_dbg_counter % 5771 == 0) { + ut_ad(buf_validate()); + } +#endif + ut_ad(block->buf_fix_count > 0); + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + +#ifdef UNIV_IBUF_DEBUG + ut_a((mode == BUF_KEEP_OLD) + || (ibuf_count_get(block->space, block->offset) == 0)); +#endif + return(TRUE); +} + +/************************************************************************ +Inits a page to the buffer buf_pool. */ +static +void +buf_page_init( +/*==========*/ + /* out: pointer to the block */ + ulint space, /* in: space id */ + ulint offset, /* in: offset of the page within space + in units of a page */ + buf_block_t* block) /* in: block to init */ +{ + ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(block->state == BUF_BLOCK_READY_FOR_USE); + + /* Set the state of the block */ + block->state = BUF_BLOCK_FILE_PAGE; + block->space = space; + block->offset = offset; + + block->lock_hash_val = lock_rec_hash(space, offset); + block->lock_mutex = NULL; + + /* Insert into the hash table of file pages */ + + HASH_INSERT(buf_block_t, hash, buf_pool->page_hash, + buf_page_address_fold(space, offset), block); + + block->freed_page_clock = 0; + + block->newest_modification = ut_dulint_zero; + block->oldest_modification = ut_dulint_zero; + + block->accessed = FALSE; + block->buf_fix_count = 0; + block->io_fix = 0; + + block->n_hash_helps = 0; + block->is_hashed = FALSE; +} + +/************************************************************************ +Function which inits a page for read to the buffer buf_pool. If the page is +already in buf_pool, does nothing. Sets the io_fix flag to BUF_IO_READ and +sets a non-recursive exclusive lock on the buffer frame. The io-handler must +take care that the flag is cleared and the lock released later. This is one +of the functions which perform the state transition NOT_USED => FILE_PAGE to +a block (the other is buf_page_create). 
*/ + +buf_block_t* +buf_page_init_for_read( +/*===================*/ + /* out: pointer to the block or NULL */ + ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + buf_block_t* block; + mtr_t mtr; + + ut_ad(buf_pool); + + if (mode == BUF_READ_IBUF_PAGES_ONLY) { + /* It is a read-ahead within an ibuf routine */ + + ut_ad(!ibuf_bitmap_page(offset)); + ut_ad(ibuf_inside()); + + mtr_start(&mtr); + + if (!ibuf_page_low(space, offset, &mtr)) { + + mtr_commit(&mtr); + + return(NULL); + } + } else { + ut_ad(mode == BUF_READ_ANY_PAGE); + } + + block = buf_block_alloc(); + + ut_ad(block); + + mutex_enter(&(buf_pool->mutex)); + + if (NULL != buf_page_hash_get(space, offset)) { + + /* The page is already in buf_pool, return */ + + mutex_exit(&(buf_pool->mutex)); + buf_block_free(block); + + if (mode == BUF_READ_IBUF_PAGES_ONLY) { + + mtr_commit(&mtr); + } + + return(NULL); + } + + ut_ad(block); + + buf_page_init(space, offset, block); + + /* The block must be put to the LRU list, to the old blocks */ + + buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */ + + block->io_fix = BUF_IO_READ; + buf_pool->n_pend_reads++; + + /* We set a pass-type x-lock on the frame because then the same + thread which called for the read operation (and is running now at + this point of code) can wait for the read to complete by waiting + for the x-lock on the frame; if the x-lock were recursive, the + same thread would illegally get the x-lock before the page read + is completed. The x-lock is cleared by the io-handler thread. */ + + rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ); + + rw_lock_x_lock_gen(&(block->read_lock), BUF_IO_READ); + + mutex_exit(&(buf_pool->mutex)); + + if (mode == BUF_READ_IBUF_PAGES_ONLY) { + + mtr_commit(&mtr); + } + + return(block); +} + +/************************************************************************ +Initializes a page to the buffer buf_pool. The page is usually not read +from a file even if it cannot be found in the buffer buf_pool. This is one +of the functions which perform to a block a state transition NOT_USED => +FILE_PAGE (the other is buf_page_init_for_read above). 
*/ + +buf_frame_t* +buf_page_create( +/*============*/ + /* out: pointer to the frame, page bufferfixed */ + ulint space, /* in: space id */ + ulint offset, /* in: offset of the page within space in units of + a page */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + buf_frame_t* frame; + buf_block_t* block; + buf_block_t* free_block = NULL; + + ut_ad(mtr); + + free_block = buf_LRU_get_free_block(); + + /* Delete possible entries for the page from the insert buffer: + such can exist if the page belonged to an index which was dropped */ + + ibuf_merge_or_delete_for_page(NULL, space, offset); + + mutex_enter(&(buf_pool->mutex)); + + block = buf_page_hash_get(space, offset); + + if (block != NULL) { +#ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); +#endif + /* Page can be found in buf_pool */ + mutex_exit(&(buf_pool->mutex)); + + buf_block_free(free_block); + + frame = buf_page_get_with_no_latch(space, offset, mtr); + + return(frame); + } + + /* If we get here, the page was not in buf_pool: init it there */ + + if (buf_debug_prints) { + printf("Creating space %lu page %lu to buffer\n", space, + offset); + } + + block = free_block; + + buf_page_init(space, offset, block); + + /* The block must be put to the LRU list */ + buf_LRU_add_block(block, FALSE); + +#ifdef UNIV_SYNC_DEBUG + buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__); +#else + buf_block_buf_fix_inc(block); +#endif + mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); + + block->accessed = TRUE; + + buf_pool->n_pages_created++; + + mutex_exit(&(buf_pool->mutex)); + + /* Flush pages from the end of the LRU list if necessary */ + buf_flush_free_margin(); + + frame = block->frame; +#ifdef UNIV_DEBUG + buf_dbg_counter++; + + if (buf_dbg_counter % 357 == 0) { + ut_ad(buf_validate()); + } +#endif +#ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); +#endif + return(frame); +} + +/************************************************************************ +Completes an asynchronous read or write request of a file page to or from +the buffer pool. */ + +void +buf_page_io_complete( +/*=================*/ + buf_block_t* block) /* in: pointer to the block in question */ +{ + dulint id; + dict_index_t* index; + ulint io_type; + + ut_ad(block); + + io_type = block->io_fix; + + if (io_type == BUF_IO_READ) { + if (recv_recovery_is_on()) { + recv_recover_page(TRUE, block->frame, block->space, + block->offset); + } + + if (!recv_no_ibuf_operations) { + ibuf_merge_or_delete_for_page(block->frame, + block->space, block->offset); + } + } + +#ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); +#endif + mutex_enter(&(buf_pool->mutex)); + + /* Because this thread which does the unlocking is not the same that + did the locking, we use a pass value != 0 in unlock, which simply + removes the newest lock debug record, without checking the thread + id. */ + + block->io_fix = 0; + + if (io_type == BUF_IO_READ) { + /* NOTE that the call to ibuf may have moved the ownership of + the x-latch to this OS thread: do not let this confuse you in + debugging! 
*/ + + ut_ad(buf_pool->n_pend_reads > 0); + buf_pool->n_pend_reads--; + buf_pool->n_pages_read++; +/* + if (0 != ut_dulint_cmp( + mach_read_from_8(block->frame + FIL_PAGE_LSN), + mach_read_from_8(block->frame + UNIV_PAGE_SIZE + - FIL_PAGE_END_LSN))) { + + printf("DB error: file page corrupted!\n"); + + ut_error; + } +*/ + rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ); + rw_lock_x_unlock_gen(&(block->read_lock), BUF_IO_READ); + + if (buf_debug_prints) { + printf("Has read "); + } + } else { + ut_ad(io_type == BUF_IO_WRITE); + + /* Write means a flush operation: call the completion + routine in the flush system */ + + buf_flush_write_complete(block); + + rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE); + + buf_pool->n_pages_written++; + + if (buf_debug_prints) { + printf("Has written "); + } + } + + mutex_exit(&(buf_pool->mutex)); + + if (buf_debug_prints) { + printf("page space %lu page no %lu", block->space, + block->offset); + id = btr_page_get_index_id(block->frame); + + index = NULL; + /* The following can cause deadlocks if used: */ + /* + index = dict_index_get_if_in_cache(id); + + if (index) { + printf(" index name %s table %s", index->name, + index->table->name); + } + */ + + printf("\n"); + } +} + +/************************************************************************* +Invalidates the file pages in the buffer pool when an archive recovery is +completed. All the file pages buffered must be in a replaceable state when +this function is called: not latched and not modified. */ + +void +buf_pool_invalidate(void) +/*=====================*/ +{ + ibool freed; + + ut_ad(buf_all_freed()); + + freed = TRUE; + + while (freed) { + freed = buf_LRU_search_and_free_block(0); + } + + mutex_enter(&(buf_pool->mutex)); + + ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0); + + mutex_exit(&(buf_pool->mutex)); +} + +/************************************************************************* +Validates the buffer buf_pool data structure. 
*/ + +ibool +buf_validate(void) +/*==============*/ +{ + buf_block_t* block; + ulint i; + ulint n_single_flush = 0; + ulint n_lru_flush = 0; + ulint n_list_flush = 0; + ulint n_lru = 0; + ulint n_flush = 0; + ulint n_free = 0; + ulint n_page = 0; + + ut_ad(buf_pool); + + mutex_enter(&(buf_pool->mutex)); + + for (i = 0; i < buf_pool->curr_size; i++) { + + block = buf_pool_get_nth_block(buf_pool, i); + + if (block->state == BUF_BLOCK_FILE_PAGE) { + + ut_a(buf_page_hash_get(block->space, + block->offset) == block); + n_page++; + +#ifdef UNIV_IBUF_DEBUG + ut_a((block->io_fix == BUF_IO_READ) + || ibuf_count_get(block->space, block->offset) + == 0); +#endif + if (block->io_fix == BUF_IO_WRITE) { + + if (block->flush_type == BUF_FLUSH_LRU) { + n_lru_flush++; + ut_a(rw_lock_is_locked(&(block->lock), + RW_LOCK_SHARED)); + } else if (block->flush_type == + BUF_FLUSH_LIST) { + n_list_flush++; + } else if (block->flush_type == + BUF_FLUSH_SINGLE_PAGE) { + n_single_flush++; + } else { + ut_error; + } + + } else if (block->io_fix == BUF_IO_READ) { + + ut_a(rw_lock_is_locked(&(block->lock), + RW_LOCK_EX)); + } + + n_lru++; + + if (ut_dulint_cmp(block->oldest_modification, + ut_dulint_zero) > 0) { + n_flush++; + } + + } else if (block->state == BUF_BLOCK_NOT_USED) { + n_free++; + } + } + + if (n_lru + n_free > buf_pool->curr_size) { + printf("n LRU %lu, n free %lu\n", n_lru, n_free); + ut_error; + } + + ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru); + if (UT_LIST_GET_LEN(buf_pool->free) != n_free) { + printf("Free list len %lu, free blocks %lu\n", + UT_LIST_GET_LEN(buf_pool->free), n_free); + ut_error; + } + ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush); + + ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush); + ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush); + ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush); + + mutex_exit(&(buf_pool->mutex)); + + ut_a(buf_LRU_validate()); + ut_a(buf_flush_validate()); + + return(TRUE); +} + +/************************************************************************* +Prints info of the buffer buf_pool data structure. 
*/ + +void +buf_print(void) +/*===========*/ +{ + dulint* index_ids; + ulint* counts; + ulint size; + ulint i; + ulint j; + dulint id; + ulint n_found; + buf_frame_t* frame; + dict_index_t* index; + + ut_ad(buf_pool); + + size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE; + + index_ids = mem_alloc(sizeof(dulint) * size); + counts = mem_alloc(sizeof(ulint) * size); + + mutex_enter(&(buf_pool->mutex)); + + printf("LRU len %lu \n", UT_LIST_GET_LEN(buf_pool->LRU)); + printf("free len %lu \n", UT_LIST_GET_LEN(buf_pool->free)); + printf("flush len %lu \n", UT_LIST_GET_LEN(buf_pool->flush_list)); + printf("buf_pool size %lu \n", size); + + printf("n pending reads %lu \n", buf_pool->n_pend_reads); + + printf("n pending flush LRU %lu list %lu single page %lu\n", + buf_pool->n_flush[BUF_FLUSH_LRU], + buf_pool->n_flush[BUF_FLUSH_LIST], + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]); + + printf("pages read %lu, created %lu, written %lu\n", + buf_pool->n_pages_read, buf_pool->n_pages_created, + buf_pool->n_pages_written); + + /* Count the number of blocks belonging to each index in the buffer */ + + n_found = 0; + + for (i = 0 ; i < size; i++) { + counts[i] = 0; + } + + for (i = 0; i < size; i++) { + frame = buf_pool_get_nth_block(buf_pool, i)->frame; + + if (fil_page_get_type(frame) == FIL_PAGE_INDEX) { + + id = btr_page_get_index_id(frame); + + /* Look for the id in the index_ids array */ + j = 0; + + while (j < n_found) { + + if (ut_dulint_cmp(index_ids[j], id) == 0) { + (counts[j])++; + + break; + } + j++; + } + + if (j == n_found) { + n_found++; + index_ids[j] = id; + counts[j] = 1; + } + } + } + + mutex_exit(&(buf_pool->mutex)); + + for (i = 0; i < n_found; i++) { + index = dict_index_get_if_in_cache(index_ids[i]); + + printf("Block count for index %lu in buffer is about %lu", + ut_dulint_get_low(index_ids[i]), counts[i]); + + if (index) { + printf(" index name %s table %s", index->name, + index->table->name); + } + + printf("\n"); + } + + mem_free(index_ids); + mem_free(counts); + + ut_a(buf_validate()); +} + +/************************************************************************* +Prints info of the buffer i/o. */ + +void +buf_print_io(void) +/*==============*/ +{ + ut_ad(buf_pool); + + mutex_enter(&(buf_pool->mutex)); + + printf("pages read %lu, created %lu, written %lu\n", + buf_pool->n_pages_read, buf_pool->n_pages_created, + buf_pool->n_pages_written); + mutex_exit(&(buf_pool->mutex)); +} + +/************************************************************************* +Checks that all file pages in the buffer are in a replaceable state. */ + +ibool +buf_all_freed(void) +/*===============*/ +{ + buf_block_t* block; + ulint i; + + ut_ad(buf_pool); + + mutex_enter(&(buf_pool->mutex)); + + for (i = 0; i < buf_pool->curr_size; i++) { + + block = buf_pool_get_nth_block(buf_pool, i); + + if (block->state == BUF_BLOCK_FILE_PAGE) { + + if (!buf_flush_ready_for_replace(block)) { + + /* printf("Page %lu %lu still fixed or dirty\n", + block->space, block->offset); */ + ut_error; + } + } + } + + mutex_exit(&(buf_pool->mutex)); + + return(TRUE); +} + +/************************************************************************* +Checks that there currently are no pending i/o-operations for the buffer +pool. 
*/ + +ibool +buf_pool_check_no_pending_io(void) +/*==============================*/ + /* out: TRUE if there is no pending i/o */ +{ + ibool ret; + + mutex_enter(&(buf_pool->mutex)); + + if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU] + + buf_pool->n_flush[BUF_FLUSH_LIST] + + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) { + ret = FALSE; + } else { + ret = TRUE; + } + + mutex_exit(&(buf_pool->mutex)); + + return(ret); +} + +/************************************************************************* +Gets the current length of the free list of buffer blocks. */ + +ulint +buf_get_free_list_len(void) +/*=======================*/ +{ + ulint len; + + mutex_enter(&(buf_pool->mutex)); + + len = UT_LIST_GET_LEN(buf_pool->free); + + mutex_exit(&(buf_pool->mutex)); + + return(len); +} diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c new file mode 100644 index 00000000000..d3a86b0c18d --- /dev/null +++ b/innobase/buf/buf0flu.c @@ -0,0 +1,702 @@ +/****************************************************** +The database buffer buf_pool flush algorithm + +(c) 1995 Innobase Oy + +Created 11/11/1995 Heikki Tuuri +*******************************************************/ + +#include "buf0flu.h" + +#ifdef UNIV_NONINL +#include "buf0flu.ic" +#endif + +#include "ut0byte.h" +#include "ut0lst.h" +#include "fil0fil.h" + +#include "buf0buf.h" +#include "buf0lru.h" +#include "buf0rea.h" +#include "ibuf0ibuf.h" +#include "log0log.h" +#include "os0file.h" + +/* When flushed, dirty blocks are searched in neighborhoods of this size, and +flushed along with the original page. */ + +#define BUF_FLUSH_AREA ut_min(BUF_READ_AHEAD_AREA,\ + buf_pool->curr_size / 16) + +/********************************************************************** +Validates the flush list. */ +static +ibool +buf_flush_validate_low(void); +/*========================*/ + /* out: TRUE if ok */ + +/************************************************************************ +Inserts a modified block into the flush list. */ + +void +buf_flush_insert_into_flush_list( +/*=============================*/ + buf_block_t* block) /* in: block which is modified */ +{ + ut_ad(mutex_own(&(buf_pool->mutex))); + + ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL) + || (ut_dulint_cmp( + (UT_LIST_GET_FIRST(buf_pool->flush_list)) + ->oldest_modification, + block->oldest_modification) <= 0)); + + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block); + + ut_ad(buf_flush_validate_low()); +} + +/************************************************************************ +Inserts a modified block into the flush list in the right sorted position. +This function is used by recovery, because there the modifications do not +necessarily come in the order of lsn's.
*/ + +void +buf_flush_insert_sorted_into_flush_list( +/*====================================*/ + buf_block_t* block) /* in: block which is modified */ +{ + buf_block_t* prev_b; + buf_block_t* b; + + ut_ad(mutex_own(&(buf_pool->mutex))); + + prev_b = NULL; + b = UT_LIST_GET_FIRST(buf_pool->flush_list); + + while (b && (ut_dulint_cmp(b->oldest_modification, + block->oldest_modification) > 0)) { + prev_b = b; + b = UT_LIST_GET_NEXT(flush_list, b); + } + + if (prev_b == NULL) { + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block); + } else { + UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b, + block); + } + + ut_ad(buf_flush_validate_low()); +} + +/************************************************************************ +Returns TRUE if the file page block is immediately suitable for replacement, +i.e., the transition FILE_PAGE => NOT_USED allowed. */ + +ibool +buf_flush_ready_for_replace( +/*========================*/ + /* out: TRUE if can replace immediately */ + buf_block_t* block) /* in: buffer control block, must be in state + BUF_BLOCK_FILE_PAGE and in the LRU list*/ +{ + ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + + if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0) + || (block->buf_fix_count != 0) + || (block->io_fix != 0)) { + + return(FALSE); + } + + return(TRUE); +} + +/************************************************************************ +Returns TRUE if the block is modified and ready for flushing. */ +UNIV_INLINE +ibool +buf_flush_ready_for_flush( +/*======================*/ + /* out: TRUE if can flush immediately */ + buf_block_t* block, /* in: buffer control block, must be in state + BUF_BLOCK_FILE_PAGE */ + ulint flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ +{ + ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + + if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0) + && (block->io_fix == 0)) { + + if (flush_type != BUF_FLUSH_LRU) { + + return(TRUE); + + } else if ((block->old || (UT_LIST_GET_LEN(buf_pool->LRU) + < BUF_LRU_OLD_MIN_LEN)) + && (block->buf_fix_count == 0)) { + + /* If we are flushing the LRU list, to avoid deadlocks + we require the block not to be bufferfixed, and hence + not latched. Since LRU flushed blocks are soon moved + to the free list, it is good to flush only old blocks + from the end of the LRU list. */ + + return(TRUE); + } + } + + return(FALSE); +} + +/************************************************************************ +Updates the flush system data structures when a write is completed. 
*/ + +void +buf_flush_write_complete( +/*=====================*/ + buf_block_t* block) /* in: pointer to the block in question */ +{ + ut_ad(block); + ut_ad(mutex_own(&(buf_pool->mutex))); + + block->oldest_modification = ut_dulint_zero; + + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block); + + ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list)); + + (buf_pool->n_flush[block->flush_type])--; + + if (block->flush_type == BUF_FLUSH_LRU) { + /* Put the block to the end of the LRU list to wait to be + moved to the free list */ + + buf_LRU_make_block_old(block); + + buf_pool->LRU_flush_ended++; + } + +/* printf("n pending flush %lu\n", + buf_pool->n_flush[block->flush_type]); */ + + if ((buf_pool->n_flush[block->flush_type] == 0) + && (buf_pool->init_flush[block->flush_type] == FALSE)) { + + /* The running flush batch has ended */ + + os_event_set(buf_pool->no_flush[block->flush_type]); + } +} + +/************************************************************************ +Does an asynchronous write of a buffer page. NOTE: in simulated aio we must +call os_aio_simulated_wake_handler_threads after we have posted a batch +of writes! */ +static +void +buf_flush_write_block_low( +/*======================*/ + buf_block_t* block) /* in: buffer block to write */ +{ +#ifdef UNIV_IBUF_DEBUG + ut_a(ibuf_count_get(block->space, block->offset) == 0); +#endif + ut_ad(!ut_dulint_is_zero(block->newest_modification)); + +#ifdef UNIV_LOG_DEBUG + printf( + "Warning: cannot force log to disk in the log debug version!\n"); +#else + /* Force the log to the disk before writing the modified block */ + log_flush_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS); +#endif + /* Write the newest modification lsn to the page */ + mach_write_to_8(block->frame + FIL_PAGE_LSN, + block->newest_modification); + mach_write_to_8(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN, + block->newest_modification); + + fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, + FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE, + (void*)block->frame, (void*)block); +} + +/************************************************************************ +Writes a page asynchronously from the buffer buf_pool to a file, if it can be +found in the buf_pool and it is in a flushable state. NOTE: in simulated aio +we must call os_aio_simulated_wake_handler_threads after we have posted a batch +of writes! 
*/ +static +ulint +buf_flush_try_page( +/*===============*/ + /* out: 1 if a page was flushed, 0 otherwise */ + ulint space, /* in: space id */ + ulint offset, /* in: page offset */ + ulint flush_type) /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or + BUF_FLUSH_SINGLE_PAGE */ +{ + buf_block_t* block; + ibool locked; + + ut_ad((flush_type == BUF_FLUSH_LRU) || (flush_type == BUF_FLUSH_LIST) + || (flush_type == BUF_FLUSH_SINGLE_PAGE)); + + mutex_enter(&(buf_pool->mutex)); + + block = buf_page_hash_get(space, offset); + + if ((flush_type == BUF_FLUSH_LIST) + && block && buf_flush_ready_for_flush(block, flush_type)) { + + block->io_fix = BUF_IO_WRITE; + block->flush_type = flush_type; + + if (buf_pool->n_flush[block->flush_type] == 0) { + + os_event_reset(buf_pool->no_flush[block->flush_type]); + } + + (buf_pool->n_flush[flush_type])++; + + locked = FALSE; + + /* If the simulated aio thread is not running, we must + not wait for any latch, as we may end up in a deadlock: + if buf_fix_count == 0, then we know we need not wait */ + + if (block->buf_fix_count == 0) { + rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); + + locked = TRUE; + } + + mutex_exit(&(buf_pool->mutex)); + + if (!locked) { + os_aio_simulated_wake_handler_threads(); + + rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); + } + + if (buf_debug_prints) { + printf("Flushing page space %lu, page no %lu \n", + block->space, block->offset); + } + + buf_flush_write_block_low(block); + + return(1); + + } else if ((flush_type == BUF_FLUSH_LRU) && block + && buf_flush_ready_for_flush(block, flush_type)) { + + /* VERY IMPORTANT: + Because any thread may call the LRU flush, even when owning + locks on pages, to avoid deadlocks, we must make sure that the + s-lock is acquired on the page without waiting: this is + accomplished because in the if-condition above we require + the page not to be bufferfixed (in function + ..._ready_for_flush). */ + + block->io_fix = BUF_IO_WRITE; + block->flush_type = flush_type; + + (buf_pool->n_flush[flush_type])++; + + rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); + + /* Note that the s-latch is acquired before releasing the + buf_pool mutex: this ensures that the latch is acquired + immediately. */ + + mutex_exit(&(buf_pool->mutex)); + + buf_flush_write_block_low(block); + + return(1); + + } else if ((flush_type == BUF_FLUSH_SINGLE_PAGE) && block + && buf_flush_ready_for_flush(block, flush_type)) { + + block->io_fix = BUF_IO_WRITE; + block->flush_type = flush_type; + + if (buf_pool->n_flush[block->flush_type] == 0) { + + os_event_reset(buf_pool->no_flush[block->flush_type]); + } + + (buf_pool->n_flush[flush_type])++; + + mutex_exit(&(buf_pool->mutex)); + + rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); + + if (buf_debug_prints) { + printf("Flushing single page space %lu, page no %lu \n", + block->space, block->offset); + } + + buf_flush_write_block_low(block); + + return(1); + } else { + mutex_exit(&(buf_pool->mutex)); + + return(0); + } +} + +/*************************************************************** +Flushes to disk all flushable pages within the flush area. 
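
For illustration, with BUF_FLUSH_AREA equal to 64 pages (an example
value; the define above computes the real one), a request concerning
page 100 considers the neighborhood

	low  = (100 / 64) * 64 =  64
	high = low + 64        = 128

so pages 64..127 of the same space are probed for flushable neighbors.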
*/ +static +ulint +buf_flush_try_neighbors( +/*====================*/ + /* out: number of pages flushed */ + ulint space, /* in: space id */ + ulint offset, /* in: page offset */ + ulint flush_type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ +{ + buf_block_t* block; + ulint low, high; + ulint count = 0; + ulint i; + + ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); + + low = (offset / BUF_FLUSH_AREA) * BUF_FLUSH_AREA; + high = (offset / BUF_FLUSH_AREA + 1) * BUF_FLUSH_AREA; + + if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { + /* If there is little space, it is better not to flush any + block except from the end of the LRU list */ + + low = offset; + high = offset + 1; + } + + /* printf("Flush area: low %lu high %lu\n", low, high); */ + + if (high > fil_space_get_size(space)) { + high = fil_space_get_size(space); + } + + mutex_enter(&(buf_pool->mutex)); + + for (i = low; i < high; i++) { + + block = buf_page_hash_get(space, i); + + if (block && buf_flush_ready_for_flush(block, flush_type)) { + + mutex_exit(&(buf_pool->mutex)); + + /* Note: as we release the buf_pool mutex above, in + buf_flush_try_page we cannot be sure the page is still + in a flushable state: therefore we check it again + inside that function. */ + + count += buf_flush_try_page(space, i, flush_type); + + mutex_enter(&(buf_pool->mutex)); + } + } + + mutex_exit(&(buf_pool->mutex)); + + /* In simulated aio we wake up the i/o-handler threads now that + we have posted a batch of writes: */ + + os_aio_simulated_wake_handler_threads(); + + return(count); +} + +/*********************************************************************** +This utility flushes dirty blocks from the end of the LRU list or flush_list. +NOTE 1: in the case of an LRU flush the calling thread may own latches to +pages: to avoid deadlocks, this function must be written so that it cannot +end up waiting for these latches! NOTE 2: in the case of a flush list flush, +the calling thread is not allowed to own any latches on pages! */ + +ulint +buf_flush_batch( +/*============*/ + /* out: number of blocks for which the write + request was queued; ULINT_UNDEFINED if there + was a flush of the same type already running */ + ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if + BUF_FLUSH_LIST, then the caller must not own + any latches on pages */ + ulint min_n, /* in: wished minimum number of blocks flushed + (it is not guaranteed that the actual number + is that big, though) */ + dulint lsn_limit) /* in the case BUF_FLUSH_LIST all blocks whose + oldest_modification is smaller than this + should be flushed (if their number does not + exceed min_n), otherwise ignored */ +{ + buf_block_t* block; + ulint page_count = 0; + ulint old_page_count; + ulint space; + ulint offset; + ibool found; + + ut_ad((flush_type == BUF_FLUSH_LRU) || (flush_type == BUF_FLUSH_LIST)); + ut_ad((flush_type != BUF_FLUSH_LIST) || + sync_thread_levels_empty_gen(TRUE)); + + mutex_enter(&(buf_pool->mutex)); + + if ((buf_pool->n_flush[flush_type] > 0) + || (buf_pool->init_flush[flush_type] == TRUE)) { + + /* There is already a flush batch of the same type running */ + + mutex_exit(&(buf_pool->mutex)); + + return(ULINT_UNDEFINED); + } + + (buf_pool->init_flush)[flush_type] = TRUE; + + for (;;) { + /* If we have flushed enough, leave the loop */ + if (page_count >= min_n) { + + break; + } + + /* Start from the end of the list looking for a suitable + block to be flushed.
*/ + + if (flush_type == BUF_FLUSH_LRU) { + block = UT_LIST_GET_LAST(buf_pool->LRU); + } else { + ut_ad(flush_type == BUF_FLUSH_LIST); + + block = UT_LIST_GET_LAST(buf_pool->flush_list); + + if (!block + || (ut_dulint_cmp(block->oldest_modification, + lsn_limit) >= 0)) { + /* We have flushed enough */ + + break; + } + } + + found = FALSE; + + /* Note that after finding a single flushable page, we try to + flush also all its neighbors, and after that start from the + END of the LRU list or flush list again: the list may change + during the flushing and we cannot safely preserve within this + function a pointer to a block in the list! */ + + while ((block != NULL) && !found) { + + if (buf_flush_ready_for_flush(block, flush_type)) { + + found = TRUE; + space = block->space; + offset = block->offset; + + mutex_exit(&(buf_pool->mutex)); + + old_page_count = page_count; + + /* Try to flush also all the neighbors */ + page_count += + buf_flush_try_neighbors(space, offset, + flush_type); + + /* printf( + "Flush type %lu, page no %lu, neighb %lu\n", + flush_type, offset, + page_count - old_page_count); */ + + mutex_enter(&(buf_pool->mutex)); + + } else if (flush_type == BUF_FLUSH_LRU) { + + block = UT_LIST_GET_PREV(LRU, block); + + } else { + ut_ad(flush_type == BUF_FLUSH_LIST); + + block = UT_LIST_GET_PREV(flush_list, block); + } + } + + /* If we could not find anything to flush, leave the loop */ + + if (!found) { + break; + } + } + + (buf_pool->init_flush)[flush_type] = FALSE; + + if ((buf_pool->n_flush[flush_type] == 0) + && (buf_pool->init_flush[flush_type] == FALSE)) { + + /* The running flush batch has ended */ + + os_event_set(buf_pool->no_flush[flush_type]); + } + + mutex_exit(&(buf_pool->mutex)); + + if (buf_debug_prints && (page_count > 0)) { + if (flush_type == BUF_FLUSH_LRU) { + printf("To flush %lu pages in LRU flush\n", + page_count, flush_type); + } else if (flush_type == BUF_FLUSH_LIST) { + printf("To flush %lu pages in flush list flush\n", + page_count, flush_type); + } else { + ut_error; + } + } + + return(page_count); +} + +/********************************************************************** +Waits until a flush batch of the given type ends */ + +void +buf_flush_wait_batch_end( +/*=====================*/ + ulint type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ +{ + ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST)); + + os_event_wait(buf_pool->no_flush[type]); +} + +/********************************************************************** +Gives a recommendation of how many blocks should be flushed to establish +a big enough margin of replaceable blocks near the end of the LRU list +and in the free list. 
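+As a numeric sketch (the margin constants are defined elsewhere; the values
+here are made up): if BUF_FLUSH_FREE_BLOCK_MARGIN were 100 and
+BUF_FLUSH_EXTRA_MARGIN 50, then finding 30 replaceable blocks would yield a
+recommendation of 100 + 50 - 30 = 120 blocks, while finding 100 or more
+would yield 0.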
*/ +static +ulint +buf_flush_LRU_recommendation(void) +/*==============================*/ + /* out: number of blocks which should be flushed + from the end of the LRU list */ +{ + buf_block_t* block; + ulint n_replaceable; + ulint distance = 0; + + mutex_enter(&(buf_pool->mutex)); + + n_replaceable = UT_LIST_GET_LEN(buf_pool->free); + + block = UT_LIST_GET_LAST(buf_pool->LRU); + + while ((block != NULL) + && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN + + BUF_FLUSH_EXTRA_MARGIN) + && (distance < BUF_LRU_FREE_SEARCH_LEN)) { + + if (buf_flush_ready_for_replace(block)) { + n_replaceable++; + } + + distance++; + + block = UT_LIST_GET_PREV(LRU, block); + } + + mutex_exit(&(buf_pool->mutex)); + + if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) { + + return(0); + } + + return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN + - n_replaceable); +} + +/************************************************************************* +Flushes pages from the end of the LRU list if there is too small a margin +of replaceable pages there or in the free list. VERY IMPORTANT: this function +is called also by threads which have locks on pages. To avoid deadlocks, we +flush only pages such that the s-lock required for flushing can be acquired +immediately, without waiting. */ + +void +buf_flush_free_margin(void) +/*=======================*/ +{ + ulint n_to_flush; + + n_to_flush = buf_flush_LRU_recommendation(); + + if (n_to_flush > 0) { + buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, ut_dulint_zero); + } +} + +/********************************************************************** +Validates the flush list. */ +static +ibool +buf_flush_validate_low(void) +/*========================*/ + /* out: TRUE if ok */ +{ + buf_block_t* block; + dulint om; + + UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list); + + block = UT_LIST_GET_FIRST(buf_pool->flush_list); + + while (block != NULL) { + om = block->oldest_modification; + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + ut_a(ut_dulint_cmp(om, ut_dulint_zero) > 0); + + block = UT_LIST_GET_NEXT(flush_list, block); + + if (block) { + ut_a(ut_dulint_cmp(om, block->oldest_modification) + >= 0); + } + } + + return(TRUE); +} + +/********************************************************************** +Validates the flush list. */ + +ibool +buf_flush_validate(void) +/*====================*/ + /* out: TRUE if ok */ +{ + ibool ret; + + mutex_enter(&(buf_pool->mutex)); + + ret = buf_flush_validate_low(); + + mutex_exit(&(buf_pool->mutex)); + + return(ret); +} diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c new file mode 100644 index 00000000000..26bdd7db1fe --- /dev/null +++ b/innobase/buf/buf0lru.c @@ -0,0 +1,734 @@ +/****************************************************** +The database buffer replacement algorithm + +(c) 1995 Innobase Oy + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#include "buf0lru.h" + +#ifdef UNIV_NONINL +#include "buf0lru.ic" +#endif + +#include "ut0byte.h" +#include "ut0lst.h" +#include "ut0rnd.h" +#include "sync0sync.h" +#include "sync0rw.h" +#include "hash0hash.h" +#include "os0sync.h" +#include "fil0fil.h" +#include "btr0btr.h" +#include "buf0buf.h" +#include "buf0flu.h" +#include "buf0rea.h" +#include "btr0sea.h" +#include "os0file.h" + +/* The number of blocks from the LRU_old pointer onward, including the block +pointed to, must be 3/8 of the whole LRU list length, except that the +tolerance defined below is allowed. 
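+For example, with an LRU list of 800 blocks the old segment is kept at
+3 * 800 / 8 = 300 blocks, give or take the tolerance.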
Note that the tolerance must be small +enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the +LRU_old pointer is not allowed to point to either end of the LRU list. */ + +#define BUF_LRU_OLD_TOLERANCE 20 + +/* The whole LRU list length is divided by this number to determine an +initial segment in buf_LRU_get_recent_limit */ + +#define BUF_LRU_INITIAL_RATIO 8 + +/********************************************************************** +Takes a block out of the LRU list and page hash table and sets the block +state to BUF_BLOCK_REMOVE_HASH. */ +static +void +buf_LRU_block_remove_hashed_page( +/*=============================*/ + buf_block_t* block); /* in: block, must contain a file page and + be in a state where it can be freed; there + may or may not be a hash index to the page */ +/********************************************************************** +Puts a file page whose has no hash index to the free list. */ +static +void +buf_LRU_block_free_hashed_page( +/*===========================*/ + buf_block_t* block); /* in: block, must contain a file page and + be in a state where it can be freed */ + +/********************************************************************** +Gets the minimum LRU_position field for the blocks in an initial segment +(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not +guaranteed to be precise, because the ulint_clock may wrap around. */ + +ulint +buf_LRU_get_recent_limit(void) +/*==========================*/ + /* out: the limit; zero if could not determine it */ +{ + buf_block_t* block; + ulint len; + ulint limit; + + mutex_enter(&(buf_pool->mutex)); + + len = UT_LIST_GET_LEN(buf_pool->LRU); + + if (len < BUF_LRU_OLD_MIN_LEN) { + /* The LRU list is too short to do read-ahead */ + + mutex_exit(&(buf_pool->mutex)); + + return(0); + } + + block = UT_LIST_GET_FIRST(buf_pool->LRU); + + limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO; + + mutex_exit(&(buf_pool->mutex)); + + return(limit); +} + +/********************************************************************** +Look for a replaceable block from the end of the LRU list and put it to +the free list if found. */ + +ibool +buf_LRU_search_and_free_block( +/*==========================*/ + /* out: TRUE if freed */ + ulint n_iterations) /* in: how many times this has been called + repeatedly without result: a high value + means that we should search farther */ +{ + buf_block_t* block; + ulint distance; + ibool freed; + ulint i; + + mutex_enter(&(buf_pool->mutex)); + + freed = FALSE; + + distance = BUF_LRU_FREE_SEARCH_LEN * (1 + n_iterations / 5); + + i = 0; + block = UT_LIST_GET_LAST(buf_pool->LRU); + + while (i < distance && block != NULL) { + + if (buf_flush_ready_for_replace(block)) { + + if (buf_debug_prints) { + printf( + "Putting space %lu page %lu to free list\n", + block->space, block->offset); + } + + buf_LRU_block_remove_hashed_page(block); + + mutex_exit(&(buf_pool->mutex)); + + btr_search_drop_page_hash_index(block->frame); + + mutex_enter(&(buf_pool->mutex)); + + buf_LRU_block_free_hashed_page(block); + + freed = TRUE; + + break; + } + + block = UT_LIST_GET_PREV(LRU, block); + } + + if (buf_pool->LRU_flush_ended > 0) { + buf_pool->LRU_flush_ended--; + } + + if (!freed) { + buf_pool->LRU_flush_ended = 0; + } + + mutex_exit(&(buf_pool->mutex)); + + return(freed); +} + +/********************************************************************** +Tries to remove LRU flushed blocks from the end of the LRU list and put them +to the free list. 
This is beneficial for the efficiency of the insert buffer +operation, as flushed pages from non-unique non-clustered indexes are here +taken out of the buffer pool, and their inserts redirected to the insert +buffer. Otherwise, the flushed blocks could get modified again before read +operations need new buffer blocks, and the i/o work done in flushing would be +wasted. */ + +void +buf_LRU_try_free_flushed_blocks(void) +/*=================================*/ +{ + mutex_enter(&(buf_pool->mutex)); + + while (buf_pool->LRU_flush_ended > 0) { + + mutex_exit(&(buf_pool->mutex)); + + buf_LRU_search_and_free_block(0); + + mutex_enter(&(buf_pool->mutex)); + } + + mutex_exit(&(buf_pool->mutex)); +} + +/********************************************************************** +Returns a free block from buf_pool. The block is taken off the free list. +If it is empty, blocks are moved from the end of the LRU list to the free +list. */ + +buf_block_t* +buf_LRU_get_free_block(void) +/*========================*/ + /* out: the free control block */ +{ + buf_block_t* block = NULL; + ibool freed; + ulint n_iterations = 0; +loop: + mutex_enter(&(buf_pool->mutex)); + + if (buf_pool->LRU_flush_ended > 0) { + mutex_exit(&(buf_pool->mutex)); + + buf_LRU_try_free_flushed_blocks(); + + mutex_enter(&(buf_pool->mutex)); + } + + /* If there is a block in the free list, take it */ + if (UT_LIST_GET_LEN(buf_pool->free) > 0) { + + block = UT_LIST_GET_FIRST(buf_pool->free); + UT_LIST_REMOVE(free, buf_pool->free, block); + block->state = BUF_BLOCK_READY_FOR_USE; + + mutex_exit(&(buf_pool->mutex)); + + return(block); + } + + /* If no block was in the free list, search from the end of the LRU + list and try to free a block there */ + + mutex_exit(&(buf_pool->mutex)); + + freed = buf_LRU_search_and_free_block(n_iterations); + + if (freed > 0) { + goto loop; + } + + /* No free block was found near the end of the list: try to flush + the LRU list */ + + buf_flush_free_margin(); + + os_event_wait(buf_pool->no_flush[BUF_FLUSH_LRU]); + + n_iterations++; + + os_aio_simulated_wake_handler_threads(); + + if (n_iterations > 10) { + + os_thread_sleep(500000); + } + + if (n_iterations > 20) { +/* buf_print(); + os_aio_print(); + rw_lock_list_print_info(); +*/ + if (n_iterations > 30) { + fprintf(stderr, + "Innobase: Warning: difficult to find free blocks from\n" + "Innobase: the buffer pool! Consider increasing the\n" + "Innobase: buffer pool size.\n"); + } + } + + goto loop; +} + +/*********************************************************************** +Moves the LRU_old pointer so that the length of the old blocks list +is inside the allowed limits. 
*/ +UNIV_INLINE +void +buf_LRU_old_adjust_len(void) +/*========================*/ +{ + ulint old_len; + ulint new_len; + + ut_ad(buf_pool->LRU_old); + ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(3 * (BUF_LRU_OLD_MIN_LEN / 8) > BUF_LRU_OLD_TOLERANCE + 5); + + for (;;) { + old_len = buf_pool->LRU_old_len; + new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8); + + /* Update the LRU_old pointer if necessary */ + + if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) { + + buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, + buf_pool->LRU_old); + (buf_pool->LRU_old)->old = TRUE; + buf_pool->LRU_old_len++; + + } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) { + + (buf_pool->LRU_old)->old = FALSE; + buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, + buf_pool->LRU_old); + buf_pool->LRU_old_len--; + } else { + ut_ad(buf_pool->LRU_old); /* Check that we did not + fall out of the LRU list */ + return; + } + } +} + +/*********************************************************************** +Initializes the old blocks pointer in the LRU list. +This function should be called when the LRU list grows to +BUF_LRU_OLD_MIN_LEN length. */ +static +void +buf_LRU_old_init(void) +/*==================*/ +{ + buf_block_t* block; + + ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN); + + /* We first initialize all blocks in the LRU list as old and then use + the adjust function to move the LRU_old pointer to the right + position */ + + block = UT_LIST_GET_FIRST(buf_pool->LRU); + + while (block != NULL) { + block->old = TRUE; + block = UT_LIST_GET_NEXT(LRU, block); + } + + buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU); + buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU); + + buf_LRU_old_adjust_len(); +} + +/********************************************************************** +Removes a block from the LRU list. */ +UNIV_INLINE +void +buf_LRU_remove_block( +/*=================*/ + buf_block_t* block) /* in: control block */ +{ + ut_ad(buf_pool); + ut_ad(block); + ut_ad(mutex_own(&(buf_pool->mutex))); + + /* If the LRU_old pointer is defined and points to just this block, + move it backward one step */ + + if (block == buf_pool->LRU_old) { + + /* Below: the previous block is guaranteed to exist, because + the LRU_old pointer is only allowed to differ by the + tolerance value from strict 3/8 of the LRU list length. */ + + buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, block); + (buf_pool->LRU_old)->old = TRUE; + + buf_pool->LRU_old_len++; + ut_ad(buf_pool->LRU_old); + } + + /* Remove the block from the LRU list */ + UT_LIST_REMOVE(LRU, buf_pool->LRU, block); + + /* If the LRU list is so short that LRU_old not defined, return */ + if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { + + buf_pool->LRU_old = NULL; + + return; + } + + ut_ad(buf_pool->LRU_old); + + /* Update the LRU_old_len field if necessary */ + if (block->old) { + + buf_pool->LRU_old_len--; + } + + /* Adjust the length of the old block list if necessary */ + buf_LRU_old_adjust_len(); +} + +/********************************************************************** +Adds a block to the LRU list end. 
*/ +UNIV_INLINE +void +buf_LRU_add_block_to_end_low( +/*=========================*/ + buf_block_t* block) /* in: control block */ +{ + buf_block_t* last_block; + + ut_ad(buf_pool); + ut_ad(block); + ut_ad(mutex_own(&(buf_pool->mutex))); + + block->old = TRUE; + + last_block = UT_LIST_GET_LAST(buf_pool->LRU); + + if (last_block) { + block->LRU_position = last_block->LRU_position; + } else { + block->LRU_position = buf_pool_clock_tic(); + } + + UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block); + + if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { + + buf_pool->LRU_old_len++; + } + + if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { + + ut_ad(buf_pool->LRU_old); + + /* Adjust the length of the old block list if necessary */ + + buf_LRU_old_adjust_len(); + + } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) { + + /* The LRU list is now long enough for LRU_old to become + defined: init it */ + + buf_LRU_old_init(); + } +} + +/********************************************************************** +Adds a block to the LRU list. */ +UNIV_INLINE +void +buf_LRU_add_block_low( +/*==================*/ + buf_block_t* block, /* in: control block */ + ibool old) /* in: TRUE if should be put to the old blocks + in the LRU list, else put to the start; if the + LRU list is very short, the block is added to + the start, regardless of this parameter */ +{ + ulint cl; + + ut_ad(buf_pool); + ut_ad(block); + ut_ad(mutex_own(&(buf_pool->mutex))); + + block->old = old; + cl = buf_pool_clock_tic(); + + if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) { + + UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block); + + block->LRU_position = cl; + block->freed_page_clock = buf_pool->freed_page_clock; + } else { + UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old, + block); + buf_pool->LRU_old_len++; + + /* We copy the LRU position field of the previous block + to the new block */ + + block->LRU_position = (buf_pool->LRU_old)->LRU_position; + } + + if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { + + ut_ad(buf_pool->LRU_old); + + /* Adjust the length of the old block list if necessary */ + + buf_LRU_old_adjust_len(); + + } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) { + + /* The LRU list is now long enough for LRU_old to become + defined: init it */ + + buf_LRU_old_init(); + } +} + +/********************************************************************** +Adds a block to the LRU list. */ + +void +buf_LRU_add_block( +/*==============*/ + buf_block_t* block, /* in: control block */ + ibool old) /* in: TRUE if should be put to the old + blocks in the LRU list, else put to the start; + if the LRU list is very short, the block is + added to the start, regardless of this + parameter */ +{ + buf_LRU_add_block_low(block, old); +} + +/********************************************************************** +Moves a block to the start of the LRU list. */ + +void +buf_LRU_make_block_young( +/*=====================*/ + buf_block_t* block) /* in: control block */ +{ + buf_LRU_remove_block(block); + buf_LRU_add_block_low(block, FALSE); +} + +/********************************************************************** +Moves a block to the end of the LRU list. */ + +void +buf_LRU_make_block_old( +/*===================*/ + buf_block_t* block) /* in: control block */ +{ + buf_LRU_remove_block(block); + buf_LRU_add_block_to_end_low(block); +} + +/********************************************************************** +Puts a block back to the free list. 
*/ + +void +buf_LRU_block_free_non_file_page( +/*=============================*/ + buf_block_t* block) /* in: block, must not contain a file page */ +{ + ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(block); + + ut_ad((block->state == BUF_BLOCK_MEMORY) + || (block->state == BUF_BLOCK_READY_FOR_USE)); + + block->state = BUF_BLOCK_NOT_USED; + + UT_LIST_ADD_FIRST(free, buf_pool->free, block); +} + +/********************************************************************** +Takes a block out of the LRU list and page hash table and sets the block +state to BUF_BLOCK_REMOVE_HASH. */ +static +void +buf_LRU_block_remove_hashed_page( +/*=============================*/ + buf_block_t* block) /* in: block, must contain a file page and + be in a state where it can be freed; there + may or may not be a hash index to the page */ +{ + ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(block); + + ut_ad(block->state == BUF_BLOCK_FILE_PAGE); + + ut_a(block->io_fix == 0); + ut_a(block->buf_fix_count == 0); + ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0); + + buf_LRU_remove_block(block); + + buf_pool->freed_page_clock += 1; + + buf_frame_modify_clock_inc(block->frame); + + HASH_DELETE(buf_block_t, hash, buf_pool->page_hash, + buf_page_address_fold(block->space, block->offset), + block); + + block->state = BUF_BLOCK_REMOVE_HASH; +} + +/********************************************************************** +Puts a file page whose has no hash index to the free list. */ +static +void +buf_LRU_block_free_hashed_page( +/*===========================*/ + buf_block_t* block) /* in: block, must contain a file page and + be in a state where it can be freed */ +{ + ut_ad(mutex_own(&(buf_pool->mutex))); + ut_ad(block->state == BUF_BLOCK_REMOVE_HASH); + + block->state = BUF_BLOCK_MEMORY; + + buf_LRU_block_free_non_file_page(block); +} + +/************************************************************************** +Validates the LRU list. */ + +ibool +buf_LRU_validate(void) +/*==================*/ +{ + buf_block_t* block; + ulint old_len; + ulint new_len; + ulint LRU_pos; + + ut_ad(buf_pool); + mutex_enter(&(buf_pool->mutex)); + + if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { + + ut_a(buf_pool->LRU_old); + old_len = buf_pool->LRU_old_len; + new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8); + ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE); + ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE); + } + + UT_LIST_VALIDATE(LRU, buf_block_t, buf_pool->LRU); + + block = UT_LIST_GET_FIRST(buf_pool->LRU); + + old_len = 0; + + while (block != NULL) { + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + + if (block->old) { + old_len++; + } + + if (buf_pool->LRU_old && (old_len == 1)) { + ut_a(buf_pool->LRU_old == block); + } + + LRU_pos = block->LRU_position; + + block = UT_LIST_GET_NEXT(LRU, block); + + if (block) { + /* If the following assert fails, it may + not be an error: just the buf_pool clock + has wrapped around */ + ut_a(LRU_pos >= block->LRU_position); + } + } + + if (buf_pool->LRU_old) { + ut_a(buf_pool->LRU_old_len == old_len); + } + + UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free); + + block = UT_LIST_GET_FIRST(buf_pool->free); + + while (block != NULL) { + ut_a(block->state == BUF_BLOCK_NOT_USED); + + block = UT_LIST_GET_NEXT(free, block); + } + + mutex_exit(&(buf_pool->mutex)); + return(TRUE); +} + +/************************************************************************** +Prints the LRU list. 
*/ + +void +buf_LRU_print(void) +/*===============*/ +{ + buf_block_t* block; + buf_frame_t* frame; + ulint len; + + ut_ad(buf_pool); + mutex_enter(&(buf_pool->mutex)); + + printf("Pool ulint clock %lu\n", buf_pool->ulint_clock); + + block = UT_LIST_GET_FIRST(buf_pool->LRU); + + len = 0; + + while (block != NULL) { + + printf("BLOCK %lu ", block->offset); + + if (block->old) { + printf("old "); + } + + if (block->buf_fix_count) { + printf("buffix count %lu ", block->buf_fix_count); + } + + if (block->io_fix) { + printf("io_fix %lu ", block->io_fix); + } + + if (ut_dulint_cmp(block->oldest_modification, + ut_dulint_zero) > 0) { + printf("modif. "); + } + + printf("LRU pos %lu ", block->LRU_position); + + frame = buf_block_get_frame(block); + + printf("type %lu ", fil_page_get_type(frame)); + printf("index id %lu ", ut_dulint_get_low( + btr_page_get_index_id(frame))); + + block = UT_LIST_GET_NEXT(LRU, block); + len++; + if (len % 10 == 0) { + printf("\n"); + } + } + + mutex_exit(&(buf_pool->mutex)); +} diff --git a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c new file mode 100644 index 00000000000..13e9ed0476b --- /dev/null +++ b/innobase/buf/buf0rea.c @@ -0,0 +1,559 @@ +/****************************************************** +The database buffer read + +(c) 1995 Innobase Oy + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#include "buf0rea.h" + +#include "fil0fil.h" +#include "mtr0mtr.h" + +#include "buf0buf.h" +#include "buf0flu.h" +#include "buf0lru.h" +#include "ibuf0ibuf.h" +#include "log0recv.h" +#include "trx0sys.h" +#include "os0file.h" + +/* The size in blocks of the area where the random read-ahead algorithm counts +the accessed pages when deciding whether to read-ahead */ +#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA + +/* There must be at least this many pages in buf_pool in the area to start +a random read-ahead */ +#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8) + +/* The linear read-ahead area size */ +#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA + +/* The linear read-ahead threshold */ +#define BUF_READ_AHEAD_LINEAR_THRESHOLD (3 * BUF_READ_AHEAD_LINEAR_AREA / 8) + +/* If there are buf_pool->curr_size per the number below pending reads, then +read-ahead is not done: this is to prevent flooding the buffer pool with +i/o-fixed buffer blocks */ +#define BUF_READ_AHEAD_PEND_LIMIT 2 + +/************************************************************************ +Low-level function which reads a page asynchronously from a file to the +buffer buf_pool if it is not already there, in which case does nothing. +Sets the io_fix flag and sets an exclusive lock on the buffer frame. The +flag is cleared and the x-lock released by an i/o-handler thread. 
*/ +static +ulint +buf_read_page_low( +/*==============*/ + /* out: 1 if a read request was queued, 0 if the page + already resided in buf_pool */ + ibool sync, /* in: TRUE if synchronous aio is desired */ + ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ..., + ORed to OS_AIO_SIMULATED_WAKE_LATER (see below + at read-ahead functions) */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + buf_block_t* block; + ulint wake_later; + + wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; + mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER; + +#ifdef UNIV_LOG_DEBUG + if (space % 2 == 1) { + /* We are updating a replicate space while holding the + log mutex: the read must be handled before other reads + which might incur ibuf operations and thus write to the log */ + + printf("Log debug: reading replicate page in sync mode\n"); + + sync = TRUE; + } +#endif + if (trx_sys_hdr_page(space, offset)) { + + /* Trx sys header is so low in the latching order that we play + safe and do not leave the i/o-completion to an asynchronous + i/o-thread: */ + + sync = TRUE; + } + + block = buf_page_init_for_read(mode, space, offset); + + if (block != NULL) { + fil_io(OS_FILE_READ | wake_later, + sync, space, offset, 0, UNIV_PAGE_SIZE, + (void*)block->frame, (void*)block); + if (sync) { + /* The i/o is already completed when we arrive from + fil_read */ + buf_page_io_complete(block); + } + + return(1); + } + + return(0); +} + +/************************************************************************ +Applies a random read-ahead in buf_pool if there are at least a threshold +value of accessed pages from the random read-ahead area. Does not read any +page, not even the one at the position (space, offset), if the read-ahead +mechanism is not activated. NOTE 1: the calling thread may own latches on +pages: to avoid deadlocks this function must be written such that it cannot +end up waiting for these latches! NOTE 2: the calling thread must want +access to the page given: this rule is set to prevent unintended read-aheads +performed by ibuf routines, a situation which could result in a deadlock if +the OS does not support asynchronous i/o. */ +static +ulint +buf_read_ahead_random( +/*==================*/ + /* out: number of page read requests issued; NOTE + that if we read ibuf pages, it may happen that + the page at the given page number does not get + read even if we return a value > 0! */ + ulint space, /* in: space id */ + ulint offset) /* in: page number of a page which the current thread + wants to access */ +{ + buf_block_t* block; + ulint recent_blocks = 0; + ulint count; + ulint LRU_recent_limit; + ulint ibuf_mode; + ulint low, high; + ulint i; + + if (ibuf_bitmap_page(offset)) { + + /* If it is an ibuf bitmap page, we do no read-ahead, as + that could break the ibuf page access order */ + + return(0); + } + + low = (offset / BUF_READ_AHEAD_RANDOM_AREA) + * BUF_READ_AHEAD_RANDOM_AREA; + high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1) + * BUF_READ_AHEAD_RANDOM_AREA; + + if (high > fil_space_get_size(space)) { + + high = fil_space_get_size(space); + } + + /* Get the minimum LRU_position field value for an initial segment + of the LRU list, to determine which blocks have recently been added + to the start of the list. 
*/ + + LRU_recent_limit = buf_LRU_get_recent_limit(); + + mutex_enter(&(buf_pool->mutex)); + + if (buf_pool->n_pend_reads > + buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { + mutex_exit(&(buf_pool->mutex)); + + return(0); + } + + /* Count how many blocks in the area have been recently accessed, + that is, reside near the start of the LRU list. */ + + for (i = low; i < high; i++) { + + block = buf_page_hash_get(space, i); + + if ((block) + && (block->LRU_position > LRU_recent_limit) + && block->accessed) { + + recent_blocks++; + } + } + + mutex_exit(&(buf_pool->mutex)); + + if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) { + /* Do nothing */ + + return(0); + } + + /* Read all the suitable blocks within the area */ + + if (ibuf_inside()) { + ibuf_mode = BUF_READ_IBUF_PAGES_ONLY; + } else { + ibuf_mode = BUF_READ_ANY_PAGE; + } + + count = 0; + + for (i = low; i < high; i++) { + /* It is only sensible to do read-ahead in the non-sync aio + mode: hence FALSE as the first parameter */ + + if (!ibuf_bitmap_page(i)) { + + count += buf_read_page_low(FALSE, ibuf_mode + | OS_AIO_SIMULATED_WAKE_LATER, + space, i); + } + } + + /* In simulated aio we wake the aio handler threads only after + queuing all aio requests, in native aio the following call does + nothing: */ + + os_aio_simulated_wake_handler_threads(); + + if (buf_debug_prints && (count > 0)) { + + printf("Random read-ahead space %lu offset %lu pages %lu\n", + space, offset, count); + } + + return(count); +} + +/************************************************************************ +High-level function which reads a page asynchronously from a file to the +buffer buf_pool if it is not already there. Sets the io_fix flag and sets +an exclusive lock on the buffer frame. The flag is cleared and the x-lock +released by the i/o-handler thread. Does a random read-ahead if it seems +sensible. */ + +ulint +buf_read_page( +/*==========*/ + /* out: number of page read requests issued: this can + be > 1 if read-ahead occurred */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + ulint count; + ulint count2; + + count = buf_read_ahead_random(space, offset); + + /* We do the i/o in the synchronous aio mode to save thread + switches: hence TRUE */ + + count2 = buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space, offset); + + /* Flush pages from the end of the LRU list if necessary */ + buf_flush_free_margin(); + + return(count + count2); +} + +/************************************************************************ +Applies linear read-ahead if in the buf_pool the page is a border page of +a linear read-ahead area and all the pages in the area have been accessed. +Does not read any page if the read-ahead mechanism is not activated. Note +that the the algorithm looks at the 'natural' adjacent successor and +predecessor of the page, which on the leaf level of a B-tree are the next +and previous page in the chain of leaves. To know these, the page specified +in (space, offset) must already be present in the buf_pool. Thus, the +natural way to use this function is to call it when a page in the buf_pool +is accessed the first time, calling this function just after it has been +bufferfixed. +NOTE 1: as this function looks at the natural predecessor and successor +fields on the page, what happens, if these are not initialized to any +sensible value? No problem, before applying read-ahead we check that the +area to read is within the span of the space, if not, read-ahead is not +applied. 
An uninitialized value may result in a useless read operation, but +only very improbably. +NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this +function must be written such that it cannot end up waiting for these +latches! +NOTE 3: the calling thread must want access to the page given: this rule is +set to prevent unintended read-aheads performed by ibuf routines, a situation +which could result in a deadlock if the OS does not support asynchronous io. */ + +ulint +buf_read_ahead_linear( +/*==================*/ + /* out: number of page read requests issued */ + ulint space, /* in: space id */ + ulint offset) /* in: page number of a page; NOTE: the current thread + must want access to this page (see NOTE 3 above) */ +{ + buf_block_t* block; + buf_frame_t* frame; + buf_block_t* pred_block = NULL; + ulint pred_offset; + ulint succ_offset; + ulint count; + int asc_or_desc; + ulint new_offset; + ulint fail_count; + ulint ibuf_mode; + ulint low, high; + ulint i; + + if (ibuf_bitmap_page(offset)) { + /* If it is an ibuf bitmap page, we do no read-ahead, as + that could break the ibuf page access order */ + + return(0); + } + + low = (offset / BUF_READ_AHEAD_LINEAR_AREA) + * BUF_READ_AHEAD_LINEAR_AREA; + high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1) + * BUF_READ_AHEAD_LINEAR_AREA; + + if ((offset != low) && (offset != high - 1)) { + /* This is not a border page of the area: return */ + + return(0); + } + + if (high > fil_space_get_size(space)) { + /* The area is not whole, return */ + + return(0); + } + + mutex_enter(&(buf_pool->mutex)); + + if (buf_pool->n_pend_reads > + buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { + mutex_exit(&(buf_pool->mutex)); + + return(0); + } + + /* Check that almost all pages in the area have been accessed; if + offset == low, the accesses must be in a descending order, otherwise, + in an ascending order. */ + + asc_or_desc = 1; + + if (offset == low) { + asc_or_desc = -1; + } + + fail_count = 0; + + for (i = low; i < high; i++) { + + block = buf_page_hash_get(space, i); + + if ((block == NULL) || !block->accessed) { + + /* Not accessed */ + fail_count++; + + } else if (pred_block && (ut_ulint_cmp(block->LRU_position, + pred_block->LRU_position) + != asc_or_desc)) { + + /* Accesses not in the right order */ + + fail_count++; + pred_block = block; + } + } + + if (fail_count > BUF_READ_AHEAD_LINEAR_AREA - + BUF_READ_AHEAD_LINEAR_THRESHOLD) { + /* Too many failures: return */ + + mutex_exit(&(buf_pool->mutex)); + + return(0); + } + + /* If we got this far, we know that enough pages in the area have + been accessed in the right order: linear read-ahead can be sensible */ + + block = buf_page_hash_get(space, offset); + + if (block == NULL) { + mutex_exit(&(buf_pool->mutex)); + + return(0); + } + + frame = block->frame; + + /* Read the natural predecessor and successor page addresses from + the page; NOTE that because the calling thread may have an x-latch + on the page, we do not acquire an s-latch on the page, this is to + prevent deadlocks. Even if we read values which are nonsense, the + algorithm will work. 
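+As a sketch of the decision below: when offset == low and the page's
+successor link equals offset + 1, the pages are being scanned in
+descending order, and the area around the predecessor is read ahead; the
+ascending case is symmetric.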
*/ + + pred_offset = fil_page_get_prev(frame); + succ_offset = fil_page_get_next(frame); + + mutex_exit(&(buf_pool->mutex)); + + if ((offset == low) && (succ_offset == offset + 1)) { + + /* This is ok, we can continue */ + new_offset = pred_offset; + + } else if ((offset == high - 1) && (pred_offset == offset - 1)) { + + /* This is ok, we can continue */ + new_offset = succ_offset; + } else { + /* Successor or predecessor not in the right order */ + + return(0); + } + + low = (new_offset / BUF_READ_AHEAD_LINEAR_AREA) + * BUF_READ_AHEAD_LINEAR_AREA; + high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1) + * BUF_READ_AHEAD_LINEAR_AREA; + + if ((new_offset != low) && (new_offset != high - 1)) { + /* This is not a border page of the area: return */ + + return(0); + } + + if (high > fil_space_get_size(space)) { + /* The area is not whole, return */ + + return(0); + } + + /* If we got this far, read-ahead can be sensible: do it */ + + if (ibuf_inside()) { + ibuf_mode = BUF_READ_IBUF_PAGES_ONLY; + } else { + ibuf_mode = BUF_READ_ANY_PAGE; + } + + count = 0; + + for (i = low; i < high; i++) { + /* It is only sensible to do read-ahead in the non-sync + aio mode: hence FALSE as the first parameter */ + + if (!ibuf_bitmap_page(i)) { + count += buf_read_page_low(FALSE, ibuf_mode + | OS_AIO_SIMULATED_WAKE_LATER, + space, i); + } + } + + /* In simulated aio we wake the aio handler threads only after + queuing all aio requests, in native aio the following call does + nothing: */ + + os_aio_simulated_wake_handler_threads(); + + /* Flush pages from the end of the LRU list if necessary */ + buf_flush_free_margin(); + + if (buf_debug_prints && (count > 0)) { + printf( + "LINEAR read-ahead space %lu offset %lu pages %lu\n", + space, offset, count); + } + + return(count); +} + +/************************************************************************ +Issues read requests for pages which the ibuf module wants to read in, in +order to contract insert buffer trees. Technically, this function is like +a read-ahead function. */ + +void +buf_read_ibuf_merge_pages( +/*======================*/ + ibool sync, /* in: TRUE if the caller wants this function + to wait for the highest address page to get + read in, before this function returns */ + ulint space, /* in: space id */ + ulint* page_nos, /* in: array of page numbers to read, with the + highest page number the last in the array */ + ulint n_stored) /* in: number of page numbers in the array */ +{ + ulint i; + + ut_ad(!ibuf_inside()); +#ifdef UNIV_IBUF_DEBUG + ut_a(n_stored < UNIV_PAGE_SIZE); +#endif + while (buf_pool->n_pend_reads > + buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { + os_thread_sleep(500000); + } + + for (i = 0; i < n_stored; i++) { + if ((i + 1 == n_stored) && sync) { + buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space, + page_nos[i]); + } else { + buf_read_page_low(FALSE, BUF_READ_ANY_PAGE, space, + page_nos[i]); + } + } + + /* Flush pages from the end of the LRU list if necessary */ + buf_flush_free_margin(); + + if (buf_debug_prints) { + printf("Ibuf merge read-ahead space %lu pages %lu\n", + space, n_stored); + } +} + +/************************************************************************ +Issues read requests for pages which recovery wants to read in. 
*/ + +void +buf_read_recv_pages( +/*================*/ + ibool sync, /* in: TRUE if the caller wants this function + to wait for the highest address page to get + read in, before this function returns */ + ulint space, /* in: space id */ + ulint* page_nos, /* in: array of page numbers to read, with the + highest page number the last in the array */ + ulint n_stored) /* in: number of page numbers in the array */ +{ + ulint i; + + for (i = 0; i < n_stored; i++) { + + while (buf_pool->n_pend_reads >= RECV_POOL_N_FREE_BLOCKS / 2) { + + os_aio_simulated_wake_handler_threads(); + os_thread_sleep(500000); + } + + if ((i + 1 == n_stored) && sync) { + buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space, + page_nos[i]); + } else { + buf_read_page_low(FALSE, BUF_READ_ANY_PAGE + | OS_AIO_SIMULATED_WAKE_LATER, + space, page_nos[i]); + } + } + + os_aio_simulated_wake_handler_threads(); + + /* Flush pages from the end of the LRU list if necessary */ + buf_flush_free_margin(); + + if (buf_debug_prints) { + printf("Recovery applies read-ahead pages %lu\n", n_stored); + } +} diff --git a/innobase/buf/makefilewin b/innobase/buf/makefilewin new file mode 100644 index 00000000000..ce62cb95958 --- /dev/null +++ b/innobase/buf/makefilewin @@ -0,0 +1,20 @@ +include ..\include\makefile.i + +buf.lib: buf0buf.obj buf0lru.obj buf0flu.obj buf0rea.obj + lib -out:..\libs\buf.lib buf0buf.obj buf0lru.obj buf0flu.obj buf0rea.obj + +buf0buf.obj: buf0buf.c + $(CCOM) $(CFL) -c buf0buf.c + +buf0lru.obj: buf0lru.c + $(CCOM) $(CFL) -c buf0lru.c + +buf0flu.obj: buf0flu.c + $(CCOM) $(CFL) -c buf0flu.c + +buf0rea.obj: buf0rea.c + $(CCOM) $(CFL) -c buf0rea.c + + + + diff --git a/innobase/buf/ts/makefile b/innobase/buf/ts/makefile new file mode 100644 index 00000000000..5886d06d7fa --- /dev/null +++ b/innobase/buf/ts/makefile @@ -0,0 +1,20 @@ + + + +include ..\..\makefile.i + +doall: tsbuf + +tsbuf: ..\buf.lib tsbuf.c + $(CCOM) $(CFL) -I.. -I..\.. ..\buf.lib ..\..\btr.lib ..\..\trx.lib ..\..\pars.lib ..\..\que.lib ..\..\lock.lib ..\..\row.lib ..\..\read.lib ..\..\srv.lib ..\..\com.lib ..\..\usr.lib ..\..\thr.lib ..\..\fut.lib ..\..\fsp.lib ..\..\page.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsbuf.c $(LFL) + +tsos: ..\buf.lib tsos.c + $(CCOM) $(CFL) -I.. -I..\.. 
..\buf.lib ..\..\mach.lib ..\..\fil.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tsos.c $(LFL) + + + + + + + + diff --git a/innobase/buf/ts/tsbuf.c b/innobase/buf/ts/tsbuf.c new file mode 100644 index 00000000000..fd6ae69653f --- /dev/null +++ b/innobase/buf/ts/tsbuf.c @@ -0,0 +1,885 @@ +/************************************************************************ +The test module for the file system and buffer manager + +(c) 1995 Innobase Oy + +Created 11/16/1995 Heikki Tuuri +*************************************************************************/ + +#include "string.h" + +#include "os0thread.h" +#include "os0file.h" +#include "ut0ut.h" +#include "ut0byte.h" +#include "sync0sync.h" +#include "mem0mem.h" +#include "fil0fil.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "log0log.h" +#include "mach0data.h" +#include "..\buf0buf.h" +#include "..\buf0flu.h" +#include "..\buf0lru.h" + +os_file_t files[1000]; + +mutex_t ios_mutex; +ulint ios; +ulint n[10]; + +mutex_t incs_mutex; +ulint incs; + +#define N_SPACES 1 +#define N_FILES 1 +#define FILE_SIZE 4000 +#define POOL_SIZE 1000 +#define COUNTER_OFFSET 1500 + +#define LOOP_SIZE 150 +#define N_THREADS 5 + + +ulint zero = 0; + +buf_frame_t* bl_arr[POOL_SIZE]; + +/************************************************************************ +Io-handler thread function. */ + +ulint +handler_thread( +/*===========*/ + void* arg) +{ + ulint segment; + void* mess; + ulint i; + bool ret; + + segment = *((ulint*)arg); + + printf("Io handler thread %lu starts\n", segment); + + for (i = 0;; i++) { + ret = fil_aio_wait(segment, &mess); + ut_a(ret); + + buf_page_io_complete((buf_block_t*)mess); + + mutex_enter(&ios_mutex); + ios++; + mutex_exit(&ios_mutex); + + } + + return(0); +} + +/************************************************************************* +This thread reports the status of sync system. */ + +ulint +info_thread( +/*========*/ + void* arg) +{ + ulint segment; + + segment = *((ulint*)arg); + + for (;;) { + sync_print(); + os_aio_print(); + printf("Debug stop threads == %lu\n", ut_dbg_stop_threads); + os_thread_sleep(30000000); + } + + return(0); +} + +/************************************************************************* +Creates the files for the file system test and inserts them to +the file system. 
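+A caveat for readers: strcpy below leaves name holding the 8-character
+string "tsfile00", so the stores to name[9] and name[10] land beyond the
+terminating NUL and do not change the visible file name; presumably
+indices 6 and 7 were intended. With N_SPACES == 1 and N_FILES == 1 only a
+single file is created, so the test runs regardless.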
*/ + +void +create_files(void) +/*==============*/ +{ + bool ret; + ulint i, k; + char name[20]; + os_thread_t thr[5]; + os_thread_id_t id[5]; + ulint err; + + printf("--------------------------------------------------------\n"); + printf("Create or open database files\n"); + + strcpy(name, "tsfile00"); + + for (k = 0; k < N_SPACES; k++) { + for (i = 0; i < N_FILES; i++) { + + name[9] = (char)((ulint)'0' + k); + name[10] = (char)((ulint)'0' + i); + + files[i] = os_file_create(name, OS_FILE_CREATE, + OS_FILE_TABLESPACE, &ret); + + if (ret == FALSE) { + err = os_file_get_last_error(); + if (err != OS_FILE_ALREADY_EXISTS) { + printf("OS error %lu in file creation\n", err); + ut_error; + } + + files[i] = os_file_create( + name, OS_FILE_OPEN, + OS_FILE_TABLESPACE, &ret); + + ut_a(ret); + } + + ret = os_file_close(files[i]); + ut_a(ret); + + if (i == 0) { + fil_space_create(name, k, OS_FILE_TABLESPACE); + } + + ut_a(fil_validate()); + + fil_node_create(name, FILE_SIZE, k); + } + } + + ios = 0; + + mutex_create(&ios_mutex); + + for (i = 0; i < 5; i++) { + n[i] = i; + + thr[i] = os_thread_create(handler_thread, n + i, id + i); + } +/* + n[9] = 9; + os_thread_create(info_thread, n + 9, id); +*/ +} + +/************************************************************************ +Creates the test database files. */ + +void +create_db(void) +/*===========*/ +{ + ulint i; + byte* frame; + ulint j; + ulint tm, oldtm; + mtr_t mtr; + + printf("--------------------------------------------------------\n"); + printf("Write database pages\n"); + + oldtm = ut_clock(); + + for (i = 0; i < N_SPACES; i++) { + for (j = 0; j < FILE_SIZE * N_FILES; j++) { + mtr_start(&mtr); + mtr_set_log_mode(&mtr, MTR_LOG_NONE); + + frame = buf_page_create(i, j, &mtr); + buf_page_get(i, j, RW_X_LATCH, &mtr); + + if (j > FILE_SIZE * N_FILES - 64 * 2 - 1) { + mlog_write_ulint(frame + FIL_PAGE_PREV, j - 5, + MLOG_4BYTES, &mtr); + mlog_write_ulint(frame + FIL_PAGE_NEXT, j - 7, + MLOG_4BYTES, &mtr); + } else { + mlog_write_ulint(frame + FIL_PAGE_PREV, j - 1, + MLOG_4BYTES, &mtr); + mlog_write_ulint(frame + FIL_PAGE_NEXT, j + 1, + MLOG_4BYTES, &mtr); + } + + mlog_write_ulint(frame + FIL_PAGE_OFFSET, j, + MLOG_4BYTES, &mtr); + mlog_write_ulint(frame + FIL_PAGE_SPACE, i, + MLOG_4BYTES, &mtr); + mlog_write_ulint(frame + COUNTER_OFFSET, 0, + MLOG_4BYTES, &mtr); + + mtr_commit(&mtr); + } + } + + tm = ut_clock(); + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); + + printf("--------------------------------------------------------\n"); + printf("TEST 1 A. Test of page creation when page resides in buffer\n"); + for (i = 0; i < N_SPACES; i++) { + for (j = FILE_SIZE * N_FILES - 200; + j < FILE_SIZE * N_FILES; j++) { + mtr_start(&mtr); + mtr_set_log_mode(&mtr, MTR_LOG_NONE); + + frame = buf_page_create(i, j, &mtr); + buf_page_get(i, j, RW_X_LATCH, &mtr); + + mlog_write_ulint(frame + FIL_PAGE_PREV, + j - 1, MLOG_4BYTES, &mtr); + + mlog_write_ulint(frame + FIL_PAGE_NEXT, + j + 1, MLOG_4BYTES, &mtr); + + mlog_write_ulint(frame + FIL_PAGE_OFFSET, j, + MLOG_4BYTES, &mtr); + mlog_write_ulint(frame + FIL_PAGE_SPACE, i, + MLOG_4BYTES, &mtr); + mtr_commit(&mtr); + } + } + + printf("--------------------------------------------------------\n"); + printf("TEST 1 B. Flush pages\n"); + + buf_flush_batch(BUF_FLUSH_LIST, POOL_SIZE / 2); + buf_validate(); + + printf("--------------------------------------------------------\n"); + printf("TEST 1 C. 
Allocate POOL_SIZE blocks to flush pages\n"); + + buf_validate(); + /* Flush the pool of dirty pages */ + for (i = 0; i < POOL_SIZE; i++) { + + bl_arr[i] = buf_frame_alloc(); + } + buf_validate(); + buf_LRU_print(); + + for (i = 0; i < POOL_SIZE; i++) { + + buf_frame_free(bl_arr[i]); + } + + buf_validate(); + ut_a(buf_all_freed()); + + mtr_start(&mtr); + frame = buf_page_get(0, 313, RW_S_LATCH, &mtr); +#ifdef UNIV_ASYNC_IO + ut_a(buf_page_io_query(buf_block_align(frame)) == TRUE); +#endif + mtr_commit(&mtr); +} + +/************************************************************************ +Reads the test database files. */ + +void +test1(void) +/*=======*/ +{ + ulint i, j, k, c; + byte* frame; + ulint tm, oldtm; + mtr_t mtr; + + printf("--------------------------------------------------------\n"); + printf("TEST 1 D. Read linearly database files\n"); + + oldtm = ut_clock(); + + for (k = 0; k < 1; k++) { + for (i = 0; i < N_SPACES; i++) { + for (j = 0; j < N_FILES * FILE_SIZE; j++) { + mtr_start(&mtr); + + frame = buf_page_get(i, j, RW_S_LATCH, &mtr); + + ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET, + MLOG_4BYTES, &mtr) + == j); + ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE, + MLOG_4BYTES, &mtr) + == i); + + mtr_commit(&mtr); + } + } + } + + tm = ut_clock(); + printf("Wall clock time for %lu pages %lu milliseconds\n", + k * i * j, tm - oldtm); + buf_validate(); + + printf("--------------------------------------------------------\n"); + printf("TEST 1 E. Read linearly downward database files\n"); + + oldtm = ut_clock(); + + c = 0; + + for (k = 0; k < 1; k++) { + for (i = 0; i < N_SPACES; i++) { + for (j = ut_min(1000, FILE_SIZE - 1); j > 0; j--) { + mtr_start(&mtr); + + frame = buf_page_get(i, j, RW_S_LATCH, &mtr); + c++; + + ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET, + MLOG_4BYTES, &mtr) + == j); + ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE, + MLOG_4BYTES, &mtr) + == i); + + + ut_a(buf_page_io_query(buf_block_align(frame)) + == FALSE); + + mtr_commit(&mtr); + } + } + } + + tm = ut_clock(); + printf("Wall clock time for %lu pages %lu milliseconds\n", + c, tm - oldtm); + buf_validate(); +} + +/************************************************************************ +Reads the test database files. */ + +void +test2(void) +/*=======*/ +{ + ulint i, j, k; + byte* frame; + ulint tm, oldtm; + mtr_t mtr; + + printf("--------------------------------------------------------\n"); + printf("TEST 2. Read randomly database files\n"); + + oldtm = ut_clock(); + + for (k = 0; k < 100; k++) { + i = ut_rnd_gen_ulint() % N_SPACES; + j = ut_rnd_gen_ulint() % (N_FILES * FILE_SIZE); + + mtr_start(&mtr); + + frame = buf_page_get(i, j, RW_S_LATCH, &mtr); + + ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET, + MLOG_4BYTES, &mtr) + == j); + ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE, + MLOG_4BYTES, &mtr) + == i); + + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf("Wall clock time for random %lu read %lu milliseconds\n", + k, tm - oldtm); +} + +/************************************************************************ +Reads the test database files. 
*/ + +void +test3(void) +/*=======*/ +{ + ulint i, j, k; + byte* frame; + ulint tm, oldtm; + ulint rnd; + mtr_t mtr; + + if (FILE_SIZE < POOL_SIZE + 3050 + ut_dbg_zero) { + return; + } + + printf("Flush the pool of high-offset pages\n"); + + /* Flush the pool of high-offset pages */ + for (i = 0; i < POOL_SIZE; i++) { + + mtr_start(&mtr); + + frame = buf_page_get(0, i, RW_S_LATCH, &mtr); + + mtr_commit(&mtr); + } + buf_validate(); + + printf("--------------------------------------------------------\n"); + printf("TEST 3. Read randomly database pages, no read-ahead\n"); + + oldtm = ut_clock(); + + rnd = 123; + + for (k = 0; k < 400; k++) { + rnd += 23477; + + i = 0; + j = POOL_SIZE + 10 + rnd % 3000; + + mtr_start(&mtr); + + frame = buf_page_get(i, j, RW_S_LATCH, &mtr); + + ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET, + MLOG_4BYTES, &mtr) + == j); + ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE, + MLOG_4BYTES, &mtr) + == i); + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf( + "Wall clock time for %lu random no read-ahead %lu milliseconds\n", + k, tm - oldtm); + + buf_validate(); + printf("Flush the pool of high-offset pages\n"); + /* Flush the pool of high-offset pages */ + for (i = 0; i < POOL_SIZE; i++) { + + mtr_start(&mtr); + + frame = buf_page_get(0, i, RW_S_LATCH, &mtr); + + mtr_commit(&mtr); + } + + buf_validate(); + printf("--------------------------------------------------------\n"); + printf("TEST 3 B. Read randomly database pages, random read-ahead\n"); + + oldtm = ut_clock(); + + rnd = 123; + for (k = 0; k < 400; k++) { + rnd += 23477; + + i = 0; + j = POOL_SIZE + 10 + rnd % 400; + + mtr_start(&mtr); + + frame = buf_page_get(i, j, RW_S_LATCH, &mtr); + + ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET, + MLOG_4BYTES, &mtr) + == j); + ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE, + MLOG_4BYTES, &mtr) + == i); + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf( + "Wall clock time for %lu random read-ahead %lu milliseconds\n", + k, tm - oldtm); +} + +/************************************************************************ +Tests speed of CPU algorithms. */ + +void +test4(void) +/*=======*/ +{ + ulint i, j; + ulint tm, oldtm; + mtr_t mtr; + buf_frame_t* frame; + + os_thread_sleep(2000000); + + printf("--------------------------------------------------------\n"); + printf("TEST 4. Speed of CPU algorithms\n"); + + oldtm = ut_clock(); + + for (j = 0; j < 1000; j++) { + + mtr_start(&mtr); + for (i = 0; i < 20; i++) { + + frame = buf_page_get(0, i, RW_S_LATCH, &mtr); + } + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf("Wall clock time for %lu page get-release %lu milliseconds\n", + i * j, tm - oldtm); + + buf_validate(); + + oldtm = ut_clock(); + + for (i = 0; i < 10000; i++) { + frame = buf_frame_alloc(); + buf_frame_free(frame); + } + + tm = ut_clock(); + printf("Wall clock time for %lu block alloc-free %lu milliseconds\n", + i, tm - oldtm); + + ha_print_info(buf_pool->page_hash); + buf_print(); +} + +/************************************************************************ +Tests various points of code. */ + +void +test5(void) +/*=======*/ +{ + buf_frame_t* frame; + fil_addr_t addr; + ulint space; + mtr_t mtr; + + printf("--------------------------------------------------------\n"); + printf("TEST 5. 
Various tests \n"); + + mtr_start(&mtr); + + frame = buf_page_get(0, 313, RW_S_LATCH, &mtr); + + ut_a(buf_frame_get_space_id(frame) == 0); + ut_a(buf_frame_get_page_no(frame) == 313); + + ut_a(buf_frame_align(frame + UNIV_PAGE_SIZE - 1) == frame); + ut_a(buf_frame_align(frame) == frame); + + ut_a(buf_block_align(frame + UNIV_PAGE_SIZE - 1) == + buf_block_align(frame)); + + buf_ptr_get_fsp_addr(frame + UNIV_PAGE_SIZE - 1, &space, &addr); + + ut_a(addr.page == 313) + ut_a(addr.boffset == UNIV_PAGE_SIZE - 1); + ut_a(space == 0); + + mtr_commit(&mtr); +} + +/************************************************************************ +Random test thread function. */ + +ulint +random_thread( +/*===========*/ + void* arg) +{ + ulint n; + ulint i, j, r, t, p, sp, count; + ulint s; + buf_frame_t* arr[POOL_SIZE / N_THREADS]; + buf_frame_t* frame; + mtr_t mtr; + mtr_t mtr2; + + n = *((ulint*)arg); + + printf("Random test thread %lu starts\n", os_thread_get_curr_id()); + + for (i = 0; i < 30; i++) { + t = ut_rnd_gen_ulint() % 10; + r = ut_rnd_gen_ulint() % 100; + s = ut_rnd_gen_ulint() % (POOL_SIZE / N_THREADS); + p = ut_rnd_gen_ulint(); + sp = ut_rnd_gen_ulint() % N_SPACES; + + if (i % 100 == 0) { + printf("Thr %lu tst %lu starts\n", os_thread_get_curr_id(), t); + } + ut_a(buf_validate()); + + mtr_start(&mtr); + if (t == 6) { + /* Allocate free blocks */ + for (j = 0; j < s; j++) { + arr[j] = buf_frame_alloc(); + ut_a(arr[j]); + } + for (j = 0; j < s; j++) { + buf_frame_free(arr[j]); + } + } else if (t == 9) { +/* buf_flush_batch(BUF_FLUSH_LIST, 30); */ + + } else if (t == 7) { + /* x-lock many blocks */ + for (j = 0; j < s; j++) { + arr[j] = buf_page_get(sp, (p + j) + % (N_FILES * FILE_SIZE), + RW_X_LATCH, + &mtr); + ut_a(arr[j]); + if (j > 0) { + ut_a(arr[j] != arr[j - 1]); + } + } + ut_a(buf_validate()); + } else if (t == 8) { + /* s-lock many blocks */ + for (j = 0; j < s; j++) { + arr[j] = buf_page_get(sp, (p + j) + % (N_FILES * FILE_SIZE), + RW_S_LATCH, + &mtr); + ut_a(arr[j]); + if (j > 0) { + ut_a(arr[j] != arr[j - 1]); + } + } + } else if (t <= 2) { + for (j = 0; j < r; j++) { + /* Read pages */ + mtr_start(&mtr2); + frame = buf_page_get(sp, + p % (N_FILES * FILE_SIZE), + RW_S_LATCH, &mtr2); + + ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET, + MLOG_4BYTES, &mtr2) + == p % (N_FILES * FILE_SIZE)); + ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE, + MLOG_4BYTES, &mtr2) + == sp); + mtr_commit(&mtr2); + if (t == 0) { + p++; /* upward */ + } else if (t == 1) { + p--; /* downward */ + } else if (t == 2) { + p = ut_rnd_gen_ulint(); /* randomly */ + } + } + } else if (t <= 5) { + for (j = 0; j < r; j++) { + /* Write pages */ + mtr_start(&mtr2); + frame = buf_page_get(sp, p % (N_FILES * FILE_SIZE), + RW_X_LATCH, &mtr2); + count = 1 + mtr_read_ulint(frame + COUNTER_OFFSET, + MLOG_4BYTES, &mtr2); + mutex_enter(&incs_mutex); + incs++; + mutex_exit(&incs_mutex); + mlog_write_ulint(frame + COUNTER_OFFSET, count, + MLOG_4BYTES, &mtr2); + mtr_commit(&mtr2); + if (t == 3) { + p++; /* upward */ + } else if (t == 4) { + p--; /* downward */ + } else if (t == 5) { + p = ut_rnd_gen_ulint(); /* randomly */ + } + } + } /* if t = */ + + mtr_commit(&mtr); +/* printf("Thr %lu tst %lu ends ", os_thread_get_curr_id(), t); */ + ut_a(buf_validate()); + } /* for i */ + printf("\nRandom test thread %lu exits\n", os_thread_get_curr_id()); + return(0); +} + +/************************************************************************ +Random test thread function which reports the rw-lock list. 
*/ + +ulint +rw_list_thread( +/*===========*/ + void* arg) +{ + ulint n; + ulint i; + + n = *((ulint*)arg); + + printf("\nRw list test thread %lu starts\n", os_thread_get_curr_id()); + + for (i = 0; i < 10; i++) { + os_thread_sleep(3000000); + rw_lock_list_print_info(); + buf_validate(); + } + + return(0); +} + +/************************************************************************* +Performs random operations on the buffer with several threads. */ + +void +test6(void) +/*=======*/ +{ + ulint i, j; + os_thread_t thr[N_THREADS + 1]; + os_thread_id_t id[N_THREADS + 1]; + ulint n[N_THREADS + 1]; + ulint count = 0; + buf_frame_t* frame; + mtr_t mtr; + + printf("--------------------------------------------------------\n"); + printf("TEST 6. Random multi-thread test on the buffer \n"); + + incs = 0; + mutex_create(&incs_mutex); + + for (i = 0; i < N_THREADS; i++) { + n[i] = i; + + thr[i] = os_thread_create(random_thread, n + i, id + i); + } +/* + n[N_THREADS] = N_THREADS; + + thr[N_THREADS] = os_thread_create(rw_list_thread, n + N_THREADS, + id + N_THREADS); +*/ + for (i = 0; i < N_THREADS; i++) { + os_thread_wait(thr[i]); + } + +/* os_thread_wait(thr[N_THREADS]); */ + + for (i = 0; i < N_SPACES; i++) { + for (j = 0; j < N_FILES * FILE_SIZE; j++) { + mtr_start(&mtr); + + frame = buf_page_get(i, j, RW_S_LATCH, &mtr); + + ut_a(mtr_read_ulint(frame + FIL_PAGE_OFFSET, + MLOG_4BYTES, &mtr) + == j); + ut_a(mtr_read_ulint(frame + FIL_PAGE_SPACE, + MLOG_4BYTES, &mtr) + == i); + + count += mtr_read_ulint(frame + COUNTER_OFFSET, + MLOG_4BYTES, &mtr); + + mtr_commit(&mtr); + } + } + + printf("Count %lu incs %lu\n", count, incs); + ut_a(count == incs); +} + +/************************************************************************ +Frees the spaces in the file system. */ + +void +free_system(void) +/*=============*/ +{ + ulint i; + + for (i = 0; i < N_SPACES; i++) { + fil_space_free(i); + } +} + +/************************************************************************ +Main test function. 
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+/*	buf_debug_prints = TRUE; */
+
+	oldtm = ut_clock();
+
+	os_aio_init(160, 5);
+	sync_init();
+	mem_init(1500000);
+	fil_init(26);	/* Allow 25 open files at a time */
+	buf_pool_init(POOL_SIZE, POOL_SIZE);
+	log_init();
+
+	buf_validate();
+
+	ut_a(fil_validate());
+
+	create_files();
+
+	create_db();
+
+	buf_validate();
+
+	test1();
+	buf_validate();
+
+	test2();
+	buf_validate();
+
+	test3();
+	buf_validate();
+
+	test4();
+
+	test5();
+
+	buf_validate();
+
+	test6();
+
+	buf_validate();
+
+	buf_print();
+
+	buf_flush_batch(BUF_FLUSH_LIST, POOL_SIZE + 1);
+	buf_print();
+	buf_validate();
+
+	os_thread_sleep(1000000);
+
+	buf_print();
+	buf_all_freed();
+
+	free_system();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
diff --git a/innobase/buf/ts/tsos.c b/innobase/buf/ts/tsos.c
new file mode 100644
index 00000000000..c1cc3f27172
--- /dev/null
+++ b/innobase/buf/ts/tsos.c
@@ -0,0 +1,185 @@
+/************************************************************************
+The test module for the operating system interface
+
+(c) 1995 Innobase Oy
+
+Created 9/27/1995 Heikki Tuuri
+*************************************************************************/
+
+
+#include "../os0thread.h"
+#include "../os0shm.h"
+#include "../os0proc.h"
+#include "../os0sync.h"
+#include "../os0file.h"
+#include "ut0ut.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+
+ulint	last_thr = 1;
+
+byte	global_buf[1000000];
+
+os_file_t	file;
+os_file_t	file2;
+
+os_event_t	gl_ready;
+
+mutex_t	ios_mutex;
+ulint	ios;
+
+/************************************************************************
+Io-handler thread function. */
+
+ulint
+handler_thread(
+/*===========*/
+	void*	arg)
+{
+	ulint	segment;
+	void*	mess;
+	ulint	i;
+	bool	ret;
+
+	segment = *((ulint*)arg);
+
+	printf("Thread %lu starts\n", segment);
+
+	for (i = 0;; i++) {
+		ret = os_aio_wait(segment, &mess);
+
+		mutex_enter(&ios_mutex);
+		ios++;
+		mutex_exit(&ios_mutex);
+
+		ut_a(ret);
+/*		printf("Message for thread %lu %lu\n", segment,
+						(ulint)mess); */
+		if ((ulint)mess == 3333) {
+			os_event_set(gl_ready);
+		}
+	}
+
+	return(0);
+}
+
+/************************************************************************
+Test of io-handler threads */
+
+void
+test4(void)
+/*=======*/
+{
+	ulint	i;
+	bool	ret;
+	void*	buf;
+	ulint	rnd;
+	ulint	tm, oldtm;
+
+	os_thread_t	thr[5];
+	os_thread_id_t	id[5];
+	ulint	n[5];
+
+	printf("-------------------------------------------\n");
+	printf("OS-TEST 4. Test of asynchronous file io\n");
+
+	/* Align the buffer for file io */
+
+	buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+	gl_ready = os_event_create(NULL);
+	ios = 0;
+
+	sync_init();
+	mem_init();
+
+	mutex_create(&ios_mutex);
+
+	for (i = 0; i < 5; i++) {
+		n[i] = i;
+
+		thr[i] = os_thread_create(handler_thread, n + i, id + i);
+	}
+
+	rnd = 0;
+
+	oldtm = ut_clock();
+
+	for (i = 0; i < 4096; i++) {
+		ret = os_aio_read(file, (byte*)buf + 8192 * (rnd % 100),
+				8192 * (rnd % 4096), 0,
+				8192, (void*)i);
+		ut_a(ret);
+		rnd += 1;
+	}
+
+	ret = os_aio_read(file, buf, 8192 * (rnd % 1024), 0, 8192,
+							(void*)3333);
+	ut_a(ret);
+
+	ut_a(!os_aio_all_slots_free());
+
+	tm = ut_clock();
+
+	printf("All ios queued! N ios: %lu\n", ios);
+
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+	os_event_wait(gl_ready);
+
+	tm = ut_clock();
+	printf("N ios: %lu\n", ios);
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+	os_thread_sleep(2000000);
+
+	printf("N ios: %lu\n", ios);
+
+	ut_a(os_aio_all_slots_free());
+}
+
+/*************************************************************************
+Initializes the asynchronous io system for tests. */
+
+void
+init_aio(void)
+/*==========*/
+{
+	bool	ret;
+	void*	buf;
+
+	buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+	os_aio_init(160, 5);
+	file = os_file_create("j:\\tsfile4", OS_FILE_CREATE, OS_FILE_TABLESPACE,
+									&ret);
+
+	if (ret == FALSE) {
+		ut_a(os_file_get_last_error() == OS_FILE_ALREADY_EXISTS);
+
+		file = os_file_create("j:\\tsfile4", OS_FILE_OPEN,
+					OS_FILE_TABLESPACE, &ret);
+
+		ut_a(ret);
+	}
+}
+
+/************************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+{
+	ulint	tm, oldtm;
+
+	oldtm = ut_clock();
+
+	init_aio();
+
+	test4();
+
+	tm = ut_clock();
+	printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+	printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
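A note on the buffer alignment used in test4() and init_aio() above: the expression (((ulint)global_buf + 6300) & (~0xFFF)) skips a little way into the static array and then rounds the address down to a 4096-byte (0x1000) boundary, because asynchronous and unbuffered file io typically requires page-aligned buffers. The stand-alone sketch below reproduces that arithmetic; it is illustrative only (the names aligned_in, scratch and ALIGNMENT are ours, not part of the Innobase sources), and, like the original ulint cast, it assumes a pointer value fits in an unsigned long.

#include <assert.h>
#include <stdio.h>

#define ALIGNMENT	4096UL	/* 0x1000; ~(ALIGNMENT - 1) == ~0xFFF */

static unsigned char	scratch[1000000];	/* stands in for global_buf */

/* Returns a pointer inside base, aligned down to an ALIGNMENT boundary
after skipping skip bytes; skip must be >= ALIGNMENT - 1 so that the
downward rounding cannot step in front of the array. */
static void*
aligned_in(unsigned char* base, unsigned long skip)
{
	unsigned long	addr = (unsigned long)base + skip;

	return((void*)(addr & ~(ALIGNMENT - 1)));
}

int
main(void)
{
	void*	buf = aligned_in(scratch, 6300);

	/* The result is page aligned and still points into scratch */
	assert(((unsigned long)buf & (ALIGNMENT - 1)) == 0);
	assert((unsigned char*)buf >= scratch);
	assert((unsigned char*)buf < scratch + sizeof(scratch));

	printf("base %p aligned %p\n", (void*)scratch, buf);

	return(0);
}

The skip of 6300 bytes in the original code is larger than the minimum of ALIGNMENT - 1 = 4095, which leaves some slack before the aligned pointer; any value of at least 4095 would keep the rounded-down address inside the array.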