summaryrefslogtreecommitdiff
path: root/innobase/os
diff options
context:
space:
mode:
Diffstat (limited to 'innobase/os')
-rw-r--r--innobase/os/Makefile.am24
-rw-r--r--innobase/os/makefilewin20
-rw-r--r--innobase/os/os0file.c2005
-rw-r--r--innobase/os/os0fileold.c1956
-rw-r--r--innobase/os/os0proc.c150
-rw-r--r--innobase/os/os0shm.c146
-rw-r--r--innobase/os/os0sync.c461
-rw-r--r--innobase/os/os0thread.c210
-rw-r--r--innobase/os/os0trash.c43
-rw-r--r--innobase/os/ts/makefile20
-rw-r--r--innobase/os/ts/tsos.c793
-rw-r--r--innobase/os/ts/tsosaux.c83
12 files changed, 5911 insertions, 0 deletions
diff --git a/innobase/os/Makefile.am b/innobase/os/Makefile.am
new file mode 100644
index 00000000000..b517cac1cc8
--- /dev/null
+++ b/innobase/os/Makefile.am
@@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include ../include/Makefile.i
+
+libs_LIBRARIES = libos.a
+
+libos_a_SOURCES = os0proc.c os0shm.c os0sync.c os0thread.c os0file.c
+
+EXTRA_PROGRAMS =
diff --git a/innobase/os/makefilewin b/innobase/os/makefilewin
new file mode 100644
index 00000000000..08dba0e5e47
--- /dev/null
+++ b/innobase/os/makefilewin
@@ -0,0 +1,20 @@
+include ..\include\makefile.i
+
+os.lib: os0sync.obj os0thread.obj os0shm.obj os0proc.obj os0file.obj
+ lib -out:..\libs\os.lib os0sync.obj os0thread.obj os0shm.obj os0proc.obj os0file.obj
+
+os0sync.obj: os0sync.c
+ $(CCOM) $(CFLW) -c os0sync.c
+
+os0thread.obj: os0thread.c
+ $(CCOM) $(CFLW) -c os0thread.c
+
+os0shm.obj: os0shm.c
+ $(CCOM) $(CFLW) -c os0shm.c
+
+os0proc.obj: os0proc.c
+ $(CCOM) $(CFLW) -c os0proc.c
+
+os0file.obj: os0file.c
+ $(CCOM) $(CFLW) -c os0file.c
+
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
new file mode 100644
index 00000000000..7851b83732d
--- /dev/null
+++ b/innobase/os/os0file.c
@@ -0,0 +1,2005 @@
+/******************************************************
+The interface to the operating system file i/o primitives
+
+(c) 1995 Innobase Oy
+
+Created 10/21/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0file.h"
+#include "os0sync.h"
+#include "ut0mem.h"
+
+
+#ifdef POSIX_ASYNC_IO
+/* We assume in this case that the OS has standard Posix aio (at least SunOS
+2.6, HP-UX 11i and AIX 4.3 have) */
+
+#undef __USE_FILE_OFFSET64
+
+#include <aio.h>
+#endif
+
+/* We use these mutexes to protect lseek + file i/o operation, if the
+OS does not provide an atomic pread or pwrite, or similar */
+#define OS_FILE_N_SEEK_MUTEXES 16
+os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
+
+/* In simulated aio, merge at most this many consecutive i/os */
+#define OS_AIO_MERGE_N_CONSECUTIVE 32
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads */
+
+ibool os_aio_use_native_aio = FALSE;
+
+/* The aio array slot structure */
+typedef struct os_aio_slot_struct os_aio_slot_t;
+
+struct os_aio_slot_struct{
+ ibool is_read; /* TRUE if a read operation */
+ ulint pos; /* index of the slot in the aio
+ array */
+ ibool reserved; /* TRUE if this slot is reserved */
+ ulint len; /* length of the block to read or
+ write */
+ byte* buf; /* buffer used in i/o */
+ ulint type; /* OS_FILE_READ or OS_FILE_WRITE */
+ ulint offset; /* 32 low bits of file offset in
+ bytes */
+ ulint offset_high; /* 32 high bits of file offset */
+ os_file_t file; /* file where to read or write */
+ char* name; /* file name or path */
+ ibool io_already_done;/* used only in simulated aio:
+ TRUE if the physical i/o already
+ made and only the slot message
+ needs to be passed to the caller
+ of os_aio_simulated_handle */
+ void* message1; /* message which is given by the */
+ void* message2; /* the requester of an aio operation
+ and which can be used to identify
+ which pending aio operation was
+ completed */
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED control; /* Windows control block for the
+ aio request */
+#elif defined(POSIX_ASYNC_IO)
+ struct aiocb control; /* Posix control block for aio
+ request */
+#endif
+};
+
+/* The aio array structure */
+typedef struct os_aio_array_struct os_aio_array_t;
+
+struct os_aio_array_struct{
+ os_mutex_t mutex; /* the mutex protecting the aio array */
+ os_event_t not_full; /* The event which is set to signaled
+ state when there is space in the aio
+ outside the ibuf segment */
+ ulint n_slots; /* Total number of slots in the aio array.
+ This must be divisible by n_threads. */
+ ulint n_segments;/* Number of segments in the aio array of
+ pending aio requests. A thread can wait
+ separately for any one of the segments. */
+ ulint n_reserved;/* Number of reserved slots in the
+ aio array outside the ibuf segment */
+ os_aio_slot_t* slots; /* Pointer to the slots in the array */
+ os_event_t* events; /* Pointer to an array of event handles
+ where we copied the handles from slots,
+ in the same order. This can be used in
+ WaitForMultipleObjects; used only in
+ Windows */
+};
+
+/* Array of events used in simulated aio */
+os_event_t* os_aio_segment_wait_events = NULL;
+
+/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
+are NULL when the module has not yet been initialized. */
+os_aio_array_t* os_aio_read_array = NULL;
+os_aio_array_t* os_aio_write_array = NULL;
+os_aio_array_t* os_aio_ibuf_array = NULL;
+os_aio_array_t* os_aio_log_array = NULL;
+os_aio_array_t* os_aio_sync_array = NULL;
+
+ulint os_aio_n_segments = ULINT_UNDEFINED;
+
+/***************************************************************************
+Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + 100 is returned. */
+
+ulint
+os_file_get_last_error(void)
+/*========================*/
+ /* out: error number, or OS error number + 100 */
+{
+ ulint err;
+
+#ifdef __WIN__
+
+ err = (ulint) GetLastError();
+
+ if (err == ERROR_FILE_NOT_FOUND) {
+ return(OS_FILE_NOT_FOUND);
+ } else if (err == ERROR_DISK_FULL) {
+ return(OS_FILE_DISK_FULL);
+ } else if (err == ERROR_FILE_EXISTS) {
+ return(OS_FILE_ALREADY_EXISTS);
+ } else {
+ return(100 + err);
+ }
+#else
+ err = (ulint) errno;
+
+ if (err == ENOSPC ) {
+ return(OS_FILE_DISK_FULL);
+#ifdef POSIX_ASYNC_IO
+ } else if (err == EAGAIN) {
+ return(OS_FILE_AIO_RESOURCES_RESERVED);
+#endif
+ } else if (err == ENOENT) {
+ return(OS_FILE_NOT_FOUND);
+ } else if (err == EEXIST) {
+ return(OS_FILE_ALREADY_EXISTS);
+ } else {
+ return(100 + err);
+ }
+#endif
+}
+
+/********************************************************************
+Does error handling when a file operation fails. If we have run out
+of disk space, then the user can clean the disk. If we do not find
+a specified file, then the user can copy it to disk. */
+static
+ibool
+os_file_handle_error(
+/*=================*/
+ /* out: TRUE if we should retry the operation */
+ os_file_t file, /* in: file pointer */
+ char* name) /* in: name of a file or NULL */
+{
+ int input_char;
+ ulint err;
+
+ err = os_file_get_last_error();
+
+ if (err == OS_FILE_DISK_FULL) {
+ask_again:
+ printf("\n");
+ if (name) {
+ printf(
+ "Innobase encountered a problem with file %s.\n",
+ name);
+ }
+ printf("Disk is full. Try to clean the disk to free space\n");
+ printf("before answering the following: How to continue?\n");
+ printf("(Y == freed some space: try again)\n");
+ printf("(N == crash the database: will restart it)?\n");
+ask_with_no_question:
+ input_char = getchar();
+
+ if (input_char == (int) 'N') {
+ ut_error;
+
+ return(FALSE);
+ } else if (input_char == (int) 'Y') {
+
+ return(TRUE);
+ } else if (input_char == (int) '\n') {
+
+ goto ask_with_no_question;
+ } else {
+ goto ask_again;
+ }
+ } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
+
+ return(TRUE);
+ } else {
+ ut_error;
+ }
+
+ return(FALSE);
+}
+
+/********************************************************************
+Opens an existing file or creates a new. */
+
+os_file_t
+os_file_create(
+/*===========*/
+ /* out, own: handle to the file, not defined if error,
+ error number can be retrieved with os_get_last_error */
+ char* name, /* in: name of the file or path as a null-terminated
+ string */
+ ulint create_mode, /* in: OS_FILE_OPEN if an existing file is opened
+ (if does not exist, error), or OS_FILE_CREATE if a new
+ file is created (if exists, error), OS_FILE_OVERWRITE
+ if a new is created or an old overwritten */
+ ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
+ is desired, OS_FILE_NORMAL, if any normal file */
+ ibool* success)/* out: TRUE if succeed, FALSE if error */
+{
+#ifdef __WIN__
+ os_file_t file;
+ DWORD create_flag;
+ DWORD attributes;
+ ibool retry;
+
+try_again:
+ ut_a(name);
+
+ if (create_mode == OS_FILE_OPEN) {
+ create_flag = OPEN_EXISTING;
+ } else if (create_mode == OS_FILE_CREATE) {
+ create_flag = CREATE_NEW;
+ } else if (create_mode == OS_FILE_OVERWRITE) {
+ create_flag = CREATE_ALWAYS;
+ } else {
+ create_flag = 0;
+ ut_error;
+ }
+
+ if (purpose == OS_FILE_AIO) {
+ /* use asynchronous (overlapped) io and no buffering
+ of writes in the OS */
+ attributes = 0;
+#ifdef WIN_ASYNC_IO
+ if (os_aio_use_native_aio) {
+ attributes = attributes | FILE_FLAG_OVERLAPPED;
+ }
+#endif
+#ifdef UNIV_NON_BUFFERED_IO
+ attributes = attributes | FILE_FLAG_NO_BUFFERING;
+#endif
+ } else if (purpose == OS_FILE_NORMAL) {
+ attributes = 0
+#ifdef UNIV_NON_BUFFERED_IO
+ | FILE_FLAG_NO_BUFFERING
+#endif
+ ;
+ } else {
+ attributes = 0;
+ ut_error;
+ }
+
+ file = CreateFile(name,
+ GENERIC_READ | GENERIC_WRITE, /* read and write
+ access */
+ FILE_SHARE_READ,/* file can be read by other
+ processes */
+ NULL, /* default security attributes */
+ create_flag,
+ attributes,
+ NULL); /* no template file */
+
+ if (file == INVALID_HANDLE_VALUE) {
+ *success = FALSE;
+
+ if (create_mode != OS_FILE_OPEN
+ && os_file_get_last_error() == OS_FILE_DISK_FULL) {
+
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+ }
+ } else {
+ *success = TRUE;
+ }
+
+ return(file);
+#else
+ os_file_t file;
+ int create_flag;
+ ibool retry;
+
+try_again:
+ ut_a(name);
+
+ if (create_mode == OS_FILE_OPEN) {
+ create_flag = O_RDWR;
+ } else if (create_mode == OS_FILE_CREATE) {
+ create_flag = O_RDWR | O_CREAT | O_EXCL;
+ } else if (create_mode == OS_FILE_OVERWRITE) {
+ create_flag = O_RDWR | O_CREAT | O_TRUNC;
+ } else {
+ create_flag = 0;
+ ut_error;
+ }
+
+ UT_NOT_USED(purpose);
+
+ if (create_mode == OS_FILE_CREATE) {
+
+ file = open(name, create_flag, S_IRWXU | S_IRWXG | S_IRWXO);
+ } else {
+ file = open(name, create_flag);
+ }
+
+ if (file == -1) {
+ *success = FALSE;
+
+ if (create_mode != OS_FILE_OPEN
+ && errno == ENOSPC) {
+
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+ }
+ } else {
+ *success = TRUE;
+ }
+
+ return(file);
+#endif
+}
+
+/***************************************************************************
+Closes a file handle. In case of error, error number can be retrieved with
+os_file_get_last_error. */
+
+ibool
+os_file_close(
+/*==========*/
+ /* out: TRUE if success */
+ os_file_t file) /* in, own: handle to a file */
+{
+#ifdef __WIN__
+ BOOL ret;
+
+ ut_a(file);
+
+ ret = CloseHandle(file);
+
+ if (ret) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+#else
+ int ret;
+
+ ret = close(file);
+
+ if (ret == -1) {
+ return(FALSE);
+ }
+
+ return(TRUE);
+#endif
+}
+
+/***************************************************************************
+Gets a file size. */
+
+ibool
+os_file_get_size(
+/*=============*/
+ /* out: TRUE if success */
+ os_file_t file, /* in: handle to a file */
+ ulint* size, /* out: least significant 32 bits of file
+ size */
+ ulint* size_high)/* out: most significant 32 bits of size */
+{
+#ifdef __WIN__
+ DWORD high;
+ DWORD low;
+
+ low = GetFileSize(file, &high);
+
+ if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
+ return(FALSE);
+ }
+
+ *size = low;
+ *size_high = high;
+
+ return(TRUE);
+#else
+ *size = (ulint) lseek(file, 0, SEEK_END);
+ *size_high = 0;
+
+ return(TRUE);
+#endif
+}
+
+/***************************************************************************
+Sets a file size. This function can be used to extend or truncate a file. */
+
+ibool
+os_file_set_size(
+/*=============*/
+ /* out: TRUE if success */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ ulint size, /* in: least significant 32 bits of file
+ size */
+ ulint size_high)/* in: most significant 32 bits of size */
+{
+ ulint offset;
+ ulint n_bytes;
+ ulint low;
+ ibool ret;
+ ibool retry;
+ ulint i;
+ byte* buf;
+
+try_again:
+ buf = ut_malloc(UNIV_PAGE_SIZE * 64);
+
+ /* Write buffer full of zeros */
+ for (i = 0; i < UNIV_PAGE_SIZE * 64; i++) {
+ buf[i] = '\0';
+ }
+
+ offset = 0;
+ low = size;
+#if (UNIV_WORD_SIZE == 8)
+ low = low + (size_high << 32);
+#endif
+ while (offset < low) {
+ if (low - offset < UNIV_PAGE_SIZE * 64) {
+ n_bytes = low - offset;
+ } else {
+ n_bytes = UNIV_PAGE_SIZE * 64;
+ }
+
+ ret = os_file_write(name, file, buf, offset, 0, n_bytes);
+
+ if (!ret) {
+ ut_free(buf);
+ goto error_handling;
+ }
+ offset += n_bytes;
+ }
+
+ ut_free(buf);
+
+ ret = os_file_flush(file);
+
+ if (ret) {
+ return(TRUE);
+ }
+
+error_handling:
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+
+ ut_error;
+}
+
+/***************************************************************************
+Flushes the write buffers of a given file to the disk. */
+
+ibool
+os_file_flush(
+/*==========*/
+ /* out: TRUE if success */
+ os_file_t file) /* in, own: handle to a file */
+{
+#ifdef __WIN__
+ BOOL ret;
+
+ ut_a(file);
+
+ ret = FlushFileBuffers(file);
+
+ if (ret) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+#else
+ int ret;
+
+ ret = fsync(file);
+
+ if (ret == 0) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+#endif
+}
+
+
+#ifndef __WIN__
+/***********************************************************************
+Does a synchronous read operation in Posix. */
+static
+ssize_t
+os_file_pread(
+/*==========*/
+ /* out: number of bytes read, -1 if error */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read */
+ ulint n, /* in: number of bytes to read */
+ ulint offset) /* in: offset from where to read */
+{
+#ifdef HAVE_PREAD
+ return(pread(file, buf, n, (off_t) offset));
+#else
+ ssize_t ret;
+ ulint i;
+
+ /* Protect the seek / read operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret = lseek(file, (off_t) offset, 0);
+
+ if (ret < 0) {
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+ }
+
+ ret = read(file, buf, n);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+#endif
+}
+
+/***********************************************************************
+Does a synchronous write operation in Posix. */
+static
+ssize_t
+os_file_pwrite(
+/*===========*/
+ /* out: number of bytes written, -1 if error */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer from where to write */
+ ulint n, /* in: number of bytes to write */
+ ulint offset) /* in: offset where to write */
+{
+#ifdef HAVE_PWRITE
+ return(pwrite(file, buf, n, (off_t) offset));
+#else
+ ssize_t ret;
+ ulint i;
+
+ /* Protect the seek / write operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret = lseek(file, (off_t) offset, 0);
+
+ if (ret < 0) {
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+ }
+
+ ret = write(file, buf, n);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+#endif
+}
+#endif
+
+/***********************************************************************
+Requests a synchronous positioned read operation. */
+
+ibool
+os_file_read(
+/*=========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n) /* in: number of bytes to read */
+{
+#ifdef __WIN__
+ BOOL ret;
+ DWORD len;
+ DWORD ret2;
+ DWORD err;
+ DWORD low;
+ DWORD high;
+ ibool retry;
+ ulint i;
+
+try_again:
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+
+ low = offset;
+ high = offset_high;
+
+ /* Protect the seek / read operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+ if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ err = GetLastError();
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ goto error_handling;
+ }
+
+ ret = ReadFile(file, buf, n, &len, NULL);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ if (ret && len == n) {
+ return(TRUE);
+ }
+
+ err = GetLastError();
+#else
+ ibool retry;
+ ssize_t ret;
+ ulint i;
+
+#if (UNIV_WORD_SIZE == 8)
+ offset = offset + (offset_high << 32);
+#endif
+try_again:
+ /* Protect the seek / read operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret = os_file_pread(file, buf, n, (off_t) offset);
+
+ if (ret == n) {
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(TRUE);
+ }
+#endif
+error_handling:
+ retry = os_file_handle_error(file, NULL);
+
+ if (retry) {
+ goto try_again;
+ }
+
+ ut_error;
+
+ return(FALSE);
+}
+
+/***********************************************************************
+Requests a synchronous write operation. */
+
+ibool
+os_file_write(
+/*==========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer from which to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to write */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n) /* in: number of bytes to write */
+{
+#ifdef __WIN__
+ BOOL ret;
+ DWORD len;
+ DWORD ret2;
+ DWORD err;
+ DWORD low;
+ DWORD high;
+ ibool retry;
+ ulint i;
+
+try_again:
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+
+ low = offset;
+ high = offset_high;
+
+ /* Protect the seek / write operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+ if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ err = GetLastError();
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ goto error_handling;
+ }
+
+ ret = WriteFile(file, buf, n, &len, NULL);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ if (ret && len == n) {
+ return(TRUE);
+ }
+#else
+ ibool retry;
+ ssize_t ret;
+
+#if (UNIV_WORD_SIZE == 8)
+ offset = offset + (offset_high << 32);
+#endif
+try_again:
+ ret = pwrite(file, buf, n, (off_t) offset);
+
+ if (ret == n) {
+ return(TRUE);
+ }
+#endif
+
+error_handling:
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+
+ ut_error;
+
+ return(FALSE);
+}
+
+/********************************************************************
+Returns a pointer to the nth slot in the aio array. */
+static
+os_aio_slot_t*
+os_aio_array_get_nth_slot(
+/*======================*/
+ /* out: pointer to slot */
+ os_aio_array_t* array, /* in: aio array */
+ ulint index) /* in: index of the slot */
+{
+ ut_a(index < array->n_slots);
+
+ return((array->slots) + index);
+}
+
+/****************************************************************************
+Creates an aio wait array. */
+static
+os_aio_array_t*
+os_aio_array_create(
+/*================*/
+ /* out, own: aio array */
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+ ulint n_segments) /* in: number of segments in the aio array */
+{
+ os_aio_array_t* array;
+ ulint i;
+ os_aio_slot_t* slot;
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED* over;
+#endif
+ ut_a(n > 0);
+ ut_a(n_segments > 0);
+ ut_a(n % n_segments == 0);
+
+ array = ut_malloc(sizeof(os_aio_array_t));
+
+ array->mutex = os_mutex_create(NULL);
+ array->not_full = os_event_create(NULL);
+ array->n_slots = n;
+ array->n_segments = n_segments;
+ array->n_reserved = 0;
+ array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
+ array->events = ut_malloc(n * sizeof(os_event_t));
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ slot->pos = i;
+ slot->reserved = FALSE;
+#ifdef WIN_ASYNC_IO
+ over = &(slot->control);
+
+ over->hEvent = os_event_create(NULL);
+
+ *((array->events) + i) = over->hEvent;
+#endif
+ }
+
+ return(array);
+}
+
+/****************************************************************************
+Initializes the asynchronous io system. Creates separate aio array for
+non-ibuf read and write, a third aio array for the ibuf i/o, with just one
+segment, two aio arrays for log reads and writes with one segment, and a
+synchronous aio array of the specified size. The combined number of segments
+in the three first aio arrays is the parameter n_segments given to the
+function. The caller must create an i/o handler thread for each segment in
+the four first arrays, but not for the sync aio array. */
+
+void
+os_aio_init(
+/*========*/
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+ ulint n_segments, /* in: combined number of segments in the four
+ first aio arrays; must be >= 4 */
+ ulint n_slots_sync) /* in: number of slots in the sync aio array */
+{
+ ulint n_read_segs;
+ ulint n_write_segs;
+ ulint n_per_seg;
+ ulint i;
+#ifdef POSIX_ASYNC_IO
+ sigset_t sigset;
+#endif
+ ut_ad(n % n_segments == 0);
+ ut_ad(n_segments >= 4);
+
+ n_per_seg = n / n_segments;
+ n_write_segs = (n_segments - 2) / 2;
+ n_read_segs = n_segments - 2 - n_write_segs;
+
+ /* printf("Array n per seg %lu\n", n_per_seg); */
+
+ os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
+ n_read_segs);
+ os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
+ n_write_segs);
+ os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
+
+ os_aio_log_array = os_aio_array_create(n_per_seg, 1);
+
+ os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
+
+ os_aio_n_segments = n_segments;
+
+ os_aio_validate();
+
+ for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
+ os_file_seek_mutexes[i] = os_mutex_create(NULL);
+ }
+
+ os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
+
+ for (i = 0; i < n_segments; i++) {
+ os_aio_segment_wait_events[i] = os_event_create(NULL);
+ }
+
+#ifdef POSIX_ASYNC_IO
+ /* Block aio signals from the current thread and its children:
+ for this to work, the current thread must be the first created
+ in the database, so that all its children will inherit its
+ signal mask */
+
+ sigemptyset(&sigset);
+ sigaddset(&sigset, SIGRTMIN + 1 + 0);
+ sigaddset(&sigset, SIGRTMIN + 1 + 1);
+ sigaddset(&sigset, SIGRTMIN + 1 + 2);
+ sigaddset(&sigset, SIGRTMIN + 1 + 3);
+
+ pthread_sigmask(SIG_BLOCK, &sigset, NULL);
+#endif
+}
+
+/**************************************************************************
+Calculates segment number for a slot. */
+static
+ulint
+os_aio_get_segment_no_from_slot(
+/*============================*/
+ /* out: segment number (which is the number
+ used by, for example, i/o-handler threads) */
+ os_aio_array_t* array, /* in: aio wait array */
+ os_aio_slot_t* slot) /* in: slot in this array */
+{
+ ulint segment;
+ ulint seg_len;
+
+ if (array == os_aio_ibuf_array) {
+ segment = 0;
+
+ } else if (array == os_aio_log_array) {
+ segment = 1;
+
+ } else if (array == os_aio_read_array) {
+ seg_len = os_aio_read_array->n_slots /
+ os_aio_read_array->n_segments;
+
+ segment = 2 + slot->pos / seg_len;
+ } else {
+ ut_a(array == os_aio_write_array);
+ seg_len = os_aio_write_array->n_slots /
+ os_aio_write_array->n_segments;
+
+ segment = os_aio_read_array->n_segments + 2
+ + slot->pos / seg_len;
+ }
+
+ return(segment);
+}
+
+/**************************************************************************
+Calculates local segment number and aio array from global segment number. */
+static
+ulint
+os_aio_get_array_and_local_segment(
+/*===============================*/
+ /* out: local segment number within
+ the aio array */
+ os_aio_array_t** array, /* out: aio wait array */
+ ulint global_segment)/* in: global segment number */
+{
+ ulint segment;
+
+ ut_a(global_segment < os_aio_n_segments);
+
+ if (global_segment == 0) {
+ *array = os_aio_ibuf_array;
+ segment = 0;
+
+ } else if (global_segment == 1) {
+ *array = os_aio_log_array;
+ segment = 0;
+
+ } else if (global_segment < os_aio_read_array->n_segments + 2) {
+ *array = os_aio_read_array;
+
+ segment = global_segment - 2;
+ } else {
+ *array = os_aio_write_array;
+
+ segment = global_segment - (os_aio_read_array->n_segments + 2);
+ }
+
+ return(segment);
+}
+
+/***********************************************************************
+Gets an integer value designating a specified aio array. This is used
+to give numbers to signals in Posix aio. */
+static
+ulint
+os_aio_get_array_no(
+/*================*/
+ os_aio_array_t* array) /* in: aio array */
+{
+ if (array == os_aio_ibuf_array) {
+
+ return(0);
+
+ } else if (array == os_aio_log_array) {
+
+ return(1);
+
+ } else if (array == os_aio_read_array) {
+
+ return(2);
+ } else if (array == os_aio_write_array) {
+
+ return(3);
+ } else {
+ ut_a(0);
+
+ return(0);
+ }
+}
+
+/***********************************************************************
+Gets the aio array for its number. */
+static
+os_aio_array_t*
+os_aio_get_array_from_no(
+/*=====================*/
+ /* out: aio array */
+ ulint n) /* in: array number */
+{
+ if (n == 0) {
+ return(os_aio_ibuf_array);
+ } else if (n == 1) {
+
+ return(os_aio_log_array);
+ } else if (n == 2) {
+
+ return(os_aio_read_array);
+ } else if (n == 3) {
+
+ return(os_aio_write_array);
+ } else {
+ ut_a(0);
+
+ return(NULL);
+ }
+}
+
+/***********************************************************************
+Requests for a slot in the aio array. If no slot is available, waits until
+not_full-event becomes signaled. */
+static
+os_aio_slot_t*
+os_aio_array_reserve_slot(
+/*======================*/
+ /* out: pointer to slot */
+ ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
+ os_aio_array_t* array, /* in: aio array */
+ void* message1,/* in: message to be passed along with
+ the aio operation */
+ void* message2,/* in: message to be passed along with
+ the aio operation */
+ os_file_t file, /* in: file handle */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ void* buf, /* in: buffer where to read or from which
+ to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint len) /* in: length of the block to read or write */
+{
+ os_aio_slot_t* slot;
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED* control;
+
+#elif defined(POSIX_ASYNC_IO)
+
+ struct aiocb* control;
+#endif
+ ulint i;
+loop:
+ os_mutex_enter(array->mutex);
+
+ if (array->n_reserved == array->n_slots) {
+ os_mutex_exit(array->mutex);
+
+ if (!os_aio_use_native_aio) {
+ /* If the handler threads are suspended, wake them
+ so that we get more slots */
+
+ os_aio_simulated_wake_handler_threads();
+ }
+
+ os_event_wait(array->not_full);
+
+ goto loop;
+ }
+
+ for (i = 0;; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ if (slot->reserved == FALSE) {
+ break;
+ }
+ }
+
+ array->n_reserved++;
+
+ if (array->n_reserved == array->n_slots) {
+ os_event_reset(array->not_full);
+ }
+
+ slot->reserved = TRUE;
+ slot->message1 = message1;
+ slot->message2 = message2;
+ slot->file = file;
+ slot->name = name;
+ slot->len = len;
+ slot->type = type;
+ slot->buf = buf;
+ slot->offset = offset;
+ slot->offset_high = offset_high;
+ slot->io_already_done = FALSE;
+
+#ifdef WIN_ASYNC_IO
+ control = &(slot->control);
+ control->Offset = (DWORD)offset;
+ control->OffsetHigh = (DWORD)offset_high;
+ os_event_reset(control->hEvent);
+
+#elif defined(POSIX_ASYNC_IO)
+
+#if (UNIV_WORD_SIZE == 8)
+ offset = offset + (offset_high << 32);
+#else
+ ut_a(offset_high == 0);
+#endif
+ control = &(slot->control);
+ control->aio_fildes = file;
+ control->aio_buf = buf;
+ control->aio_nbytes = len;
+ control->aio_offset = offset;
+ control->aio_reqprio = 0;
+ control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+ control->aio_sigevent.sigev_signo =
+ SIGRTMIN + 1 + os_aio_get_array_no(array);
+ /* TODO: How to choose the signal numbers? */
+/*
+ printf("AIO signal number %lu\n", (ulint) control->aio_sigevent.sigev_signo);
+*/
+ control->aio_sigevent.sigev_value.sival_ptr = slot;
+#endif
+ os_mutex_exit(array->mutex);
+
+ return(slot);
+}
+
+/***********************************************************************
+Frees a slot in the aio array. */
+static
+void
+os_aio_array_free_slot(
+/*===================*/
+ os_aio_array_t* array, /* in: aio array */
+ os_aio_slot_t* slot) /* in: pointer to slot */
+{
+ ut_ad(array);
+ ut_ad(slot);
+
+ os_mutex_enter(array->mutex);
+
+ ut_ad(slot->reserved);
+
+ slot->reserved = FALSE;
+
+ array->n_reserved--;
+
+ if (array->n_reserved == array->n_slots - 1) {
+ os_event_set(array->not_full);
+ }
+
+#ifdef WIN_ASYNC_IO
+ os_event_reset(slot->control.hEvent);
+#endif
+ os_mutex_exit(array->mutex);
+}
+
+/**************************************************************************
+Wakes up a simulated aio i/o-handler thread if it has something to do. */
+static
+void
+os_aio_simulated_wake_handler_thread(
+/*=================================*/
+ ulint global_segment) /* in: the number of the segment in the aio
+ arrays */
+{
+ os_aio_array_t* array;
+ ulint segment;
+ os_aio_slot_t* slot;
+ ulint n;
+ ulint i;
+
+ ut_ad(!os_aio_use_native_aio);
+
+ segment = os_aio_get_array_and_local_segment(&array, global_segment);
+
+ n = array->n_slots / array->n_segments;
+
+ /* Look through n slots after the segment * n'th slot */
+
+ os_mutex_enter(array->mutex);
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ if (slot->reserved) {
+ /* Found an i/o request */
+
+ break;
+ }
+ }
+
+ os_mutex_exit(array->mutex);
+
+ if (i < n) {
+ os_event_set(os_aio_segment_wait_events[global_segment]);
+ }
+}
+
+/**************************************************************************
+Wakes up simulated aio i/o-handler threads if they have something to do. */
+
+void
+os_aio_simulated_wake_handler_threads(void)
+/*=======================================*/
+{
+ ulint i;
+
+ if (os_aio_use_native_aio) {
+ /* We do not use simulated aio: do nothing */
+
+ return;
+ }
+
+ for (i = 0; i < os_aio_n_segments; i++) {
+ os_aio_simulated_wake_handler_thread(i);
+ }
+}
+
+/***********************************************************************
+Requests an asynchronous i/o operation. */
+
+ibool
+os_aio(
+/*===*/
+ /* out: TRUE if request was queued
+ successfully, FALSE if fail */
+ ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
+ ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
+ to OS_AIO_SIMULATED_WAKE_LATER: the
+ last flag advises this function not to wake
+ i/o-handler threads, but the caller will
+ do the waking explicitly later, in this
+ way the caller can post several requests in
+ a batch; NOTE that the batch must not be
+ so big that it exhausts the slots in aio
+ arrays! NOTE that a simulated batch
+ may introduce hidden chances of deadlocks,
+ because i/os are not actually handled until
+ all have been posted: use with great
+ caution! */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read or from which
+ to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read or write */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n, /* in: number of bytes to read or write */
+ void* message1,/* in: messages for the aio handler (these
+ can be used to identify a completed aio
+ operation); if mode is OS_AIO_SYNC, these
+ are ignored */
+ void* message2)
+{
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+#ifdef WIN_ASYNC_IO
+ BOOL ret = TRUE;
+ DWORD len = n;
+ void* dummy_mess1;
+ void* dummy_mess2;
+#endif
+ ulint err = 0;
+ ibool retry;
+ ulint wake_later;
+
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+ ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad((ulint)buf % OS_FILE_LOG_BLOCK_SIZE == 0)
+ ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad(os_aio_validate());
+
+ wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
+ mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
+
+ if (mode == OS_AIO_SYNC
+#ifdef WIN_ASYNC_IO
+ && !os_aio_use_native_aio
+#endif
+ ) {
+ /* This is actually an ordinary synchronous read or write:
+ no need to use an i/o-handler thread. NOTE that if we use
+ Windows async i/o, Windows does not allow us to use
+ ordinary synchronous os_file_read etc. on the same file,
+ therefore we have built a special mechanism for synchronous
+ wait in the Windows case. */
+
+ if (type == OS_FILE_READ) {
+ return(os_file_read(file, buf, offset, offset_high, n));
+ }
+
+ ut_a(type == OS_FILE_WRITE);
+
+ return(os_file_write(name, file, buf, offset, offset_high, n));
+ }
+
+try_again:
+ if (mode == OS_AIO_NORMAL) {
+ if (type == OS_FILE_READ) {
+ array = os_aio_read_array;
+ } else {
+ array = os_aio_write_array;
+ }
+ } else if (mode == OS_AIO_IBUF) {
+ ut_ad(type == OS_FILE_READ);
+
+ array = os_aio_ibuf_array;
+ } else if (mode == OS_AIO_LOG) {
+
+ array = os_aio_log_array;
+ } else if (mode == OS_AIO_SYNC) {
+ array = os_aio_sync_array;
+ } else {
+ ut_error;
+ }
+
+ slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
+ name, buf, offset, offset_high, n);
+ if (type == OS_FILE_READ) {
+ if (os_aio_use_native_aio) {
+#ifdef WIN_ASYNC_IO
+ ret = ReadFile(file, buf, (DWORD)n, &len,
+ &(slot->control));
+#elif defined(POSIX_ASYNC_IO)
+ slot->control.aio_lio_opcode = LIO_READ;
+ err = (ulint) aio_read(&(slot->control));
+ printf("Starting Posix aio read %lu\n", err);
+#endif
+ } else {
+ if (!wake_later) {
+ os_aio_simulated_wake_handler_thread(
+ os_aio_get_segment_no_from_slot(array, slot));
+ }
+ }
+ } else if (type == OS_FILE_WRITE) {
+ if (os_aio_use_native_aio) {
+#ifdef WIN_ASYNC_IO
+ ret = WriteFile(file, buf, (DWORD)n, &len,
+ &(slot->control));
+#elif defined(POSIX_ASYNC_IO)
+ slot->control.aio_lio_opcode = LIO_WRITE;
+ err = (ulint) aio_write(&(slot->control));
+ printf("Starting Posix aio write %lu\n", err);
+#endif
+ } else {
+ if (!wake_later) {
+ os_aio_simulated_wake_handler_thread(
+ os_aio_get_segment_no_from_slot(array, slot));
+ }
+ }
+ } else {
+ ut_error;
+ }
+
+#ifdef WIN_ASYNC_IO
+ if (os_aio_use_native_aio) {
+ if ((ret && len == n)
+ || (!ret && GetLastError() == ERROR_IO_PENDING)) {
+
+ /* aio was queued successfully! */
+
+ if (mode == OS_AIO_SYNC) {
+ /* We want a synchronous i/o operation on a file
+ where we also use async i/o: in Windows we must
+ use the same wait mechanism as for async i/o */
+
+ return(os_aio_windows_handle(ULINT_UNDEFINED,
+ slot->pos,
+ &dummy_mess1, &dummy_mess2));
+ }
+
+ return(TRUE);
+ }
+
+ goto error_handling;
+ }
+#endif
+ if (err == 0) {
+ /* aio was queued successfully! */
+
+ return(TRUE);
+ }
+
+error_handling:
+ os_aio_array_free_slot(array, slot);
+
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+
+ goto try_again;
+ }
+
+ ut_error;
+
+ return(FALSE);
+}
+
+#ifdef WIN_ASYNC_IO
+/**************************************************************************
+This function is only used in Windows asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait the
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing! */
+
+ibool
+os_aio_windows_handle(
+/*==================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint segment, /* in: the number of the segment in the aio
+ arrays to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 the log i/o thread,
+ then follow the non-ibuf read threads, and as
+ the last are the non-ibuf write threads; if
+ this is ULINT_UNDEFINED, then it means that
+ sync aio is used, and this parameter is
+ ignored */
+ ulint pos, /* this parameter is used only in sync aio:
+ wait for the aio slot at this position */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2)
+{
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+ ulint n;
+ ulint i;
+ ibool ret_val;
+ ulint err;
+ BOOL ret;
+ DWORD len;
+
+ if (segment == ULINT_UNDEFINED) {
+ array = os_aio_sync_array;
+ segment = 0;
+ } else {
+ segment = os_aio_get_array_and_local_segment(&array, segment);
+ }
+
+ /* NOTE! We only access constant fields in os_aio_array. Therefore
+ we do not have to acquire the protecting mutex yet */
+
+ ut_ad(os_aio_validate());
+ ut_ad(segment < array->n_segments);
+
+ n = array->n_slots / array->n_segments;
+
+ if (array == os_aio_sync_array) {
+ ut_ad(pos < array->n_slots);
+ os_event_wait(array->events[pos]);
+ i = pos;
+ } else {
+ i = os_event_wait_multiple(n, (array->events) + segment * n);
+ }
+
+ os_mutex_enter(array->mutex);
+
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ ut_a(slot->reserved);
+
+ ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
+
+ *message1 = slot->message1;
+ *message2 = slot->message2;
+
+ if (ret && len == slot->len) {
+ ret_val = TRUE;
+ } else {
+ err = GetLastError();
+ ut_error;
+
+ ret_val = FALSE;
+ }
+
+ os_mutex_exit(array->mutex);
+
+ os_aio_array_free_slot(array, slot);
+
+ return(ret_val);
+}
+#endif
+
+#ifdef POSIX_ASYNC_IO
+
+/**************************************************************************
+This function is only used in Posix asynchronous i/o. Waits for an aio
+operation to complete. */
+
+ibool
+os_aio_posix_handle(
+/*================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint array_no, /* in: array number 0 - 3 */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2)
+{
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+ siginfo_t info;
+ sigset_t sigset;
+ sigset_t proc_sigset;
+ sigset_t thr_sigset;
+ int ret;
+ int i;
+ int sig;
+
+ sigemptyset(&sigset);
+ sigaddset(&sigset, SIGRTMIN + 1 + array_no);
+
+ pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
+
+ /*
+ sigprocmask(0, NULL, &proc_sigset);
+ pthread_sigmask(0, NULL, &thr_sigset);
+
+ for (i = 32 ; i < 40; i++) {
+ printf("%lu : %lu %lu\n", (ulint)i,
+ (ulint)sigismember(&proc_sigset, i),
+ (ulint)sigismember(&thr_sigset, i));
+ }
+ */
+
+ ret = sigwaitinfo(&sigset, &info);
+
+ if (sig != SIGRTMIN + 1 + array_no) {
+
+ ut_a(0);
+
+ return(FALSE);
+ }
+
+ printf("Handling Posix aio\n");
+
+ array = os_aio_get_array_from_no(array_no);
+
+ os_mutex_enter(array->mutex);
+
+ slot = info.si_value.sival_ptr;
+
+ ut_a(slot->reserved);
+
+ *message1 = slot->message1;
+ *message2 = slot->message2;
+
+ os_mutex_exit(array->mutex);
+
+ os_aio_array_free_slot(array, slot);
+
+ return(TRUE);
+}
+#endif
+
+/**************************************************************************
+Does simulated aio. This function should be called by an i/o-handler
+thread. */
+
+ibool
+os_aio_simulated_handle(
+/*====================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint global_segment, /* in: the number of the segment in the aio
+ arrays to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 the log i/o thread,
+ then follow the non-ibuf read threads, and as
+ the last are the non-ibuf write threads */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2)
+{
+ os_aio_array_t* array;
+ ulint segment;
+ os_aio_slot_t* slot;
+ os_aio_slot_t* slot2;
+ os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
+ ulint n_consecutive;
+ ulint total_len;
+ ulint offs;
+ ulint lowest_offset;
+ byte* combined_buf;
+ ibool ret;
+ ulint n;
+ ulint i;
+
+ segment = os_aio_get_array_and_local_segment(&array, global_segment);
+
+restart:
+ /* Give other threads chance to add several i/os to the array
+ at once */
+
+ os_thread_yield();
+
+ /* NOTE! We only access constant fields in os_aio_array. Therefore
+ we do not have to acquire the protecting mutex yet */
+
+ ut_ad(os_aio_validate());
+ ut_ad(segment < array->n_segments);
+
+ n = array->n_slots / array->n_segments;
+
+ /* Look through n slots after the segment * n'th slot */
+
+ os_mutex_enter(array->mutex);
+
+ /* Check if there is a slot for which the i/o has already been
+ done */
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ if (slot->reserved && slot->io_already_done) {
+
+ ret = TRUE;
+
+ goto slot_io_done;
+ }
+ }
+
+ n_consecutive = 0;
+
+ /* Look for an i/o request at the lowest offset in the array */
+
+ lowest_offset = ULINT_MAX;
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ if (slot->reserved && slot->offset < lowest_offset) {
+
+ /* Found an i/o request */
+ consecutive_ios[0] = slot;
+
+ n_consecutive = 1;
+
+ lowest_offset = slot->offset;
+ }
+ }
+
+ if (n_consecutive == 0) {
+
+ /* No i/o requested at the moment */
+
+ goto wait_for_io;
+ }
+
+ slot = consecutive_ios[0];
+
+ /* Check if there are several consecutive blocks to read or write */
+
+consecutive_loop:
+ for (i = 0; i < n; i++) {
+ slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ if (slot2->reserved && slot2 != slot
+ && slot2->offset == slot->offset + slot->len
+ && slot->offset + slot->len > slot->offset /* check that
+ sum does not wrap over */
+ && slot2->offset_high == slot->offset_high
+ && slot2->type == slot->type
+ && slot2->file == slot->file) {
+
+ /* Found a consecutive i/o request */
+
+ consecutive_ios[n_consecutive] = slot2;
+ n_consecutive++;
+
+ slot = slot2;
+
+ if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
+
+ goto consecutive_loop;
+ } else {
+ break;
+ }
+ }
+ }
+
+ /* We have now collected n_consecutive i/o requests in the array;
+ allocate a single buffer which can hold all data, and perform the
+ i/o */
+
+ total_len = 0;
+ slot = consecutive_ios[0];
+
+ for (i = 0; i < n_consecutive; i++) {
+ total_len += consecutive_ios[i]->len;
+ }
+
+ if (n_consecutive == 1) {
+ /* We can use the buffer of the i/o request */
+ combined_buf = slot->buf;
+ } else {
+ combined_buf = ut_malloc(total_len);
+
+ ut_a(combined_buf);
+ }
+
+ /* We release the array mutex for the time of the i/o: NOTE that
+ this assumes that there is just one i/o-handler thread serving
+ a single segment of slots! */
+
+ os_mutex_exit(array->mutex);
+
+ if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
+ /* Copy the buffers to the combined buffer */
+ offs = 0;
+
+ for (i = 0; i < n_consecutive; i++) {
+
+ ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
+ consecutive_ios[i]->len);
+ offs += consecutive_ios[i]->len;
+ }
+ }
+
+ /* Do the i/o with ordinary, synchronous i/o functions: */
+ if (slot->type == OS_FILE_WRITE) {
+ ret = os_file_write(slot->name, slot->file, combined_buf,
+ slot->offset, slot->offset_high, total_len);
+ } else {
+ ret = os_file_read(slot->file, combined_buf,
+ slot->offset, slot->offset_high, total_len);
+ }
+
+ ut_a(ret);
+
+/* printf("aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
+ n_consecutive, global_segment, slot->offset
+ / UNIV_PAGE_SIZE); */
+
+ if (slot->type == OS_FILE_READ && n_consecutive > 1) {
+ /* Copy the combined buffer to individual buffers */
+ offs = 0;
+
+ for (i = 0; i < n_consecutive; i++) {
+
+ ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
+ consecutive_ios[i]->len);
+ offs += consecutive_ios[i]->len;
+ }
+ }
+
+ if (n_consecutive > 1) {
+ ut_free(combined_buf);
+ }
+
+ os_mutex_enter(array->mutex);
+
+ /* Mark the i/os done in slots */
+
+ for (i = 0; i < n_consecutive; i++) {
+ consecutive_ios[i]->io_already_done = TRUE;
+ }
+
+ /* We return the messages for the first slot now, and if there were
+ several slots, the messages will be returned with subsequent calls
+ of this function */
+
+slot_io_done:
+
+ ut_a(slot->reserved);
+
+ *message1 = slot->message1;
+ *message2 = slot->message2;
+
+ os_mutex_exit(array->mutex);
+
+ os_aio_array_free_slot(array, slot);
+
+ return(ret);
+
+wait_for_io:
+ /* We wait here until there again can be i/os in the segment
+ of this thread */
+
+ os_event_reset(os_aio_segment_wait_events[global_segment]);
+
+ os_mutex_exit(array->mutex);
+
+ os_event_wait(os_aio_segment_wait_events[global_segment]);
+
+ goto restart;
+}
+
+/**************************************************************************
+Validates the consistency of an aio array. */
+static
+ibool
+os_aio_array_validate(
+/*==================*/
+ /* out: TRUE if ok */
+ os_aio_array_t* array) /* in: aio wait array */
+{
+ os_aio_slot_t* slot;
+ ulint n_reserved = 0;
+ ulint i;
+
+ ut_a(array);
+
+ os_mutex_enter(array->mutex);
+
+ ut_a(array->n_slots > 0);
+ ut_a(array->n_segments > 0);
+
+ for (i = 0; i < array->n_slots; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ if (slot->reserved) {
+ n_reserved++;
+ ut_a(slot->len > 0);
+ }
+ }
+
+ ut_a(array->n_reserved == n_reserved);
+
+ os_mutex_exit(array->mutex);
+
+ return(TRUE);
+}
+
+/**************************************************************************
+Validates the consistency the aio system. */
+
+ibool
+os_aio_validate(void)
+/*=================*/
+ /* out: TRUE if ok */
+{
+ os_aio_array_validate(os_aio_read_array);
+ os_aio_array_validate(os_aio_write_array);
+ os_aio_array_validate(os_aio_ibuf_array);
+ os_aio_array_validate(os_aio_log_array);
+ os_aio_array_validate(os_aio_sync_array);
+
+ return(TRUE);
+}
+
+/**************************************************************************
+Prints info of the aio arrays. */
+
+void
+os_aio_print(void)
+/*==============*/
+{
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+ ulint n_reserved;
+ ulint i;
+
+ array = os_aio_read_array;
+loop:
+ ut_a(array);
+
+ printf("INFO OF AN AIO ARRAY\n");
+
+ os_mutex_enter(array->mutex);
+
+ ut_a(array->n_slots > 0);
+ ut_a(array->n_segments > 0);
+
+ n_reserved = 0;
+
+ for (i = 0; i < array->n_slots; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ if (slot->reserved) {
+ n_reserved++;
+ printf("Reserved slot, messages %lx %lx\n",
+ slot->message1, slot->message2);
+ ut_a(slot->len > 0);
+ }
+ }
+
+ ut_a(array->n_reserved == n_reserved);
+
+ printf("Total of %lu reserved aio slots\n", n_reserved);
+
+ os_mutex_exit(array->mutex);
+
+ if (array == os_aio_read_array) {
+ array = os_aio_write_array;
+
+ goto loop;
+ }
+
+ if (array == os_aio_write_array) {
+ array = os_aio_ibuf_array;
+
+ goto loop;
+ }
+
+ if (array == os_aio_ibuf_array) {
+ array = os_aio_log_array;
+
+ goto loop;
+ }
+
+ if (array == os_aio_log_array) {
+ array = os_aio_sync_array;
+
+ goto loop;
+ }
+}
+
+/**************************************************************************
+Checks that all slots in the system have been freed, that is, there are
+no pending io operations. */
+
+ibool
+os_aio_all_slots_free(void)
+/*=======================*/
+ /* out: TRUE if all free */
+{
+ os_aio_array_t* array;
+ ulint n_res = 0;
+
+ array = os_aio_read_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ array = os_aio_write_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ array = os_aio_ibuf_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ array = os_aio_log_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ array = os_aio_sync_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ if (n_res == 0) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
diff --git a/innobase/os/os0fileold.c b/innobase/os/os0fileold.c
new file mode 100644
index 00000000000..a9554727f0e
--- /dev/null
+++ b/innobase/os/os0fileold.c
@@ -0,0 +1,1956 @@
+/******************************************************
+The interface to the operating system file i/o primitives
+
+(c) 1995 Innobase Oy
+
+Created 10/21/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0file.h"
+#include "os0sync.h"
+#include "ut0mem.h"
+
+#ifndef __WIN__
+#include <errno.h>
+#endif
+
+/* We use these mutexes to protect lseek + file i/o operation, if the
+OS does not provide an atomic pread or pwrite, or similar */
+#define OS_FILE_N_SEEK_MUTEXES 16
+os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
+
+/* In simulated aio, merge at most this many consecutive i/os */
+#define OS_AIO_MERGE_N_CONSECUTIVE 32
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads */
+
+bool os_aio_use_native_aio = FALSE;
+
+/* The aio array slot structure */
+typedef struct os_aio_slot_struct os_aio_slot_t;
+
+struct os_aio_slot_struct{
+ bool is_read; /* TRUE if a read operation */
+ ulint pos; /* index of the slot in the aio
+ array */
+ bool reserved; /* TRUE if this slot is reserved */
+ ulint len; /* length of the block to read or
+ write */
+ byte* buf; /* buffer used in i/o */
+ ulint type; /* OS_FILE_READ or OS_FILE_WRITE */
+ ulint offset; /* 32 low bits of file offset in
+ bytes */
+ ulint offset_high; /* 32 high bits of file offset */
+ os_file_t file; /* file where to read or write */
+ char* name; /* file name or path */
+ bool io_already_done;/* used only in simulated aio:
+ TRUE if the physical i/o already
+ made and only the slot message
+ needs to be passed to the caller
+ of os_aio_simulated_handle */
+ void* message1; /* message which is given by the */
+ void* message2; /* the requester of an aio operation
+ and which can be used to identify
+ which pending aio operation was
+ completed */
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED control; /* Windows control block for the
+ aio request */
+#elif defined(POSIX_ASYNC_IO)
+ struct aiocb control; /* Posix control block for aio
+ request */
+#endif
+};
+
+/* The aio array structure */
+typedef struct os_aio_array_struct os_aio_array_t;
+
+struct os_aio_array_struct{
+ os_mutex_t mutex; /* the mutex protecting the aio array */
+ os_event_t not_full; /* The event which is set to signaled
+ state when there is space in the aio
+ outside the ibuf segment */
+ ulint n_slots; /* Total number of slots in the aio array.
+ This must be divisible by n_threads. */
+ ulint n_segments;/* Number of segments in the aio array of
+ pending aio requests. A thread can wait
+ separately for any one of the segments. */
+ ulint n_reserved;/* Number of reserved slots in the
+ aio array outside the ibuf segment */
+ os_aio_slot_t* slots; /* Pointer to the slots in the array */
+ os_event_t* events; /* Pointer to an array of event handles
+ where we copied the handles from slots,
+ in the same order. This can be used in
+ WaitForMultipleObjects; used only in
+ Windows */
+};
+
+/* Array of events used in simulated aio */
+os_event_t* os_aio_segment_wait_events = NULL;
+
+/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
+are NULL when the module has not yet been initialized. */
+os_aio_array_t* os_aio_read_array = NULL;
+os_aio_array_t* os_aio_write_array = NULL;
+os_aio_array_t* os_aio_ibuf_array = NULL;
+os_aio_array_t* os_aio_log_array = NULL;
+os_aio_array_t* os_aio_sync_array = NULL;
+
+ulint os_aio_n_segments = ULINT_UNDEFINED;
+
+/***************************************************************************
+Retrieves the last error number if an error occurs in a file io function.
+The number should be retrieved before any other OS calls (because they may
+overwrite the error number). If the number is not known to this program,
+the OS error number + 100 is returned. */
+
+ulint
+os_file_get_last_error(void)
+/*========================*/
+ /* out: error number, or OS error number + 100 */
+{
+ ulint err;
+
+#ifdef __WIN__
+
+ err = (ulint) GetLastError();
+
+ if (err == ERROR_FILE_NOT_FOUND) {
+ return(OS_FILE_NOT_FOUND);
+ } else if (err == ERROR_DISK_FULL) {
+ return(OS_FILE_DISK_FULL);
+ } else if (err == ERROR_FILE_EXISTS) {
+ return(OS_FILE_ALREADY_EXISTS);
+ } else {
+ return(100 + err);
+ }
+#else
+ err = (ulint) errno;
+
+ printf("%lu\n", err);
+ perror("os0file:");
+
+ if (err == ENOSPC ) {
+ return(OS_FILE_DISK_FULL);
+#ifdef POSIX_ASYNC_IO
+ } else if (err == EAGAIN) {
+ return(OS_FILE_AIO_RESOURCES_RESERVED);
+#endif
+ } else if (err == ENOENT) {
+ return(OS_FILE_NOT_FOUND);
+ } else if (err == EEXIST) {
+ return(OS_FILE_ALREADY_EXISTS);
+ } else {
+ return(100 + err);
+ }
+#endif
+}
+
+/********************************************************************
+Does error handling when a file operation fails. If we have run out
+of disk space, then the user can clean the disk. If we do not find
+a specified file, then the user can copy it to disk. */
+static
+bool
+os_file_handle_error(
+/*=================*/
+ /* out: TRUE if we should retry the operation */
+ os_file_t file, /* in: file pointer */
+ char* name) /* in: name of a file or NULL */
+{
+ int input_char;
+ ulint err;
+
+ err = os_file_get_last_error();
+
+ if (err == OS_FILE_DISK_FULL) {
+ask_again:
+ printf("\n");
+ if (name) {
+ printf(
+ "Innobase encountered a problem with file %s.\n",
+ name);
+ }
+ printf("Disk is full. Try to clean the disk to free space\n");
+ printf("before answering the following: How to continue?\n");
+ printf("(Y == freed some space: try again)\n");
+ printf("(N == crash the database: will restart it)?\n");
+ask_with_no_question:
+ input_char = getchar();
+
+ if (input_char == (int) 'N') {
+ ut_error;
+
+ return(FALSE);
+ } else if (input_char == (int) 'Y') {
+
+ return(TRUE);
+ } else if (input_char == (int) '\n') {
+
+ goto ask_with_no_question;
+ } else {
+ goto ask_again;
+ }
+ } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
+
+ return(TRUE);
+ } else {
+ ut_error;
+ }
+
+ return(FALSE);
+}
+
+/********************************************************************
+Opens an existing file or creates a new. */
+
+os_file_t
+os_file_create(
+/*===========*/
+ /* out, own: handle to the file, not defined if error,
+ error number can be retrieved with os_get_last_error */
+ char* name, /* in: name of the file or path as a null-terminated
+ string */
+ ulint create_mode, /* in: OS_FILE_OPEN if an existing file is opened
+ (if does not exist, error), or OS_FILE_CREATE if a new
+ file is created (if exists, error), OS_FILE_OVERWRITE
+ if a new is created or an old overwritten */
+ ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
+ is desired, OS_FILE_NORMAL, if any normal file */
+ bool* success)/* out: TRUE if succeed, FALSE if error */
+{
+#ifdef __WIN__
+ os_file_t file;
+ DWORD create_flag;
+ DWORD attributes;
+ bool retry;
+
+try_again:
+ ut_a(name);
+
+ if (create_mode == OS_FILE_OPEN) {
+ create_flag = OPEN_EXISTING;
+ } else if (create_mode == OS_FILE_CREATE) {
+ create_flag = CREATE_NEW;
+ } else if (create_mode == OS_FILE_OVERWRITE) {
+ create_flag = CREATE_ALWAYS;
+ } else {
+ create_flag = 0;
+ ut_error;
+ }
+
+ if (purpose == OS_FILE_AIO) {
+ /* use asynchronous (overlapped) io and no buffering
+ of writes in the OS */
+ attributes = 0;
+#ifdef WIN_ASYNC_IO
+ if (os_aio_use_native_aio) {
+ attributes = attributes | FILE_FLAG_OVERLAPPED;
+ }
+#endif
+#ifdef UNIV_NON_BUFFERED_IO
+ attributes = attributes | FILE_FLAG_NO_BUFFERING;
+#endif
+ } else if (purpose == OS_FILE_NORMAL) {
+ attributes = 0
+#ifdef UNIV_NON_BUFFERED_IO
+ | FILE_FLAG_NO_BUFFERING
+#endif
+ ;
+ } else {
+ attributes = 0;
+ ut_error;
+ }
+
+ file = CreateFile(name,
+ GENERIC_READ | GENERIC_WRITE, /* read and write
+ access */
+ FILE_SHARE_READ,/* file can be read by other
+ processes */
+ NULL, /* default security attributes */
+ create_flag,
+ attributes,
+ NULL); /* no template file */
+
+ if (file == INVALID_HANDLE_VALUE) {
+ *success = FALSE;
+
+ if (create_mode != OS_FILE_OPEN
+ && os_file_get_last_error() == OS_FILE_DISK_FULL) {
+
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+ }
+ } else {
+ *success = TRUE;
+ }
+
+ return(file);
+#else
+ os_file_t file;
+ int create_flag;
+ bool retry;
+
+try_again:
+ ut_a(name);
+
+ if (create_mode == OS_FILE_OPEN) {
+ create_flag = O_RDWR;
+ } else if (create_mode == OS_FILE_CREATE) {
+ create_flag = O_RDWR | O_CREAT | O_EXCL;
+ } else if (create_mode == OS_FILE_OVERWRITE) {
+ create_flag = O_RDWR | O_CREAT | O_TRUNC;
+ } else {
+ create_flag = 0;
+ ut_error;
+ }
+
+ UT_NOT_USED(purpose);
+
+ if (create_mode == OS_FILE_CREATE) {
+
+ file = open(name, create_flag, S_IRWXU | S_IRWXG | S_IRWXO);
+ } else {
+ file = open(name, create_flag);
+ }
+
+ if (file == -1) {
+ *success = FALSE;
+
+ printf("Error in opening file %s, errno %lu\n", name,
+ (ulint)errno);
+ perror("os0file.c:");
+
+ if (create_mode != OS_FILE_OPEN
+ && errno == ENOSPC) {
+
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+ }
+ } else {
+ *success = TRUE;
+ }
+
+ return(file);
+#endif
+}
+
+/***************************************************************************
+Closes a file handle. In case of error, error number can be retrieved with
+os_file_get_last_error. */
+
+bool
+os_file_close(
+/*==========*/
+ /* out: TRUE if success */
+ os_file_t file) /* in, own: handle to a file */
+{
+#ifdef __WIN__
+ BOOL ret;
+
+ ut_a(file);
+
+ ret = CloseHandle(file);
+
+ if (ret) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+#else
+ int ret;
+
+ ret = close(file);
+
+ if (ret == -1) {
+ return(FALSE);
+ }
+
+ return(TRUE);
+#endif
+}
+
+/***************************************************************************
+Gets a file size. */
+
+bool
+os_file_get_size(
+/*=============*/
+ /* out: TRUE if success */
+ os_file_t file, /* in: handle to a file */
+ ulint* size, /* out: least significant 32 bits of file
+ size */
+ ulint* size_high)/* out: most significant 32 bits of size */
+{
+#ifdef __WIN__
+ DWORD high;
+ DWORD low;
+
+ low = GetFileSize(file, &high);
+
+ if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
+ return(FALSE);
+ }
+
+ *size = low;
+ *size_high = high;
+
+ return(TRUE);
+#else
+ *size = (ulint) lseek(file, 0, SEEK_END);
+ *size_high = 0;
+
+ return(TRUE);
+#endif
+}
+
+/***************************************************************************
+Sets a file size. This function can be used to extend or truncate a file. */
+
+bool
+os_file_set_size(
+/*=============*/
+ /* out: TRUE if success */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ ulint size, /* in: least significant 32 bits of file
+ size */
+ ulint size_high)/* in: most significant 32 bits of size */
+{
+#ifdef __WIN__
+ DWORD high;
+ DWORD low;
+ DWORD ret;
+ BOOL ret2;
+ DWORD err;
+ bool retry;
+
+try_again:
+ low = size;
+ high = size_high;
+
+ ret = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+ if (ret == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ err = GetLastError();
+
+ goto error_handling;
+ }
+
+ ret2 = SetEndOfFile(file);
+
+ if (ret2) {
+ ret2 = os_file_flush(file);
+ }
+
+ if (ret2) {
+ return(TRUE);
+ }
+#else
+ ulint offset;
+ ulint n_bytes;
+ ulint low;
+ ssize_t ret;
+ bool retry;
+ ulint i;
+ byte buf[UNIV_PAGE_SIZE * 8];
+
+ /* Write buffer full of zeros */
+ for (i = 0; i < UNIV_PAGE_SIZE * 8; i++) {
+ buf[i] = '\0';
+ }
+
+try_again:
+ low = size;
+#if (UNIV_WORD_SIZE == 8)
+ low = low + (size_high << 32);
+#endif
+ while (offset < low) {
+ if (low - offset < UNIV_PAGE_SIZE * 8) {
+ n_bytes = low - offset;
+ } else {
+ n_bytes = UNIV_PAGE_SIZE * 8;
+ }
+
+ ret = pwrite(file, buf, n_bytes, offset);
+
+ if (ret != n_bytes) {
+ goto error_handling;
+ }
+ offset += n_bytes;
+ }
+
+ ret = os_file_flush(file);
+
+ if (ret) {
+ return(TRUE);
+ }
+#endif
+
+error_handling:
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+
+ ut_error;
+}
+
+/***************************************************************************
+Flushes the write buffers of a given file to the disk. */
+
+bool
+os_file_flush(
+/*==========*/
+ /* out: TRUE if success */
+ os_file_t file) /* in, own: handle to a file */
+{
+#ifdef __WIN__
+ BOOL ret;
+
+ ut_a(file);
+
+ ret = FlushFileBuffers(file);
+
+ if (ret) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+#else
+ int ret;
+
+ ret = fsync(file);
+
+ if (ret == 0) {
+ return(TRUE);
+ }
+
+ return(FALSE);
+#endif
+}
+
+
+#ifndef __WIN__
+/***********************************************************************
+Does a synchronous read operation in Posix. */
+static
+ssize_t
+os_file_pread(
+/*==========*/
+ /* out: number of bytes read, -1 if error */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read */
+ ulint n, /* in: number of bytes to read */
+ ulint offset) /* in: offset from where to read */
+{
+#ifdef HAVE_PREAD
+ return(pread(file, buf, n, (off_t) offset));
+#else
+ ssize_t ret;
+ ulint i;
+
+ /* Protect the seek / read operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret = lseek(file, (off_t) offset, 0);
+
+ if (ret < 0) {
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+ }
+
+ ret = read(file, buf, n);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+#endif
+}
+
+/***********************************************************************
+Does a synchronous write operation in Posix. */
+static
+ssize_t
+os_file_pwrite(
+/*===========*/
+ /* out: number of bytes written, -1 if error */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer from where to write */
+ ulint n, /* in: number of bytes to write */
+ ulint offset) /* in: offset where to write */
+{
+#ifdef HAVE_PWRITE
+ return(pwrite(file, buf, n, (off_t) offset));
+#else
+ ssize_t ret;
+ ulint i;
+
+ /* Protect the seek / write operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret = lseek(file, (off_t) offset, 0);
+
+ if (ret < 0) {
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+ }
+
+ ret = write(file, buf, n);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(ret);
+#endif
+}
+#endif
+
+/***********************************************************************
+Requests a synchronous positioned read operation. */
+
+bool
+os_file_read(
+/*=========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n) /* in: number of bytes to read */
+{
+#ifdef __WIN__
+ BOOL ret;
+ DWORD len;
+ DWORD ret2;
+ DWORD err;
+ DWORD low;
+ DWORD high;
+ bool retry;
+ ulint i;
+
+try_again:
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+
+ low = offset;
+ high = offset_high;
+
+ /* Protect the seek / read operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+ if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ err = GetLastError();
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ goto error_handling;
+ }
+
+ ret = ReadFile(file, buf, n, &len, NULL);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ if (ret && len == n) {
+ return(TRUE);
+ }
+
+ err = GetLastError();
+#else
+ bool retry;
+ ssize_t ret;
+ ulint i;
+
+#if (UNIV_WORD_SIZE == 8)
+ offset = offset + (offset_high << 32);
+#endif
+try_again:
+ /* Protect the seek / read operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret = os_file_pread(file, buf, n, (off_t) offset);
+
+ if (ret == n) {
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ return(TRUE);
+ }
+#endif
+error_handling:
+ retry = os_file_handle_error(file, NULL);
+
+ if (retry) {
+ goto try_again;
+ }
+
+ ut_error;
+
+ return(FALSE);
+}
+
+/***********************************************************************
+Requests a synchronous write operation. */
+
+bool
+os_file_write(
+/*==========*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer from which to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to write */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n) /* in: number of bytes to write */
+{
+#ifdef __WIN__
+ BOOL ret;
+ DWORD len;
+ DWORD ret2;
+ DWORD err;
+ DWORD low;
+ DWORD high;
+ bool retry;
+ ulint i;
+
+try_again:
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+
+ low = offset;
+ high = offset_high;
+
+ /* Protect the seek / write operation with a mutex */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
+
+ if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+ err = GetLastError();
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ goto error_handling;
+ }
+
+ ret = WriteFile(file, buf, n, &len, NULL);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ if (ret && len == n) {
+ return(TRUE);
+ }
+#else
+ bool retry;
+ ssize_t ret;
+
+#if (UNIV_WORD_SIZE == 8)
+ offset = offset + (offset_high << 32);
+#endif
+try_again:
+ ret = pwrite(file, buf, n, (off_t) offset);
+
+ if (ret == n) {
+ return(TRUE);
+ }
+#endif
+
+error_handling:
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+ goto try_again;
+ }
+
+ ut_error;
+
+ return(FALSE);
+}
+
+/********************************************************************
+Returns a pointer to the nth slot in the aio array. */
+static
+os_aio_slot_t*
+os_aio_array_get_nth_slot(
+/*======================*/
+ /* out: pointer to slot */
+ os_aio_array_t* array, /* in: aio array */
+ ulint index) /* in: index of the slot */
+{
+ ut_a(index < array->n_slots);
+
+ return((array->slots) + index);
+}
+
+/****************************************************************************
+Creates an aio wait array. */
+static
+os_aio_array_t*
+os_aio_array_create(
+/*================*/
+ /* out, own: aio array */
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+ ulint n_segments) /* in: number of segments in the aio array */
+{
+ os_aio_array_t* array;
+ ulint i;
+ os_aio_slot_t* slot;
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED* over;
+#endif
+ ut_a(n > 0);
+ ut_a(n_segments > 0);
+ ut_a(n % n_segments == 0);
+
+ array = ut_malloc(sizeof(os_aio_array_t));
+
+ array->mutex = os_mutex_create(NULL);
+ array->not_full = os_event_create(NULL);
+ array->n_slots = n;
+ array->n_segments = n_segments;
+ array->n_reserved = 0;
+ array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
+ array->events = ut_malloc(n * sizeof(os_event_t));
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ slot->pos = i;
+ slot->reserved = FALSE;
+#ifdef WIN_ASYNC_IO
+ over = &(slot->control);
+
+ over->hEvent = os_event_create(NULL);
+
+ *((array->events) + i) = over->hEvent;
+#elif defined(POSIX_ASYNC_IO)
+ slot->ready = os_event_create(NULL);
+#endif
+ }
+
+ return(array);
+}
+
+/****************************************************************************
+Initializes the asynchronous io system. Creates separate aio array for
+non-ibuf read and write, a third aio array for the ibuf i/o, with just one
+segment, two aio arrays for log reads and writes with one segment, and a
+synchronous aio array of the specified size. The combined number of segments
+in the three first aio arrays is the parameter n_segments given to the
+function. The caller must create an i/o handler thread for each segment in
+the four first arrays, but not for the sync aio array. */
+
+void
+os_aio_init(
+/*========*/
+ ulint n, /* in: maximum number of pending aio operations
+ allowed; n must be divisible by n_segments */
+ ulint n_segments, /* in: combined number of segments in the four
+ first aio arrays; must be >= 4 */
+ ulint n_slots_sync) /* in: number of slots in the sync aio array */
+{
+ ulint n_read_segs;
+ ulint n_write_segs;
+ ulint n_per_seg;
+ ulint i;
+
+ ut_ad(n % n_segments == 0);
+ ut_ad(n_segments >= 4);
+
+ n_per_seg = n / n_segments;
+ n_write_segs = (n_segments - 2) / 2;
+ n_read_segs = n_segments - 2 - n_write_segs;
+
+ os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
+ n_read_segs);
+ os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
+ n_write_segs);
+ os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
+
+ os_aio_log_array = os_aio_array_create(n_per_seg, 1);
+
+ os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
+
+ os_aio_n_segments = n_segments;
+
+#if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO))
+ os_aio_use_native_aio = FALSE;
+#endif
+ os_aio_validate();
+
+ for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
+ os_file_seek_mutexes[i] = os_mutex_create(NULL);
+ }
+
+ os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
+
+ for (i = 0; i < n_segments; i++) {
+ os_aio_segment_wait_events[i] = os_event_create(NULL);
+ }
+}
+
+/**************************************************************************
+Calculates segment number for a slot. */
+static
+ulint
+os_aio_get_segment_no_from_slot(
+/*============================*/
+ /* out: segment number (which is the number
+ used by, for example, i/o-handler threads) */
+ os_aio_array_t* array, /* in: aio wait array */
+ os_aio_slot_t* slot) /* in: slot in this array */
+{
+ ulint segment;
+ ulint seg_len;
+
+ if (array == os_aio_ibuf_array) {
+ segment = 0;
+
+ } else if (array == os_aio_log_array) {
+ segment = 1;
+
+ } else if (array == os_aio_read_array) {
+ seg_len = os_aio_read_array->n_slots /
+ os_aio_read_array->n_segments;
+
+ segment = 2 + slot->pos / seg_len;
+ } else {
+ ut_a(array == os_aio_write_array);
+ seg_len = os_aio_write_array->n_slots /
+ os_aio_write_array->n_segments;
+
+ segment = os_aio_read_array->n_segments + 2
+ + slot->pos / seg_len;
+ }
+
+ return(segment);
+}
+
+/**************************************************************************
+Calculates local segment number and aio array from global segment number. */
+static
+ulint
+os_aio_get_array_and_local_segment(
+/*===============================*/
+ /* out: local segment number within
+ the aio array */
+ os_aio_array_t** array, /* out: aio wait array */
+ ulint global_segment)/* in: global segment number */
+{
+ ulint segment;
+
+ ut_a(global_segment < os_aio_n_segments);
+
+ if (global_segment == 0) {
+ *array = os_aio_ibuf_array;
+ segment = 0;
+
+ } else if (global_segment == 1) {
+ *array = os_aio_log_array;
+ segment = 0;
+
+ } else if (global_segment < os_aio_read_array->n_segments + 2) {
+ *array = os_aio_read_array;
+
+ segment = global_segment - 2;
+ } else {
+ *array = os_aio_write_array;
+
+ segment = global_segment - (os_aio_read_array->n_segments + 2);
+ }
+
+ return(segment);
+}
+
+/***********************************************************************
+Gets an integer value designating a specified aio array. This is used
+to give numbers to signals in Posix aio. */
+static
+ulint
+os_aio_get_array_no(
+/*================*/
+ os_aio_array_t* array) /* in: aio array */
+{
+ if (array == os_aio_ibuf_array) {
+
+ return(0);
+
+ } else if (array == os_aio_log_array) {
+
+ return(1);
+
+ } else if (array == os_aio_read_array) {
+
+ return(2);
+ } else if (array == os_aio_write_array) {
+
+ return(3);
+ } else {
+ ut_a(0);
+
+ return(0);
+ }
+}
+
+/***********************************************************************
+Gets the aio array for its number. */
+static
+os_aio_array_t*
+os_aio_get_array_from_no(
+/*=====================*/
+ /* out: aio array */
+ ulint n) /* in: array number */
+{
+ if (n == 0) {
+ return(os_aio_ibuf_array);
+ } else if (n == 1) {
+
+ return(os_aio_log_array);
+ } else if (n == 2) {
+
+ return(os_aio_read_array);
+ } else if (n == 3) {
+
+ return(os_aio_write_array);
+ } else {
+ ut_a(0);
+
+ return(NULL);
+ }
+}
+
+/***********************************************************************
+Requests for a slot in the aio array. If no slot is available, waits until
+not_full-event becomes signaled. */
+static
+os_aio_slot_t*
+os_aio_array_reserve_slot(
+/*======================*/
+ /* out: pointer to slot */
+ ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
+ os_aio_array_t* array, /* in: aio array */
+ void* message1,/* in: message to be passed along with
+ the aio operation */
+ void* message2,/* in: message to be passed along with
+ the aio operation */
+ os_file_t file, /* in: file handle */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ void* buf, /* in: buffer where to read or from which
+ to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint len) /* in: length of the block to read or write */
+{
+ os_aio_slot_t* slot;
+#ifdef WIN_ASYNC_IO
+ OVERLAPPED* control;
+#elif POSIX_ASYNC_IO
+ struct aiocb* control;
+ ulint type;
+#endif
+ ulint i;
+loop:
+ os_mutex_enter(array->mutex);
+
+ if (array->n_reserved == array->n_slots) {
+ os_mutex_exit(array->mutex);
+ os_event_wait(array->not_full);
+
+ goto loop;
+ }
+
+ for (i = 0;; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ if (slot->reserved == FALSE) {
+ break;
+ }
+ }
+
+ array->n_reserved++;
+
+ if (array->n_reserved == array->n_slots) {
+ os_event_reset(array->not_full);
+ }
+
+ slot->reserved = TRUE;
+ slot->message1 = message1;
+ slot->message2 = message2;
+ slot->file = file;
+ slot->name = name;
+ slot->len = len;
+ slot->type = type;
+ slot->buf = buf;
+ slot->offset = offset;
+ slot->offset_high = offset_high;
+ slot->io_already_done = FALSE;
+
+#ifdef WIN_ASYNC_IO
+ control = &(slot->control);
+ control->Offset = (DWORD)offset;
+ control->OffsetHigh = (DWORD)offset_high;
+ os_event_reset(control->hEvent);
+
+#elif POSIX_ASYNC_IO
+
+#if (UNIV_WORD_SIZE == 8)
+ offset = offset + (offset_high << 32);
+#else
+ ut_a(offset_high == 0);
+#endif
+ control = &(slot->control);
+ control->aio_fildes = file;
+ control->aio_buf = buf;
+ control->aio_nbytes = len;
+ control->aio_offset = offset;
+ control->aio_reqprio = 0;
+ control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+ control->aio_sigevent.signo =
+ SIGRTMAX + 1 + os_aio_get_array_no(array);
+ /* TODO: How to choose the signal numbers? */
+ control->aio_sigevent.sigev_value.sival_ptr = slot;
+#endif
+ os_mutex_exit(array->mutex);
+
+ return(slot);
+}
+
+/***********************************************************************
+Frees a slot in the aio array. */
+static
+void
+os_aio_array_free_slot(
+/*===================*/
+ os_aio_array_t* array, /* in: aio array */
+ os_aio_slot_t* slot) /* in: pointer to slot */
+{
+ ut_ad(array);
+ ut_ad(slot);
+
+ os_mutex_enter(array->mutex);
+
+ ut_ad(slot->reserved);
+
+ slot->reserved = FALSE;
+
+ array->n_reserved--;
+
+ if (array->n_reserved == array->n_slots - 1) {
+ os_event_set(array->not_full);
+ }
+
+#ifdef WIN_ASYNC_IO
+ os_event_reset(slot->control.hEvent);
+#endif
+ os_mutex_exit(array->mutex);
+}
+
+/**************************************************************************
+Wakes up a simulated aio i/o-handler thread if it has something to do. */
+static
+void
+os_aio_simulated_wake_handler_thread(
+/*=================================*/
+ ulint global_segment) /* in: the number of the segment in the aio
+ arrays */
+{
+ os_aio_array_t* array;
+ ulint segment;
+ os_aio_slot_t* slot;
+ ulint n;
+ ulint i;
+
+ ut_ad(!os_aio_use_native_aio);
+
+ segment = os_aio_get_array_and_local_segment(&array, global_segment);
+
+ n = array->n_slots / array->n_segments;
+
+ /* Look through n slots after the segment * n'th slot */
+
+ os_mutex_enter(array->mutex);
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ if (slot->reserved) {
+ /* Found an i/o request */
+
+ break;
+ }
+ }
+
+ os_mutex_exit(array->mutex);
+
+ if (i < n) {
+ os_event_set(os_aio_segment_wait_events[global_segment]);
+ }
+}
+
+/**************************************************************************
+Wakes up simulated aio i/o-handler threads if they have something to do. */
+
+void
+os_aio_simulated_wake_handler_threads(void)
+/*=======================================*/
+{
+ ulint i;
+
+ if (os_aio_use_native_aio) {
+ /* We do not use simulated aio: do nothing */
+
+ return;
+ }
+
+ for (i = 0; i < os_aio_n_segments; i++) {
+ os_aio_simulated_wake_handler_thread(i);
+ }
+}
+
+/***********************************************************************
+Requests an asynchronous i/o operation. */
+
+bool
+os_aio(
+/*===*/
+ /* out: TRUE if request was queued
+ successfully, FALSE if fail */
+ ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
+ ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
+ to OS_AIO_SIMULATED_WAKE_LATER: the
+ last flag advises this function not to wake
+ i/o-handler threads, but the caller will
+ do the waking explicitly later, in this
+ way the caller can post several requests in
+ a batch; NOTE that the batch must not be
+ so big that it exhausts the slots in aio
+ arrays! NOTE that a simulated batch
+ may introduce hidden chances of deadlocks,
+ because i/os are not actually handled until
+ all have been posted: use with great
+ caution! */
+ char* name, /* in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read or from which
+ to write */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read or write */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n, /* in: number of bytes to read or write */
+ void* message1,/* in: messages for the aio handler (these
+ can be used to identify a completed aio
+ operation); if mode is OS_AIO_SYNC, these
+ are ignored */
+ void* message2)
+{
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+#ifdef WIN_ASYNC_IO
+ BOOL ret = TRUE;
+ DWORD len = n;
+ void* dummy_mess1;
+ void* dummy_mess2;
+#endif
+ ulint err = 0;
+ bool retry;
+ ulint wake_later;
+
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+ ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad((ulint)buf % OS_FILE_LOG_BLOCK_SIZE == 0)
+ ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad(os_aio_validate());
+
+ wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
+ mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
+
+ if (mode == OS_AIO_SYNC
+#ifdef WIN_ASYNC_IO
+ && !os_aio_use_native_aio
+#endif
+ ) {
+ /* This is actually an ordinary synchronous read or write:
+ no need to use an i/o-handler thread. NOTE that if we use
+ Windows async i/o, Windows does not allow us to use
+ ordinary synchronous os_file_read etc. on the same file,
+ therefore we have built a special mechanism for synchronous
+ wait in the Windows case. */
+
+ if (type == OS_FILE_READ) {
+ return(os_file_read(file, buf, offset, offset_high, n));
+ }
+
+ ut_a(type == OS_FILE_WRITE);
+
+ return(os_file_write(name, file, buf, offset, offset_high, n));
+ }
+
+try_again:
+ if (mode == OS_AIO_NORMAL) {
+ if (type == OS_FILE_READ) {
+ array = os_aio_read_array;
+ } else {
+ array = os_aio_write_array;
+ }
+ } else if (mode == OS_AIO_IBUF) {
+ ut_ad(type == OS_FILE_READ);
+
+ array = os_aio_ibuf_array;
+ } else if (mode == OS_AIO_LOG) {
+
+ array = os_aio_log_array;
+ } else if (mode == OS_AIO_SYNC) {
+ array = os_aio_sync_array;
+ } else {
+ ut_error;
+ }
+
+ slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
+ name, buf, offset, offset_high, n);
+ if (type == OS_FILE_READ) {
+ if (os_aio_use_native_aio) {
+#ifdef WIN_ASYNC_IO
+ ret = ReadFile(file, buf, (DWORD)n, &len,
+ &(slot->control));
+#elif defined(POSIX_ASYNC_IO)
+ err = (ulint) aio_read(&(slot->control));
+#endif
+ } else {
+ if (!wake_later) {
+ os_aio_simulated_wake_handler_thread(
+ os_aio_get_segment_no_from_slot(array, slot));
+ }
+ }
+ } else if (type == OS_FILE_WRITE) {
+ if (os_aio_use_native_aio) {
+#ifdef WIN_ASYNC_IO
+ ret = WriteFile(file, buf, (DWORD)n, &len,
+ &(slot->control));
+#elif defined(POSIX_ASYNC_IO)
+ err = (ulint) aio_write(&(slot->control));
+#endif
+ } else {
+ if (!wake_later) {
+ os_aio_simulated_wake_handler_thread(
+ os_aio_get_segment_no_from_slot(array, slot));
+ }
+ }
+ } else {
+ ut_error;
+ }
+
+#ifdef WIN_ASYNC_IO
+ if ((ret && len == n)
+ || (!ret && GetLastError() == ERROR_IO_PENDING)) {
+
+ /* aio was queued successfully! */
+
+ if (mode == OS_AIO_SYNC) {
+ /* We want a synchronous i/o operation on a file
+ where we also use async i/o: in Windows we must
+ use the same wait mechanism as for async i/o */
+
+ return(os_aio_windows_handle(ULINT_UNDEFINED,
+ slot->pos,
+ &dummy_mess1, &dummy_mess2));
+ }
+
+ return(TRUE);
+ }
+#else
+ if (err == 0) {
+ /* aio was queued successfully! */
+
+ return(TRUE);
+ }
+#endif
+ os_aio_array_free_slot(array, slot);
+
+ retry = os_file_handle_error(file, name);
+
+ if (retry) {
+
+ goto try_again;
+ }
+
+ ut_error;
+
+ return(FALSE);
+}
+
+#ifdef WIN_ASYNC_IO
+/**************************************************************************
+This function is only used in Windows asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait the
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing! */
+
+bool
+os_aio_windows_handle(
+/*==================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint segment, /* in: the number of the segment in the aio
+ arrays to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 the log i/o thread,
+ then follow the non-ibuf read threads, and as
+ the last are the non-ibuf write threads; if
+ this is ULINT_UNDEFINED, then it means that
+ sync aio is used, and this parameter is
+ ignored */
+ ulint pos, /* this parameter is used only in sync aio:
+ wait for the aio slot at this position */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2)
+{
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+ ulint n;
+ ulint i;
+ bool ret_val;
+ ulint err;
+ BOOL ret;
+ DWORD len;
+
+ if (segment == ULINT_UNDEFINED) {
+ array = os_aio_sync_array;
+ segment = 0;
+ } else {
+ segment = os_aio_get_array_and_local_segment(&array, segment);
+ }
+
+ /* NOTE! We only access constant fields in os_aio_array. Therefore
+ we do not have to acquire the protecting mutex yet */
+
+ ut_ad(os_aio_validate());
+ ut_ad(segment < array->n_segments);
+
+ n = array->n_slots / array->n_segments;
+
+ if (array == os_aio_sync_array) {
+ ut_ad(pos < array->n_slots);
+ os_event_wait(array->events[pos]);
+ i = pos;
+ } else {
+ i = os_event_wait_multiple(n, (array->events) + segment * n);
+ }
+
+ os_mutex_enter(array->mutex);
+
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ ut_a(slot->reserved);
+
+ ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
+
+ *message1 = slot->message1;
+ *message2 = slot->message2;
+
+ if (ret && len == slot->len) {
+ ret_val = TRUE;
+ } else {
+ err = GetLastError();
+ ut_error;
+
+ ret_val = FALSE;
+ }
+
+ os_mutex_exit(array->mutex);
+
+ os_aio_array_free_slot(array, slot);
+
+ return(ret_val);
+}
+#endif
+
+#ifdef POSIX_ASYNC_IO
+/**************************************************************************
+This function is only used in Posix asynchronous i/o. Waits for an aio
+operation to complete. */
+
+bool
+os_aio_posix_handle(
+/*================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint array_no, /* in: array number 0 - 3 */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2)
+{
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+ siginfo_t info;
+ sigset_t sigset;
+ int ret;
+
+ sigemptyset(&sigset);
+ sigaddset(&sigset, SIGRTMAX + 1 + array_no);
+
+ ret = sigwaitinfo(&sigset, &info);
+
+ if (ret != SIGRTMAX + 1 + array_no) {
+
+ ut_a(0);
+
+ return(FALSE);
+ }
+
+ array = os_aio_get_array_from_no(array_no);
+
+ os_mutex_enter(array->mutex);
+
+ slot = siginfo.si_value.sival_ptr;
+
+ ut_a(slot->reserved);
+
+ *message1 = slot->message1;
+ *message2 = slot->message2;
+
+ os_mutex_exit(array->mutex);
+
+ os_aio_array_free_slot(array, slot);
+
+ return(TRUE);
+}
+#endif
+
+/**************************************************************************
+Does simulated aio. This function should be called by an i/o-handler
+thread. */
+
+bool
+os_aio_simulated_handle(
+/*====================*/
+ /* out: TRUE if the aio operation succeeded */
+ ulint global_segment, /* in: the number of the segment in the aio
+ arrays to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 the log i/o thread,
+ then follow the non-ibuf read threads, and as
+ the last are the non-ibuf write threads */
+ void** message1, /* out: the messages passed with the aio
+ request; note that also in the case where
+ the aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation, for example */
+ void** message2)
+{
+ os_aio_array_t* array;
+ ulint segment;
+ os_aio_slot_t* slot;
+ os_aio_slot_t* slot2;
+ os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
+ ulint n_consecutive;
+ ulint total_len;
+ ulint offs;
+ byte* combined_buf;
+ bool ret;
+ ulint n;
+ ulint i;
+
+ segment = os_aio_get_array_and_local_segment(&array, global_segment);
+
+restart:
+ /* NOTE! We only access constant fields in os_aio_array. Therefore
+ we do not have to acquire the protecting mutex yet */
+
+ ut_ad(os_aio_validate());
+ ut_ad(segment < array->n_segments);
+
+ n = array->n_slots / array->n_segments;
+
+ /* Look through n slots after the segment * n'th slot */
+
+ os_mutex_enter(array->mutex);
+
+ /* Check if there is a slot for which the i/o has already been
+ done */
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ if (slot->reserved && slot->io_already_done) {
+
+ goto slot_io_done;
+ }
+ }
+
+ n_consecutive = 0;
+
+ for (i = 0; i < n; i++) {
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ if (slot->reserved) {
+ /* Found an i/o request */
+ consecutive_ios[n_consecutive] = slot;
+ n_consecutive++;
+
+ break;
+ }
+ }
+
+ /* Check if there are several consecutive blocks to read or write */
+
+consecutive_loop:
+ for (i = 0; i < n; i++) {
+ slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
+
+ if (slot2->reserved && slot2 != slot
+ && slot2->offset == slot->offset + slot->len
+ && slot->offset + slot->len > slot->offset /* check that
+ sum does not wrap over */
+ && slot2->offset_high == slot->offset_high
+ && slot2->type == slot->type
+ && slot2->file == slot->file) {
+
+ /* Found a consecutive i/o request */
+
+ consecutive_ios[n_consecutive] = slot2;
+ n_consecutive++;
+
+ slot = slot2;
+
+ if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
+
+ goto consecutive_loop;
+ } else {
+ break;
+ }
+ }
+ }
+
+ if (n_consecutive == 0) {
+
+ /* No i/o requested at the moment */
+
+ goto wait_for_io;
+ }
+
+ /* We have now collected n_consecutive i/o requests in the array;
+ allocate a single buffer which can hold all data, and perform the
+ i/o */
+
+ total_len = 0;
+ slot = consecutive_ios[0];
+
+ for (i = 0; i < n_consecutive; i++) {
+ total_len += consecutive_ios[i]->len;
+ }
+
+ if (n_consecutive == 1) {
+ /* We can use the buffer of the i/o request */
+ combined_buf = slot->buf;
+ } else {
+ combined_buf = ut_malloc(total_len);
+
+ ut_a(combined_buf);
+ }
+
+ /* We release the array mutex for the time of the i/o: NOTE that
+ this assumes that there is just one i/o-handler thread serving
+ a single segment of slots! */
+
+ os_mutex_exit(array->mutex);
+
+ if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
+ /* Copy the buffers to the combined buffer */
+ offs = 0;
+
+ for (i = 0; i < n_consecutive; i++) {
+
+ ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
+ consecutive_ios[i]->len);
+ offs += consecutive_ios[i]->len;
+ }
+ }
+
+ /* Do the i/o with ordinary, synchronous i/o functions: */
+ if (slot->type == OS_FILE_WRITE) {
+ ret = os_file_write(slot->name, slot->file, combined_buf,
+ slot->offset, slot->offset_high, total_len);
+ } else {
+ ret = os_file_read(slot->file, combined_buf,
+ slot->offset, slot->offset_high, total_len);
+ }
+
+ ut_a(ret);
+
+ if (slot->type == OS_FILE_READ && n_consecutive > 1) {
+ /* Copy the combined buffer to individual buffers */
+ offs = 0;
+
+ for (i = 0; i < n_consecutive; i++) {
+
+ ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
+ consecutive_ios[i]->len);
+ offs += consecutive_ios[i]->len;
+ }
+ }
+
+ if (n_consecutive > 1) {
+ ut_free(combined_buf);
+ }
+
+ os_mutex_enter(array->mutex);
+
+ /* Mark the i/os done in slots */
+
+ for (i = 0; i < n_consecutive; i++) {
+ consecutive_ios[i]->io_already_done = TRUE;
+ }
+
+ /* We return the messages for the first slot now, and if there were
+ several slots, the messages will be returned with subsequent calls
+ of this function */
+
+slot_io_done:
+
+ ut_a(slot->reserved);
+
+ *message1 = slot->message1;
+ *message2 = slot->message2;
+
+ os_mutex_exit(array->mutex);
+
+ os_aio_array_free_slot(array, slot);
+
+ return(ret);
+
+wait_for_io:
+ /* We wait here until there again can be i/os in the segment
+ of this thread */
+
+ os_event_reset(os_aio_segment_wait_events[global_segment]);
+
+ os_mutex_exit(array->mutex);
+
+ os_event_wait(os_aio_segment_wait_events[global_segment]);
+
+ goto restart;
+}
+
+/**************************************************************************
+Validates the consistency of an aio array. */
+static
+bool
+os_aio_array_validate(
+/*==================*/
+ /* out: TRUE if ok */
+ os_aio_array_t* array) /* in: aio wait array */
+{
+ os_aio_slot_t* slot;
+ ulint n_reserved = 0;
+ ulint i;
+
+ ut_a(array);
+
+ os_mutex_enter(array->mutex);
+
+ ut_a(array->n_slots > 0);
+ ut_a(array->n_segments > 0);
+
+ for (i = 0; i < array->n_slots; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ if (slot->reserved) {
+ n_reserved++;
+ ut_a(slot->len > 0);
+ }
+ }
+
+ ut_a(array->n_reserved == n_reserved);
+
+ os_mutex_exit(array->mutex);
+
+ return(TRUE);
+}
+
+/**************************************************************************
+Validates the consistency the aio system. */
+
+bool
+os_aio_validate(void)
+/*=================*/
+ /* out: TRUE if ok */
+{
+ os_aio_array_validate(os_aio_read_array);
+ os_aio_array_validate(os_aio_write_array);
+ os_aio_array_validate(os_aio_ibuf_array);
+ os_aio_array_validate(os_aio_log_array);
+ os_aio_array_validate(os_aio_sync_array);
+
+ return(TRUE);
+}
+
+/**************************************************************************
+Prints info of the aio arrays. */
+
+void
+os_aio_print(void)
+/*==============*/
+{
+ os_aio_array_t* array;
+ os_aio_slot_t* slot;
+ ulint n_reserved;
+ ulint i;
+
+ array = os_aio_read_array;
+loop:
+ ut_a(array);
+
+ printf("INFO OF AN AIO ARRAY\n");
+
+ os_mutex_enter(array->mutex);
+
+ ut_a(array->n_slots > 0);
+ ut_a(array->n_segments > 0);
+
+ n_reserved = 0;
+
+ for (i = 0; i < array->n_slots; i++) {
+ slot = os_aio_array_get_nth_slot(array, i);
+
+ if (slot->reserved) {
+ n_reserved++;
+ printf("Reserved slot, messages %lx %lx\n",
+ slot->message1, slot->message2);
+ ut_a(slot->len > 0);
+ }
+ }
+
+ ut_a(array->n_reserved == n_reserved);
+
+ printf("Total of %lu reserved aio slots\n", n_reserved);
+
+ os_mutex_exit(array->mutex);
+
+ if (array == os_aio_read_array) {
+ array = os_aio_write_array;
+
+ goto loop;
+ }
+
+ if (array == os_aio_write_array) {
+ array = os_aio_ibuf_array;
+
+ goto loop;
+ }
+
+ if (array == os_aio_ibuf_array) {
+ array = os_aio_log_array;
+
+ goto loop;
+ }
+
+ if (array == os_aio_log_array) {
+ array = os_aio_sync_array;
+
+ goto loop;
+ }
+}
+
+/**************************************************************************
+Checks that all slots in the system have been freed, that is, there are
+no pending io operations. */
+
+bool
+os_aio_all_slots_free(void)
+/*=======================*/
+ /* out: TRUE if all free */
+{
+ os_aio_array_t* array;
+ ulint n_res = 0;
+
+ array = os_aio_read_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ array = os_aio_write_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ array = os_aio_ibuf_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ array = os_aio_log_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ array = os_aio_sync_array;
+
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+
+ if (n_res == 0) {
+
+ return(TRUE);
+ }
+
+ return(FALSE);
+}
diff --git a/innobase/os/os0proc.c b/innobase/os/os0proc.c
new file mode 100644
index 00000000000..43a2db4d306
--- /dev/null
+++ b/innobase/os/os0proc.c
@@ -0,0 +1,150 @@
+/******************************************************
+The interface to the operating system
+process control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/30/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0proc.h"
+#ifdef UNIV_NONINL
+#include "os0proc.ic"
+#endif
+
+#ifdef __WIN__
+#include <windows.h>
+#endif
+
+#include "ut0mem.h"
+
+/********************************************************************
+Allocates non-cacheable memory. */
+
+void*
+os_mem_alloc_nocache(
+/*=================*/
+ /* out: allocated memory */
+ ulint n) /* in: number of bytes */
+{
+#ifdef __WIN__
+ void* ptr;
+
+ ptr = VirtualAlloc(NULL, n, MEM_COMMIT,
+ PAGE_READWRITE | PAGE_NOCACHE);
+ ut_a(ptr);
+
+ return(ptr);
+#else
+ return(ut_malloc(n));
+#endif
+}
+
+#ifdef notdefined
+/********************************************************************
+Creates a new process. */
+
+ibool
+os_process_create(
+/*==============*/
+ char* name, /* in: name of the executable to start
+ or its full path name */
+ char* cmd, /* in: command line for the starting
+ process, or NULL if no command line
+ specified */
+ os_process_t* proc, /* out: handle to the process */
+ os_process_id_t* id) /* out: process id */
+
+{
+ BOOL ret;
+ PROCESS_INFORMATION pinfo;
+ STARTUPINFO sinfo;
+
+ /* The following assignments are default for the startupinfo
+ structure */
+ sinfo.cb = sizeof(STARTUPINFO);
+ sinfo.lpReserved = NULL;
+ sinfo.lpDesktop = NULL;
+ sinfo.cbReserved2 = 0;
+ sinfo.lpReserved = NULL;
+
+ ret = CreateProcess(name,
+ cmd,
+ NULL, /* No security attributes */
+ NULL, /* No thread security attrs */
+ FALSE, /* Do not inherit handles */
+ 0, /* No creation flags */
+ NULL, /* No environment */
+ NULL, /* Same current directory */
+ &sinfo,
+ &pinfo);
+
+ *proc = pinfo.hProcess;
+ *id = pinfo.dwProcessId;
+
+ return(ret);
+}
+
+/**************************************************************************
+Exits a process. */
+
+void
+os_process_exit(
+/*============*/
+ ulint code) /* in: exit code */
+{
+ ExitProcess((UINT)code);
+}
+
+/**************************************************************************
+Gets a process exit code. */
+
+ibool
+os_process_get_exit_code(
+/*=====================*/
+ /* out: TRUE if succeed, FALSE if fail */
+ os_process_t proc, /* in: handle to the process */
+ ulint* code) /* out: exit code */
+{
+ DWORD ex_code;
+ BOOL ret;
+
+ ret = GetExitCodeProcess(proc, &ex_code);
+
+ *code = (ulint)ex_code;
+
+ return(ret);
+}
+#endif /* notdedfined */
+
+/********************************************************************
+Sets the priority boost for threads released from waiting within the current
+process. */
+
+void
+os_process_set_priority_boost(
+/*==========================*/
+ ibool do_boost) /* in: TRUE if priority boost should be done,
+ FALSE if not */
+{
+#ifdef __WIN__
+ ibool no_boost;
+
+ if (do_boost) {
+ no_boost = FALSE;
+ } else {
+ no_boost = TRUE;
+ }
+
+ ut_a(TRUE == 1);
+
+/* Does not do anything currently!
+ SetProcessPriorityBoost(GetCurrentProcess(), no_boost);
+*/
+ printf(
+ "Warning: process priority boost setting currently not functional!\n"
+ );
+#else
+ UT_NOT_USED(do_boost);
+#endif
+}
diff --git a/innobase/os/os0shm.c b/innobase/os/os0shm.c
new file mode 100644
index 00000000000..e03440cd4f4
--- /dev/null
+++ b/innobase/os/os0shm.c
@@ -0,0 +1,146 @@
+/******************************************************
+The interface to the operating system
+shared memory primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/23/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0shm.h"
+#ifdef UNIV_NONINL
+#include "os0shm.ic"
+#endif
+
+#ifdef __WIN__
+#include "windows.h"
+
+typedef HANDLE os_shm_t;
+#endif
+
+/********************************************************************
+Creates an area of shared memory. It can be named so that
+different processes may access it in the same computer.
+If an area with the same name already exists, returns
+a handle to that area (where the size of the area is
+not changed even if this call requests a different size).
+To use the area, it first has to be mapped to the process
+address space by os_shm_map. */
+
+os_shm_t
+os_shm_create(
+/*==========*/
+ /* out, own: handle to the shared
+ memory area, NULL if error */
+ ulint size, /* in: area size < 4 GB */
+ char* name) /* in: name of the area as a null-terminated
+ string */
+{
+#ifdef __WIN__
+ os_shm_t shm;
+
+ ut_a(name);
+ ut_a(size > 0);
+ ut_a(size < 0xFFFFFFFF);
+
+ /* In Windows NT shared memory is created as a memory mapped
+ file */
+ shm = CreateFileMapping((HANDLE)0xFFFFFFFF, /* use operating system
+ swap file as the backing
+ file */
+ NULL, /* default security
+ descriptor */
+ PAGE_READWRITE, /* allow reading and
+ writing */
+ 0, /* size must be less
+ than 4 GB */
+ (DWORD)size,
+ name);
+ return(shm);
+#else
+ UT_NOT_USED(size);
+ UT_NOT_USED(name);
+
+ return(NULL);
+#endif
+}
+
+/***************************************************************************
+Frees a shared memory area. The area can be freed only after it
+has been unmapped in all the processes where it was mapped. */
+
+ibool
+os_shm_free(
+/*========*/
+ /* out: TRUE if success */
+ os_shm_t shm) /* in, own: handle to a shared memory area */
+{
+#ifdef __WIN__
+
+ BOOL ret;
+
+ ut_a(shm);
+
+ ret = CloseHandle(shm);
+
+ if (ret) {
+ return(TRUE);
+ } else {
+ return(FALSE);
+ }
+#else
+ UT_NOT_USED(shm);
+#endif
+}
+
+/***************************************************************************
+Maps a shared memory area in the address space of a process. */
+
+void*
+os_shm_map(
+/*=======*/
+ /* out: address of the area, NULL if error */
+ os_shm_t shm) /* in: handle to a shared memory area */
+{
+#ifdef __WIN__
+ void* mem;
+
+ ut_a(shm);
+
+ mem = MapViewOfFile(shm,
+ FILE_MAP_ALL_ACCESS, /* read and write access
+ allowed */
+ 0, /* map from start of */
+ 0, /* area */
+ 0); /* map the whole area */
+ return(mem);
+#else
+ UT_NOT_USED(shm);
+#endif
+}
+
+/***************************************************************************
+Unmaps a shared memory area from the address space of a process. */
+
+ibool
+os_shm_unmap(
+/*=========*/
+ /* out: TRUE if succeed */
+ void* addr) /* in: address of the area */
+{
+#ifdef __WIN__
+ BOOL ret;
+
+ ut_a(addr);
+
+ ret = UnmapViewOfFile(addr);
+
+ if (ret) {
+ return(TRUE);
+ } else {
+ return(FALSE);
+ }
+#else
+ UT_NOT_USED(addr);
+#endif
+}
diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c
new file mode 100644
index 00000000000..1647dd982f3
--- /dev/null
+++ b/innobase/os/os0sync.c
@@ -0,0 +1,461 @@
+/******************************************************
+The interface to the operating system
+synchronization primitives.
+
+(c) 1995 Innobase Oy
+
+Created 9/6/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0sync.h"
+#ifdef UNIV_NONINL
+#include "os0sync.ic"
+#endif
+
+#ifdef __WIN__
+#include <windows.h>
+#endif
+
+#include "ut0mem.h"
+
+/* Type definition for an operating system mutex struct */
+struct os_mutex_struct{
+ void* handle; /* OS handle to mutex */
+ ulint count; /* we use this counter to check
+ that the same thread does not
+ recursively lock the mutex: we
+ do not assume that the OS mutex
+ supports recursive locking, though
+ NT seems to do that */
+};
+
+/*************************************************************
+Creates an event semaphore, i.e., a semaphore which may
+just have two states: signaled and nonsignaled.
+The created event is manual reset: it must be reset
+explicitly by calling sync_os_reset_event. */
+
+os_event_t
+os_event_create(
+/*============*/
+ /* out: the event handle */
+ char* name) /* in: the name of the event, if NULL
+ the event is created without a name */
+{
+#ifdef __WIN__
+ HANDLE event;
+
+ event = CreateEvent(NULL, /* No security attributes */
+ TRUE, /* Manual reset */
+ FALSE, /* Initial state nonsignaled */
+ name);
+ ut_a(event);
+
+ return(event);
+#else
+ os_event_t event;
+
+ UT_NOT_USED(name);
+
+ event = ut_malloc(sizeof(struct os_event_struct));
+
+ os_fast_mutex_init(&(event->os_mutex));
+ os_fast_mutex_init(&(event->wait_mutex));
+
+ event->is_set = TRUE;
+
+ return(event);
+#endif
+}
+
+/*************************************************************
+Creates an auto-reset event semaphore, i.e., an event
+which is automatically reset when a single thread is
+released. */
+
+os_event_t
+os_event_create_auto(
+/*=================*/
+ /* out: the event handle */
+ char* name) /* in: the name of the event, if NULL
+ the event is created without a name */
+{
+#ifdef __WIN__
+ HANDLE event;
+
+ event = CreateEvent(NULL, /* No security attributes */
+ FALSE, /* Auto-reset */
+ FALSE, /* Initial state nonsignaled */
+ name);
+ ut_a(event);
+
+ return(event);
+#else
+ /* Does nothing in Posix because we do not need this with MySQL */
+
+ UT_NOT_USED(name);
+
+ return(NULL);
+#endif
+}
+
+/**************************************************************
+Sets an event semaphore to the signaled state: lets waiting threads
+proceed. */
+
+void
+os_event_set(
+/*=========*/
+ os_event_t event) /* in: event to set */
+{
+#ifdef __WIN__
+ ut_a(event);
+ ut_a(SetEvent(event));
+#else
+ ut_a(event);
+
+ os_fast_mutex_lock(&(event->os_mutex));
+
+ if (event->is_set) {
+ /* Do nothing */
+ } else {
+ os_fast_mutex_unlock(&(event->wait_mutex));
+ event->is_set = TRUE;
+ }
+
+ os_fast_mutex_unlock(&(event->os_mutex));
+#endif
+}
+
+/**************************************************************
+Resets an event semaphore to the nonsignaled state. Waiting threads will
+stop to wait for the event. */
+
+void
+os_event_reset(
+/*===========*/
+ os_event_t event) /* in: event to reset */
+{
+#ifdef __WIN__
+ ut_a(event);
+
+ ut_a(ResetEvent(event));
+#else
+ ut_a(event);
+
+ os_fast_mutex_lock(&(event->os_mutex));
+
+ if (!event->is_set) {
+ /* Do nothing */
+ } else {
+ os_fast_mutex_lock(&(event->wait_mutex));
+ event->is_set = FALSE;
+ }
+
+ os_fast_mutex_unlock(&(event->os_mutex));
+#endif
+}
+
+/**************************************************************
+Frees an event object. */
+
+void
+os_event_free(
+/*==========*/
+ os_event_t event) /* in: event to free */
+
+{
+#ifdef __WIN__
+ ut_a(event);
+
+ ut_a(CloseHandle(event));
+#else
+ ut_a(event);
+
+ os_fast_mutex_free(&(event->os_mutex));
+ os_fast_mutex_free(&(event->wait_mutex));
+
+ ut_free(event);
+#endif
+}
+
+/**************************************************************
+Waits for an event object until it is in the signaled state. */
+
+void
+os_event_wait(
+/*==========*/
+ os_event_t event) /* in: event to wait */
+{
+#ifdef __WIN__
+ DWORD err;
+
+ ut_a(event);
+
+ /* Specify an infinite time limit for waiting */
+ err = WaitForSingleObject(event, INFINITE);
+
+ ut_a(err == WAIT_OBJECT_0);
+#else
+ os_fast_mutex_lock(&(event->wait_mutex));
+ os_fast_mutex_unlock(&(event->wait_mutex));
+#endif
+}
+
+/**************************************************************
+Waits for an event object until it is in the signaled state or
+a timeout is exceeded. */
+
+ulint
+os_event_wait_time(
+/*===============*/
+ /* out: 0 if success, OS_SYNC_TIME_EXCEEDED if
+ timeout was exceeded */
+ os_event_t event, /* in: event to wait */
+ ulint time) /* in: timeout in microseconds, or
+ OS_SYNC_INFINITE_TIME */
+{
+#ifdef __WIN__
+ DWORD err;
+
+ ut_a(event);
+
+ if (time != OS_SYNC_INFINITE_TIME) {
+ err = WaitForSingleObject(event, time / 1000);
+ } else {
+ err = WaitForSingleObject(event, INFINITE);
+ }
+
+ if (err == WAIT_OBJECT_0) {
+
+ return(0);
+ } else if (err == WAIT_TIMEOUT) {
+
+ return(OS_SYNC_TIME_EXCEEDED);
+ } else {
+ ut_error;
+ }
+#else
+ UT_NOT_USED(time);
+
+ /* In Posix this is just an ordinary, infinite wait */
+
+ os_event_wait(event);
+
+ return(0);
+#endif
+}
+
+/**************************************************************
+Waits for any event in an event array. Returns if even a single
+one is signaled or becomes signaled. */
+
+ulint
+os_event_wait_multiple(
+/*===================*/
+ /* out: index of the event
+ which was signaled */
+ ulint n, /* in: number of events in the
+ array */
+ os_event_t* event_array) /* in: pointer to an array of event
+ handles */
+{
+#ifdef __WIN__
+ DWORD index;
+
+ ut_a(event_array);
+ ut_a(n > 0);
+
+ index = WaitForMultipleObjects(n,
+ event_array,
+ FALSE, /* Wait for any 1 event */
+ INFINITE); /* Infinite wait time
+ limit */
+ ut_a(index >= WAIT_OBJECT_0);
+ ut_a(index < WAIT_OBJECT_0 + n);
+
+ return(index - WAIT_OBJECT_0);
+#else
+ ut_a(n == 0);
+
+ /* In Posix we can only wait for a single event */
+
+ os_event_wait(*event_array);
+
+ return(0);
+#endif
+}
+
+/*************************************************************
+Creates an operating system mutex semaphore.
+Because these are slow, the mutex semaphore of the database
+itself (sync_mutex_t) should be used where possible. */
+
+os_mutex_t
+os_mutex_create(
+/*============*/
+ /* out: the mutex handle */
+ char* name) /* in: the name of the mutex, if NULL
+ the mutex is created without a name */
+{
+#ifdef __WIN__
+ HANDLE mutex;
+ os_mutex_t mutex_str;
+
+ mutex = CreateMutex(NULL, /* No security attributes */
+ FALSE, /* Initial state: no owner */
+ name);
+ ut_a(mutex);
+
+ mutex_str = ut_malloc(sizeof(os_mutex_str_t));
+
+ mutex_str->handle = mutex;
+ mutex_str->count = 0;
+
+ return(mutex_str);
+#else
+ os_fast_mutex_t* os_mutex;
+ os_mutex_t mutex_str;
+
+ UT_NOT_USED(name);
+
+ os_mutex = ut_malloc(sizeof(os_fast_mutex_t));
+
+ os_fast_mutex_init(os_mutex);
+
+ mutex_str = ut_malloc(sizeof(os_mutex_str_t));
+
+ mutex_str->handle = os_mutex;
+ mutex_str->count = 0;
+
+ return(mutex_str);
+#endif
+}
+
+/**************************************************************
+Acquires ownership of a mutex semaphore. */
+
+void
+os_mutex_enter(
+/*===========*/
+ os_mutex_t mutex) /* in: mutex to acquire */
+{
+#ifdef __WIN__
+ DWORD err;
+
+ ut_a(mutex);
+
+ /* Specify infinite time limit for waiting */
+ err = WaitForSingleObject(mutex->handle, INFINITE);
+
+ ut_a(err == WAIT_OBJECT_0);
+
+ (mutex->count)++;
+ ut_a(mutex->count == 1);
+#else
+ os_fast_mutex_lock(mutex->handle);
+
+ (mutex->count)++;
+
+ ut_a(mutex->count == 1);
+#endif
+}
+
+/**************************************************************
+Releases ownership of a mutex. */
+
+void
+os_mutex_exit(
+/*==========*/
+ os_mutex_t mutex) /* in: mutex to release */
+{
+#ifdef __WIN__
+ ut_a(mutex);
+
+ ut_a(mutex->count == 1);
+
+ (mutex->count)--;
+
+ ut_a(ReleaseMutex(mutex->handle));
+#else
+ ut_a(mutex);
+
+ ut_a(mutex->count == 1);
+
+ (mutex->count)--;
+
+ os_fast_mutex_unlock(mutex->handle);
+#endif
+}
+
+/**************************************************************
+Frees a mutex object. */
+
+void
+os_mutex_free(
+/*==========*/
+ os_mutex_t mutex) /* in: mutex to free */
+{
+#ifdef __WIN__
+ ut_a(mutex);
+
+ ut_a(CloseHandle(mutex->handle));
+ ut_free(mutex);
+#else
+ os_fast_mutex_free(mutex->handle);
+ ut_free(mutex->handle);
+ ut_free(mutex);
+#endif
+}
+
+#ifndef _WIN32
+/*************************************************************
+Initializes an operating system fast mutex semaphore. */
+
+void
+os_fast_mutex_init(
+/*===============*/
+ os_fast_mutex_t* fast_mutex) /* in: fast mutex */
+{
+#ifdef __WIN__
+ ut_a(fast_mutex);
+
+ InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
+#else
+ pthread_mutex_init(fast_mutex, NULL);
+#endif
+}
+
+/**************************************************************
+Acquires ownership of a fast mutex. */
+
+void
+os_fast_mutex_lock(
+/*===============*/
+ os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */
+{
+#ifdef __WIN__
+ EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
+#else
+ pthread_mutex_lock(fast_mutex);
+#endif
+}
+
+/**************************************************************
+Frees a mutex object. */
+
+void
+os_fast_mutex_free(
+/*===============*/
+ os_fast_mutex_t* fast_mutex) /* in: mutex to free */
+{
+#ifdef __WIN__
+ ut_a(fast_mutex);
+
+ DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
+#else
+ UT_NOT_USED(fast_mutex);
+
+#endif
+}
+#endif
diff --git a/innobase/os/os0thread.c b/innobase/os/os0thread.c
new file mode 100644
index 00000000000..a33613267ac
--- /dev/null
+++ b/innobase/os/os0thread.c
@@ -0,0 +1,210 @@
+/******************************************************
+The interface to the operating system
+process and thread control primitives
+
+(c) 1995 Innobase Oy
+
+Created 9/8/1995 Heikki Tuuri
+*******************************************************/
+
+#include "os0thread.h"
+#ifdef UNIV_NONINL
+#include "os0thread.ic"
+#endif
+
+#ifdef __WIN__
+#include <windows.h>
+#endif
+
+/*********************************************************************
+Returns the thread identifier of current thread. */
+
+os_thread_id_t
+os_thread_get_curr_id(void)
+/*=======================*/
+{
+#ifdef __WIN__
+ return(GetCurrentThreadId());
+#else
+ return((os_thread_id_t) pthread_self());
+#endif
+}
+
+/* Define a function pointer type to use in a typecast */
+typedef void* (*os_posix_f_t) (void*);
+
+/********************************************************************
+Creates a new thread of execution. The execution starts from
+the function given. The start function takes a void* parameter
+and returns an ulint. */
+
+os_thread_t
+os_thread_create(
+/*=============*/
+ /* out: handle to the thread */
+ ulint (*start_f)(void*), /* in: pointer to function
+ from which to start */
+ void* arg, /* in: argument to start
+ function */
+ os_thread_id_t* thread_id) /* out: id of created
+ thread */
+{
+#ifdef __WIN__
+ os_thread_t thread;
+
+ thread = CreateThread(NULL, /* no security attributes */
+ 0, /* default size stack */
+ (LPTHREAD_START_ROUTINE)start_f,
+ arg,
+ 0, /* thread runs immediately */
+ thread_id);
+ ut_a(thread);
+
+ return(thread);
+#else
+ int ret;
+ os_thread_t pthread;
+
+ /* Note that below we cast the start function returning an integer
+ to a function returning a pointer: this may cause error
+ if the return value is used somewhere! */
+
+ ret = pthread_create(&pthread, NULL, (os_posix_f_t) start_f, arg);
+
+ return(pthread);
+#endif
+}
+
+/*********************************************************************
+Returns handle to the current thread. */
+
+os_thread_t
+os_thread_get_curr(void)
+/*=======================*/
+{
+#ifdef __WIN__
+ return(GetCurrentThread());
+#else
+ return(pthread_self());
+#endif
+}
+
+/*********************************************************************
+Converts a thread id to a ulint. */
+
+ulint
+os_thread_conv_id_to_ulint(
+/*=======================*/
+ /* out: converted to ulint */
+ os_thread_id_t id) /* in: thread id */
+{
+ return((ulint)id);
+}
+
+/*********************************************************************
+Advises the os to give up remainder of the thread's time slice. */
+
+void
+os_thread_yield(void)
+/*=================*/
+{
+#ifdef __WIN__
+ Sleep(0);
+#else
+ os_thread_sleep(0);
+#endif
+}
+
+/*********************************************************************
+The thread sleeps at least the time given in microseconds. */
+
+void
+os_thread_sleep(
+/*============*/
+ ulint tm) /* in: time in microseconds */
+{
+#ifdef __WIN__
+ Sleep(tm / 1000);
+#else
+ struct timeval t;
+
+ t.tv_sec = 0;
+ t.tv_usec = tm;
+
+ select(0, NULL, NULL, NULL, &t);
+#endif
+}
+
+/**********************************************************************
+Sets a thread priority. */
+
+void
+os_thread_set_priority(
+/*===================*/
+ os_thread_t handle, /* in: OS handle to the thread */
+ ulint pri) /* in: priority */
+{
+#ifdef __WIN__
+ int os_pri;
+
+ if (pri == OS_THREAD_PRIORITY_BACKGROUND) {
+ os_pri = THREAD_PRIORITY_BELOW_NORMAL;
+ } else if (pri == OS_THREAD_PRIORITY_NORMAL) {
+ os_pri = THREAD_PRIORITY_NORMAL;
+ } else if (pri == OS_THREAD_PRIORITY_ABOVE_NORMAL) {
+ os_pri = THREAD_PRIORITY_HIGHEST;
+ } else {
+ ut_error;
+ }
+
+ ut_a(SetThreadPriority(handle, os_pri));
+#else
+ UT_NOT_USED(handle);
+ UT_NOT_USED(pri);
+#endif
+}
+
+/**********************************************************************
+Gets a thread priority. */
+
+ulint
+os_thread_get_priority(
+/*===================*/
+ /* out: priority */
+ os_thread_t handle) /* in: OS handle to the thread */
+{
+#ifdef __WIN__
+ int os_pri;
+ ulint pri;
+
+ os_pri = GetThreadPriority(handle);
+
+ if (os_pri == THREAD_PRIORITY_BELOW_NORMAL) {
+ pri = OS_THREAD_PRIORITY_BACKGROUND;
+ } else if (os_pri == THREAD_PRIORITY_NORMAL) {
+ pri = OS_THREAD_PRIORITY_NORMAL;
+ } else if (os_pri == THREAD_PRIORITY_HIGHEST) {
+ pri = OS_THREAD_PRIORITY_ABOVE_NORMAL;
+ } else {
+ ut_error;
+ }
+
+ return(pri);
+#else
+ return(0);
+#endif
+}
+
+/**********************************************************************
+Gets the last operating system error code for the calling thread. */
+
+ulint
+os_thread_get_last_error(void)
+/*==========================*/
+{
+#ifdef __WIN__
+ return(GetLastError());
+#else
+ return(0);
+#endif
+}
diff --git a/innobase/os/os0trash.c b/innobase/os/os0trash.c
new file mode 100644
index 00000000000..e896ac9f083
--- /dev/null
+++ b/innobase/os/os0trash.c
@@ -0,0 +1,43 @@
+/* Stores the old console mode when echo is turned off */
+ulint os_old_console_mode;
+
+/********************************************************************
+Turns off echo from console input. */
+
+void
+os_console_echo_off(void)
+/*=====================*/
+{
+ GetConsoleMode(stdio, &os_old_console_mode);
+ SetConsoleMode(stdio, ENABLE_PROCESSED_INPUT);
+}
+
+/********************************************************************
+Turns on echo in console input. */
+
+void
+os_console_echo_on(void)
+/*====================*/
+{
+ SetConsoleMode(stdio, &os_old_console_mode);
+}
+
+/********************************************************************
+Reads a character from the console. */
+
+char
+os_read_console(void)
+/*=================*/
+{
+ char input_char;
+ ulint n_chars;
+
+ n_chars = 0;
+
+ while (n_chars == 0) {
+ ReadConsole(stdio, &input_char, 1, &n_chars, NULL);
+ }
+
+ return(input_char);
+}
+
diff --git a/innobase/os/ts/makefile b/innobase/os/ts/makefile
new file mode 100644
index 00000000000..0e145a14e7f
--- /dev/null
+++ b/innobase/os/ts/makefile
@@ -0,0 +1,20 @@
+
+
+
+include ..\..\makefile.i
+
+doall: tsos tsosaux
+
+
+tsos: ..\os.lib tsos.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\os.lib tsos.c $(LFL)
+
+tsosaux: tsosaux.c
+ $(CCOM) $(CFL) -I.. -I..\.. ..\..\ut.lib ..\os.lib tsosaux.c $(LFL)
+
+
+
+
+
+
+
diff --git a/innobase/os/ts/tsos.c b/innobase/os/ts/tsos.c
new file mode 100644
index 00000000000..ecc10b86d2f
--- /dev/null
+++ b/innobase/os/ts/tsos.c
@@ -0,0 +1,793 @@
+/************************************************************************
+The test module for the operating system interface
+
+(c) 1995 Innobase Oy
+
+Created 9/27/1995 Heikki Tuuri
+*************************************************************************/
+
+
+#include "../os0thread.h"
+#include "../os0shm.h"
+#include "../os0proc.h"
+#include "../os0sync.h"
+#include "../os0file.h"
+#include "ut0ut.h"
+#include "ut0mem.h"
+#include "sync0sync.h"
+#include "mem0mem.h"
+
+#define _WIN32_WINNT 0x0400
+#include "n:\program files\devstudio\vc\include\windows.h"
+#include "n:\program files\devstudio\vc\include\winbase.h"
+
+ulint last_thr = 1;
+
+byte global_buf[4000000];
+
+ulint* cache_buf;
+
+os_file_t file;
+os_file_t file2;
+
+os_event_t gl_ready;
+
+mutex_t ios_mutex;
+ulint ios;
+ulint rnd = 9837497;
+
+/********************************************************************
+Start function for threads in test1. */
+
+ulint
+thread(void* arg)
+/*==============*/
+{
+ ulint i;
+ void* arg2;
+ ulint count = 0;
+ ulint n;
+ ulint rnd_loc;
+ byte local_buf[2000];
+
+ arg2 = arg;
+
+ n = *((ulint*)arg);
+
+/* printf("Thread %lu started!\n", n); */
+
+ for (i = 0; i < 8000; i++) {
+
+ rnd_loc = rnd;
+ rnd += 763482469;
+
+ ut_memcpy(global_buf + (rnd_loc % 1500000) + 8200, local_buf,
+ 2000);
+ if (last_thr != n) {
+ count++;
+ last_thr = n;
+ }
+
+ if (i % 32 == 0) {
+ os_thread_yield();
+ }
+ }
+
+ printf("Thread %lu exits: %lu thread switches noticed\n", n, count);
+
+ return(0);
+}
+
+/*********************************************************************
+Test of the speed of wait for multiple events. */
+
+void
+testa1(void)
+/*========*/
+{
+ ulint i;
+ os_event_t arr[64];
+ ulint tm, oldtm;
+
+ printf("-------------------------------------------------\n");
+ printf("TEST A1. Speed of waits\n");
+
+ for (i = 0; i < 64; i++) {
+ arr[i] = os_event_create(NULL);
+ ut_a(arr[i]);
+ }
+
+ os_event_set(arr[1]);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 10000; i++) {
+ os_event_wait_multiple(4, arr);
+ }
+
+ tm = ut_clock();
+
+ printf("Wall clock time for %lu multiple waits %lu millisecs\n",
+ i, tm - oldtm);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 10000; i++) {
+ os_event_wait(arr[1]);
+ }
+
+ tm = ut_clock();
+
+ printf("Wall clock time for %lu single waits %lu millisecs\n",
+ i, tm - oldtm);
+
+
+ for (i = 0; i < 64; i++) {
+ os_event_free(arr[i]);
+ }
+}
+
+/*********************************************************************
+Test for threads. */
+
+void
+test1(void)
+/*=======*/
+{
+ os_thread_t thr[64];
+ os_thread_id_t id[64];
+ ulint n[64];
+ ulint tm, oldtm;
+ ulint i, j;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 1. Test of thread switching through yield\n");
+
+ printf("Main thread %lu starts!\n", os_thread_get_curr_id());
+
+ for (j = 0; j < 2; j++) {
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 64; i++) {
+ n[i] = i;
+
+ thr[i] = os_thread_create(thread, n + i, id + i);
+/* printf("Thread %lu created, id %lu \n", i, id[i]); */
+ }
+
+ for (i = 0; i < 64; i++) {
+ os_thread_wait(thr[i]);
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 64; i++) {
+
+ thr[5] = os_thread_create(thread, n + 5, id + 5);
+
+/* printf("Thread created, id %lu \n", id[5]); */
+
+ os_thread_wait(thr[5]);
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for single thread test %lu milliseconds\n",
+ tm - oldtm);
+ }
+}
+
+/*********************************************************************
+Test for shared memory and process switching through yield. */
+
+void
+test2(void)
+/*=======*/
+{
+ os_shm_t shm;
+ ulint tm, oldtm;
+ ulint* pr_no;
+ ulint count;
+ ulint i;
+ bool ret;
+ os_process_t proc;
+ os_process_id_t proc_id;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 2. Test of process switching through yield\n");
+
+ shm = os_shm_create(1000, "TSOS_SHM");
+
+ pr_no = os_shm_map(shm);
+
+ *pr_no = 1;
+ count = 0;
+
+ ret = os_process_create("tsosaux.exe", NULL, &proc, &proc_id);
+
+ printf("Last error: %lu\n", os_thread_get_last_error());
+
+ ut_a(ret);
+
+ printf("Process 1 starts test!\n");
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 500000; i++) {
+ if (*pr_no != 1) {
+ count++;
+ *pr_no = 1;
+ }
+
+ os_thread_yield();
+ }
+
+ tm = ut_clock();
+
+ printf("Process 1 finishes test: %lu process switches noticed\n",
+ count);
+
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ os_shm_unmap(shm);
+
+ os_shm_free(shm);
+}
+
+#ifdef notdefined
+
+/*********************************************************************
+Test for asynchronous file io. */
+
+void
+test3(void)
+/*=======*/
+{
+ ulint i;
+ ulint j;
+ void* mess;
+ bool ret;
+ void* buf;
+ ulint rnd;
+ ulint addr[64];
+ ulint serv[64];
+ ulint tm, oldtm;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 3. Test of asynchronous file io\n");
+
+ /* Align the buffer for file io */
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ rnd = ut_time();
+
+ rnd = rnd * 3416133;
+
+ printf("rnd seed %lu\n", rnd % 4900);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 32; i++) {
+
+ ret = os_aio_read(file, buf, 8192 * (rnd % 4900), 0,
+ 8192, (void*)i);
+ ut_a(ret);
+ rnd += 1;
+ ret = os_aio_wait(0, &mess);
+ ut_a(ret);
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for synchr. io %lu milliseconds\n",
+ tm - oldtm);
+
+ rnd = rnd * 3416133;
+
+ printf("rnd seed %lu\n", rnd % 5000);
+
+ oldtm = ut_clock();
+
+ for (j = 0; j < 5; j++) {
+
+ rnd = rnd + 3416133;
+
+ for (i = 0; i < 16; i++) {
+ ret = os_aio_read(file, buf, 8192 * (rnd % 5000), 0, 8192,
+ (void*)i);
+ addr[i] = rnd % 5000;
+ ut_a(ret);
+ rnd += 1;
+ }
+
+
+ for (i = 0; i < 16; i++) {
+ ret = os_aio_read(file, buf, 8192 * (rnd % 5000), 0, 8192,
+ (void*)i);
+ addr[i] = rnd % 5000;
+ ut_a(ret);
+ rnd += 1;
+ }
+
+ rnd = rnd + 3416133;
+
+ for (i = 0; i < 32; i++) {
+ ret = os_aio_wait(0, &mess);
+ ut_a(ret);
+ ut_a((ulint)mess < 64);
+ serv[(ulint)mess] = i;
+ }
+ }
+ tm = ut_clock();
+ printf("Wall clock time for aio %lu milliseconds\n", tm - oldtm);
+
+ rnd = rnd * 3416133;
+
+ printf("rnd seed %lu\n", rnd % 4900);
+
+ oldtm = ut_clock();
+
+for (j = 0; j < 5; j++) {
+
+ rnd = rnd + 3416133;
+
+ for (i = 0; i < 1; i++) {
+ ret = os_aio_read(file, buf, 8192 * (rnd % 4900), 0,
+ 64 * 8192, (void*)i);
+ ut_a(ret);
+ rnd += 4;
+ ret = os_aio_wait(0, &mess);
+ ut_a(ret);
+ ut_a((ulint)mess < 64);
+ }
+}
+ tm = ut_clock();
+ printf("Wall clock time for synchr. io %lu milliseconds\n",
+ tm - oldtm);
+
+
+/*
+ for (i = 0; i < 63; i++) {
+ printf("read %lu addr %lu served as %lu\n",
+ i, addr[i], serv[i]);
+ }
+*/
+
+ ut_a(ret);
+}
+
+/************************************************************************
+Io-handler thread function. */
+
+ulint
+handler_thread(
+/*===========*/
+ void* arg)
+{
+ ulint segment;
+ void* mess;
+ ulint i;
+ bool ret;
+
+ segment = *((ulint*)arg);
+
+ printf("Thread %lu starts\n", segment);
+
+ for (i = 0;; i++) {
+ ret = os_aio_wait(segment, &mess);
+
+ mutex_enter(&ios_mutex);
+ ios++;
+ mutex_exit(&ios_mutex);
+
+ ut_a(ret);
+/* printf("Message for thread %lu %lu\n", segment,
+ (ulint)mess); */
+ if ((ulint)mess == 3333) {
+ os_event_set(gl_ready);
+ }
+ }
+
+ return(0);
+}
+
+/************************************************************************
+Test of io-handler threads */
+
+void
+test4(void)
+/*=======*/
+{
+ ulint i;
+ ulint j;
+ bool ret;
+ void* buf;
+ ulint rnd;
+ ulint tm, oldtm;
+ os_thread_t thr[5];
+ os_thread_id_t id[5];
+ ulint n[5];
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 4. Test of asynchronous file io\n");
+
+ /* Align the buffer for file io */
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ gl_ready = os_event_create(NULL);
+ ios = 0;
+
+ sync_init();
+ mem_init();
+
+ mutex_create(&ios_mutex);
+
+ for (i = 0; i < 5; i++) {
+ n[i] = i;
+
+ thr[i] = os_thread_create(handler_thread, n + i, id + i);
+ }
+
+ rnd = 0;
+
+ oldtm = ut_clock();
+
+for (j = 0; j < 128; j++) {
+
+
+ for (i = 0; i < 32; i++) {
+ ret = os_aio_read(file, (byte*)buf + 8192 * (rnd % 100),
+ 8192 * (rnd % 4096), 0,
+ 8192, (void*)i);
+ ut_a(ret);
+ rnd += 1;
+ }
+
+/*
+ rnd += 67475941;
+
+ for (i = 0; i < 1; i++) {
+ ret = os_aio_read(file2, buf, 8192 * (rnd % 5000), 0,
+ 8192, (void*)i);
+ ut_a(ret);
+ rnd += 1;
+ }
+*/
+}
+ ret = os_aio_read(file, buf, 8192 * (rnd % 4096), 0, 8192,
+ (void*)3333);
+ ut_a(ret);
+
+ ut_a(!os_aio_all_slots_free());
+/*
+ printf("Starting flush!\n");
+ ret = os_file_flush(file);
+ ut_a(ret);
+ printf("Ending flush!\n");
+*/
+ tm = ut_clock();
+
+ printf("All ios queued! N ios: %lu\n", ios);
+
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ os_event_wait(gl_ready);
+
+ tm = ut_clock();
+ printf("N ios: %lu\n", ios);
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ os_thread_sleep(2000000);
+
+ printf("N ios: %lu\n", ios);
+
+ ut_a(os_aio_all_slots_free());
+}
+
+/*************************************************************************
+Initializes the asyncronous io system for tests. */
+
+void
+init_aio(void)
+/*==========*/
+{
+ bool ret;
+ ulint i;
+ void* buf;
+ void* mess;
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ os_aio_init(160, 5);
+ file = os_file_create("j:\\tsfile2", OS_FILE_CREATE, OS_FILE_TABLESPACE,
+ &ret);
+
+ if (ret == FALSE) {
+ ut_a(os_file_get_last_error() == OS_FILE_ALREADY_EXISTS);
+
+ file = os_file_create("j:\\tsfile2", OS_FILE_OPEN,
+ OS_FILE_TABLESPACE, &ret);
+
+ ut_a(ret);
+ } else {
+
+ for (i = 0; i < 4100; i++) {
+ ret = os_aio_write(file, buf, 8192 * i, 0, 8192, NULL);
+ ut_a(ret);
+
+ ret = os_aio_wait(0, &mess);
+
+ ut_a(ret);
+ ut_a(mess == NULL);
+ }
+ }
+
+ file2 = os_file_create("F:\\tmp\\tsfile", OS_FILE_CREATE,
+ OS_FILE_TABLESPACE,
+ &ret);
+
+ if (ret == FALSE) {
+ ut_a(os_file_get_last_error() == OS_FILE_ALREADY_EXISTS);
+
+ file2 = os_file_create("F:\\tmp\\tsfile", OS_FILE_OPEN,
+ OS_FILE_TABLESPACE, &ret);
+
+ ut_a(ret);
+ } else {
+
+ for (i = 0; i < 5000; i++) {
+ ret = os_aio_write(file2, buf, 8192 * i, 0, 8192, NULL);
+ ut_a(ret);
+
+ ret = os_aio_wait(0, &mess);
+
+ ut_a(ret);
+ ut_a(mess == NULL);
+ }
+ }
+}
+
+/************************************************************************
+Test of synchronous io */
+
+void
+test5(void)
+/*=======*/
+{
+ ulint i, j, k;
+ bool ret;
+ void* buf;
+ ulint rnd = 0;
+ ulint tm = 0;
+ ulint oldtm = 0;
+ os_file_t files[1000];
+ char name[5];
+ ulint err;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 5. Test of creating and opening of many files\n");
+
+ /* Align the buffer for file io */
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ name[2] = '.';
+ name[3] = 'd';
+ name[4] = '\0';
+
+ oldtm = ut_clock();
+
+ for (j = 0; j < 20; j++) {
+ for (i = 0; i < 20; i++) {
+ name[0] = (char)(i + (ulint)'A');
+ name[1] = (char)(j + (ulint)'A');
+ files[j * 20 + i] = os_file_create(name, OS_FILE_CREATE,
+ OS_FILE_NORMAL, &ret);
+ if (!ret) {
+ err = os_file_get_last_error();
+ }
+ ut_a(ret);
+ }
+ }
+
+ for (k = 0; k < i * j; k++) {
+ ret = os_file_close(files[k]);
+ ut_a(ret);
+ }
+
+ for (j = 0; j < 20; j++) {
+ for (i = 0; i < 20; i++) {
+ name[0] = (char)(i + (ulint)'A');
+ name[1] = (char)(j + (ulint)'A');
+ ret = os_file_delete(name);
+ ut_a(ret);
+ }
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+}
+
+/************************************************************************
+Test of synchronous io */
+
+void
+test6(void)
+/*=======*/
+{
+ ulint i, j;
+ bool ret;
+ void* buf;
+ ulint rnd = 0;
+ ulint tm = 0;
+ ulint oldtm = 0;
+ os_file_t s_file;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 6. Test of synchronous io\n");
+
+ buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF));
+
+ ret = os_file_close(file);
+ ut_a(ret);
+
+ ret = os_file_close(file2);
+ ut_a(ret);
+
+ s_file = os_file_create("tsfile", OS_FILE_OPEN,
+ OS_FILE_NORMAL, &ret);
+ if (!ret) {
+ printf("Error no %lu\n", os_file_get_last_error());
+ }
+
+ ut_a(ret);
+
+ rnd = ut_time() * 6346353;
+
+ oldtm = ut_clock();
+
+ for (j = 0; j < 100; j++) {
+
+ rnd += 8072791;
+
+ for (i = 0; i < 32; i++) {
+ ret = os_file_read(s_file, buf, 8192 * (rnd % 5000), 0,
+ 8192);
+ ut_a(ret);
+ rnd += 1;
+ }
+ }
+
+ tm = ut_clock();
+
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+}
+
+/************************************************************************
+Test of file size operations. */
+
+void
+test7(void)
+/*=======*/
+{
+ bool ret;
+ os_file_t f;
+ ulint len;
+ ulint high;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 7. Test of setting and getting file size\n");
+
+
+ f = os_file_create("sizefile", OS_FILE_CREATE, OS_FILE_TABLESPACE,
+ &ret);
+ ut_a(ret);
+
+ ret = os_file_get_size(f, &len, &high);
+ ut_a(ret);
+
+ ut_a(len == 0);
+ ut_a(high == 0);
+
+ ret = os_file_set_size(f, 5000000, 0);
+ ut_a(ret);
+
+ ret = os_file_get_size(f, &len, &high);
+ ut_a(ret);
+
+ ut_a(len == 5000000);
+ ut_a(high == 0);
+
+ ret = os_file_set_size(f, 4000000, 0);
+ ut_a(ret);
+
+ ret = os_file_get_size(f, &len, &high);
+ ut_a(ret);
+
+ ut_a(len == 4000000);
+ ut_a(high == 0);
+
+ ret = os_file_close(f);
+ ut_a(ret);
+
+ ret = os_file_delete("sizefile");
+ ut_a(ret);
+}
+#endif
+
+/************************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+{
+ ulint tm, oldtm;
+ ulint i;
+ CRITICAL_SECTION cs;
+ ulint sum;
+ ulint rnd;
+
+ cache_buf = VirtualAlloc(NULL, 4 * 1024, MEM_COMMIT,
+ PAGE_READWRITE /* | PAGE_NOCACHE */);
+ oldtm = ut_clock();
+
+ sum = 0;
+ rnd = 0;
+
+ for (i = 0; i < 1000000; i++) {
+
+ sum += cache_buf[rnd * (16)];
+
+ rnd += 1;
+
+ if (rnd > 7) {
+ rnd = 0;
+ }
+ }
+
+ tm = ut_clock();
+
+ printf("Wall clock time for cache test %lu milliseconds\n", tm - oldtm);
+
+ InterlockedExchange(&i, 5);
+
+ InitializeCriticalSection(&cs);
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 10000000; i++) {
+
+ TryEnterCriticalSection(&cs);
+
+ LeaveCriticalSection(&cs);
+ }
+
+ tm = ut_clock();
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+ testa1();
+
+ test1();
+
+/* test2(); */
+
+/* init_aio(); */
+/*
+ test3();
+*/
+/* test4();
+
+ test5();
+
+ test6();
+
+ test7(); */
+
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+}
+
diff --git a/innobase/os/ts/tsosaux.c b/innobase/os/ts/tsosaux.c
new file mode 100644
index 00000000000..8f7780844e9
--- /dev/null
+++ b/innobase/os/ts/tsosaux.c
@@ -0,0 +1,83 @@
+/************************************************************************
+The test module for the operating system interface
+Auxiliary executable run alongside tsos.exe to test
+process switching speed
+
+(c) 1995 Innobase Oy
+
+Created 9/27/1995 Heikki Tuuri
+*************************************************************************/
+
+
+#include "../os0thread.h"
+#include "../os0shm.h"
+#include "../os0proc.h"
+#include "ut0ut.h"
+
+/*********************************************************************
+Test for shared memory and process switching through yield. */
+
+void
+test2(void)
+/*=======*/
+{
+ os_shm_t shm;
+ ulint tm, oldtm;
+ ulint* pr_no;
+ ulint count;
+ ulint i;
+
+ printf("-------------------------------------------\n");
+ printf("OS-TEST 2. Test of process switching through yield\n");
+
+
+ shm = os_shm_create(1000, "TSOS_SHM");
+
+ pr_no = os_shm_map(shm);
+
+ count = 0;
+
+ printf("Process 2 starts test!\n");
+
+ oldtm = ut_clock();
+
+ for (i = 0; i < 100000; i++) {
+ if (*pr_no != 2) {
+ count++;
+ *pr_no = 2;
+ }
+ os_thread_yield();
+ }
+
+ tm = ut_clock();
+
+ printf("Process 2 finishes test: %lu process switches noticed\n",
+ count);
+
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+
+
+ os_shm_unmap(shm);
+
+ os_shm_free(shm);
+}
+
+/************************************************************************
+Main test function. */
+
+void
+main(void)
+/*======*/
+{
+ ulint tm, oldtm;
+
+ oldtm = ut_clock();
+
+ test2();
+
+ tm = ut_clock();
+ printf("Wall clock time for test %lu milliseconds\n", tm - oldtm);
+ printf("TESTS COMPLETED SUCCESSFULLY!\n");
+
+ os_process_exit(0);
+}