diff options
Diffstat (limited to 'innobase/os')
-rw-r--r-- | innobase/os/Makefile.am | 24 | ||||
-rw-r--r-- | innobase/os/makefilewin | 20 | ||||
-rw-r--r-- | innobase/os/os0file.c | 2005 | ||||
-rw-r--r-- | innobase/os/os0fileold.c | 1956 | ||||
-rw-r--r-- | innobase/os/os0proc.c | 150 | ||||
-rw-r--r-- | innobase/os/os0shm.c | 146 | ||||
-rw-r--r-- | innobase/os/os0sync.c | 461 | ||||
-rw-r--r-- | innobase/os/os0thread.c | 210 | ||||
-rw-r--r-- | innobase/os/os0trash.c | 43 | ||||
-rw-r--r-- | innobase/os/ts/makefile | 20 | ||||
-rw-r--r-- | innobase/os/ts/tsos.c | 793 | ||||
-rw-r--r-- | innobase/os/ts/tsosaux.c | 83 |
12 files changed, 5911 insertions, 0 deletions
diff --git a/innobase/os/Makefile.am b/innobase/os/Makefile.am new file mode 100644 index 00000000000..b517cac1cc8 --- /dev/null +++ b/innobase/os/Makefile.am @@ -0,0 +1,24 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# & Innobase Oy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include ../include/Makefile.i + +libs_LIBRARIES = libos.a + +libos_a_SOURCES = os0proc.c os0shm.c os0sync.c os0thread.c os0file.c + +EXTRA_PROGRAMS = diff --git a/innobase/os/makefilewin b/innobase/os/makefilewin new file mode 100644 index 00000000000..08dba0e5e47 --- /dev/null +++ b/innobase/os/makefilewin @@ -0,0 +1,20 @@ +include ..\include\makefile.i + +os.lib: os0sync.obj os0thread.obj os0shm.obj os0proc.obj os0file.obj + lib -out:..\libs\os.lib os0sync.obj os0thread.obj os0shm.obj os0proc.obj os0file.obj + +os0sync.obj: os0sync.c + $(CCOM) $(CFLW) -c os0sync.c + +os0thread.obj: os0thread.c + $(CCOM) $(CFLW) -c os0thread.c + +os0shm.obj: os0shm.c + $(CCOM) $(CFLW) -c os0shm.c + +os0proc.obj: os0proc.c + $(CCOM) $(CFLW) -c os0proc.c + +os0file.obj: os0file.c + $(CCOM) $(CFLW) -c os0file.c + diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c new file mode 100644 index 00000000000..7851b83732d --- /dev/null +++ b/innobase/os/os0file.c @@ -0,0 +1,2005 @@ +/****************************************************** +The interface to the operating system file i/o primitives + +(c) 1995 Innobase Oy + +Created 10/21/1995 Heikki Tuuri +*******************************************************/ + +#include "os0file.h" +#include "os0sync.h" +#include "ut0mem.h" + + +#ifdef POSIX_ASYNC_IO +/* We assume in this case that the OS has standard Posix aio (at least SunOS +2.6, HP-UX 11i and AIX 4.3 have) */ + +#undef __USE_FILE_OFFSET64 + +#include <aio.h> +#endif + +/* We use these mutexes to protect lseek + file i/o operation, if the +OS does not provide an atomic pread or pwrite, or similar */ +#define OS_FILE_N_SEEK_MUTEXES 16 +os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES]; + +/* In simulated aio, merge at most this many consecutive i/os */ +#define OS_AIO_MERGE_N_CONSECUTIVE 32 + +/* If this flag is TRUE, then we will use the native aio of the +OS (provided we compiled Innobase with it in), otherwise we will +use simulated aio we build below with threads */ + +ibool os_aio_use_native_aio = FALSE; + +/* The aio array slot structure */ +typedef struct os_aio_slot_struct os_aio_slot_t; + +struct os_aio_slot_struct{ + ibool is_read; /* TRUE if a read operation */ + ulint pos; /* index of the slot in the aio + array */ + ibool reserved; /* TRUE if this slot is reserved */ + ulint len; /* length of the block to read or + write */ + byte* buf; /* buffer used in i/o */ + ulint type; /* OS_FILE_READ or OS_FILE_WRITE */ + ulint offset; /* 32 low bits of file offset in + bytes */ + ulint offset_high; /* 32 high bits of file offset */ + os_file_t file; /* file where to read or write */ + char* name; /* file name or path */ + ibool io_already_done;/* used only in simulated aio: + TRUE if the physical i/o already + made and only the slot message + needs to be passed to the caller + of os_aio_simulated_handle */ + void* message1; /* message which is given by the */ + void* message2; /* the requester of an aio operation + and which can be used to identify + which pending aio operation was + completed */ +#ifdef WIN_ASYNC_IO + OVERLAPPED control; /* Windows control block for the + aio request */ +#elif defined(POSIX_ASYNC_IO) + struct aiocb control; /* Posix control block for aio + request */ +#endif +}; + +/* The aio array structure */ +typedef struct os_aio_array_struct os_aio_array_t; + +struct os_aio_array_struct{ + os_mutex_t mutex; /* the mutex protecting the aio array */ + os_event_t not_full; /* The event which is set to signaled + state when there is space in the aio + outside the ibuf segment */ + ulint n_slots; /* Total number of slots in the aio array. + This must be divisible by n_threads. */ + ulint n_segments;/* Number of segments in the aio array of + pending aio requests. A thread can wait + separately for any one of the segments. */ + ulint n_reserved;/* Number of reserved slots in the + aio array outside the ibuf segment */ + os_aio_slot_t* slots; /* Pointer to the slots in the array */ + os_event_t* events; /* Pointer to an array of event handles + where we copied the handles from slots, + in the same order. This can be used in + WaitForMultipleObjects; used only in + Windows */ +}; + +/* Array of events used in simulated aio */ +os_event_t* os_aio_segment_wait_events = NULL; + +/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These +are NULL when the module has not yet been initialized. */ +os_aio_array_t* os_aio_read_array = NULL; +os_aio_array_t* os_aio_write_array = NULL; +os_aio_array_t* os_aio_ibuf_array = NULL; +os_aio_array_t* os_aio_log_array = NULL; +os_aio_array_t* os_aio_sync_array = NULL; + +ulint os_aio_n_segments = ULINT_UNDEFINED; + +/*************************************************************************** +Retrieves the last error number if an error occurs in a file io function. +The number should be retrieved before any other OS calls (because they may +overwrite the error number). If the number is not known to this program, +the OS error number + 100 is returned. */ + +ulint +os_file_get_last_error(void) +/*========================*/ + /* out: error number, or OS error number + 100 */ +{ + ulint err; + +#ifdef __WIN__ + + err = (ulint) GetLastError(); + + if (err == ERROR_FILE_NOT_FOUND) { + return(OS_FILE_NOT_FOUND); + } else if (err == ERROR_DISK_FULL) { + return(OS_FILE_DISK_FULL); + } else if (err == ERROR_FILE_EXISTS) { + return(OS_FILE_ALREADY_EXISTS); + } else { + return(100 + err); + } +#else + err = (ulint) errno; + + if (err == ENOSPC ) { + return(OS_FILE_DISK_FULL); +#ifdef POSIX_ASYNC_IO + } else if (err == EAGAIN) { + return(OS_FILE_AIO_RESOURCES_RESERVED); +#endif + } else if (err == ENOENT) { + return(OS_FILE_NOT_FOUND); + } else if (err == EEXIST) { + return(OS_FILE_ALREADY_EXISTS); + } else { + return(100 + err); + } +#endif +} + +/******************************************************************** +Does error handling when a file operation fails. If we have run out +of disk space, then the user can clean the disk. If we do not find +a specified file, then the user can copy it to disk. */ +static +ibool +os_file_handle_error( +/*=================*/ + /* out: TRUE if we should retry the operation */ + os_file_t file, /* in: file pointer */ + char* name) /* in: name of a file or NULL */ +{ + int input_char; + ulint err; + + err = os_file_get_last_error(); + + if (err == OS_FILE_DISK_FULL) { +ask_again: + printf("\n"); + if (name) { + printf( + "Innobase encountered a problem with file %s.\n", + name); + } + printf("Disk is full. Try to clean the disk to free space\n"); + printf("before answering the following: How to continue?\n"); + printf("(Y == freed some space: try again)\n"); + printf("(N == crash the database: will restart it)?\n"); +ask_with_no_question: + input_char = getchar(); + + if (input_char == (int) 'N') { + ut_error; + + return(FALSE); + } else if (input_char == (int) 'Y') { + + return(TRUE); + } else if (input_char == (int) '\n') { + + goto ask_with_no_question; + } else { + goto ask_again; + } + } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) { + + return(TRUE); + } else { + ut_error; + } + + return(FALSE); +} + +/******************************************************************** +Opens an existing file or creates a new. */ + +os_file_t +os_file_create( +/*===========*/ + /* out, own: handle to the file, not defined if error, + error number can be retrieved with os_get_last_error */ + char* name, /* in: name of the file or path as a null-terminated + string */ + ulint create_mode, /* in: OS_FILE_OPEN if an existing file is opened + (if does not exist, error), or OS_FILE_CREATE if a new + file is created (if exists, error), OS_FILE_OVERWRITE + if a new is created or an old overwritten */ + ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o + is desired, OS_FILE_NORMAL, if any normal file */ + ibool* success)/* out: TRUE if succeed, FALSE if error */ +{ +#ifdef __WIN__ + os_file_t file; + DWORD create_flag; + DWORD attributes; + ibool retry; + +try_again: + ut_a(name); + + if (create_mode == OS_FILE_OPEN) { + create_flag = OPEN_EXISTING; + } else if (create_mode == OS_FILE_CREATE) { + create_flag = CREATE_NEW; + } else if (create_mode == OS_FILE_OVERWRITE) { + create_flag = CREATE_ALWAYS; + } else { + create_flag = 0; + ut_error; + } + + if (purpose == OS_FILE_AIO) { + /* use asynchronous (overlapped) io and no buffering + of writes in the OS */ + attributes = 0; +#ifdef WIN_ASYNC_IO + if (os_aio_use_native_aio) { + attributes = attributes | FILE_FLAG_OVERLAPPED; + } +#endif +#ifdef UNIV_NON_BUFFERED_IO + attributes = attributes | FILE_FLAG_NO_BUFFERING; +#endif + } else if (purpose == OS_FILE_NORMAL) { + attributes = 0 +#ifdef UNIV_NON_BUFFERED_IO + | FILE_FLAG_NO_BUFFERING +#endif + ; + } else { + attributes = 0; + ut_error; + } + + file = CreateFile(name, + GENERIC_READ | GENERIC_WRITE, /* read and write + access */ + FILE_SHARE_READ,/* file can be read by other + processes */ + NULL, /* default security attributes */ + create_flag, + attributes, + NULL); /* no template file */ + + if (file == INVALID_HANDLE_VALUE) { + *success = FALSE; + + if (create_mode != OS_FILE_OPEN + && os_file_get_last_error() == OS_FILE_DISK_FULL) { + + retry = os_file_handle_error(file, name); + + if (retry) { + goto try_again; + } + } + } else { + *success = TRUE; + } + + return(file); +#else + os_file_t file; + int create_flag; + ibool retry; + +try_again: + ut_a(name); + + if (create_mode == OS_FILE_OPEN) { + create_flag = O_RDWR; + } else if (create_mode == OS_FILE_CREATE) { + create_flag = O_RDWR | O_CREAT | O_EXCL; + } else if (create_mode == OS_FILE_OVERWRITE) { + create_flag = O_RDWR | O_CREAT | O_TRUNC; + } else { + create_flag = 0; + ut_error; + } + + UT_NOT_USED(purpose); + + if (create_mode == OS_FILE_CREATE) { + + file = open(name, create_flag, S_IRWXU | S_IRWXG | S_IRWXO); + } else { + file = open(name, create_flag); + } + + if (file == -1) { + *success = FALSE; + + if (create_mode != OS_FILE_OPEN + && errno == ENOSPC) { + + retry = os_file_handle_error(file, name); + + if (retry) { + goto try_again; + } + } + } else { + *success = TRUE; + } + + return(file); +#endif +} + +/*************************************************************************** +Closes a file handle. In case of error, error number can be retrieved with +os_file_get_last_error. */ + +ibool +os_file_close( +/*==========*/ + /* out: TRUE if success */ + os_file_t file) /* in, own: handle to a file */ +{ +#ifdef __WIN__ + BOOL ret; + + ut_a(file); + + ret = CloseHandle(file); + + if (ret) { + return(TRUE); + } + + return(FALSE); +#else + int ret; + + ret = close(file); + + if (ret == -1) { + return(FALSE); + } + + return(TRUE); +#endif +} + +/*************************************************************************** +Gets a file size. */ + +ibool +os_file_get_size( +/*=============*/ + /* out: TRUE if success */ + os_file_t file, /* in: handle to a file */ + ulint* size, /* out: least significant 32 bits of file + size */ + ulint* size_high)/* out: most significant 32 bits of size */ +{ +#ifdef __WIN__ + DWORD high; + DWORD low; + + low = GetFileSize(file, &high); + + if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) { + return(FALSE); + } + + *size = low; + *size_high = high; + + return(TRUE); +#else + *size = (ulint) lseek(file, 0, SEEK_END); + *size_high = 0; + + return(TRUE); +#endif +} + +/*************************************************************************** +Sets a file size. This function can be used to extend or truncate a file. */ + +ibool +os_file_set_size( +/*=============*/ + /* out: TRUE if success */ + char* name, /* in: name of the file or path as a + null-terminated string */ + os_file_t file, /* in: handle to a file */ + ulint size, /* in: least significant 32 bits of file + size */ + ulint size_high)/* in: most significant 32 bits of size */ +{ + ulint offset; + ulint n_bytes; + ulint low; + ibool ret; + ibool retry; + ulint i; + byte* buf; + +try_again: + buf = ut_malloc(UNIV_PAGE_SIZE * 64); + + /* Write buffer full of zeros */ + for (i = 0; i < UNIV_PAGE_SIZE * 64; i++) { + buf[i] = '\0'; + } + + offset = 0; + low = size; +#if (UNIV_WORD_SIZE == 8) + low = low + (size_high << 32); +#endif + while (offset < low) { + if (low - offset < UNIV_PAGE_SIZE * 64) { + n_bytes = low - offset; + } else { + n_bytes = UNIV_PAGE_SIZE * 64; + } + + ret = os_file_write(name, file, buf, offset, 0, n_bytes); + + if (!ret) { + ut_free(buf); + goto error_handling; + } + offset += n_bytes; + } + + ut_free(buf); + + ret = os_file_flush(file); + + if (ret) { + return(TRUE); + } + +error_handling: + retry = os_file_handle_error(file, name); + + if (retry) { + goto try_again; + } + + ut_error; +} + +/*************************************************************************** +Flushes the write buffers of a given file to the disk. */ + +ibool +os_file_flush( +/*==========*/ + /* out: TRUE if success */ + os_file_t file) /* in, own: handle to a file */ +{ +#ifdef __WIN__ + BOOL ret; + + ut_a(file); + + ret = FlushFileBuffers(file); + + if (ret) { + return(TRUE); + } + + return(FALSE); +#else + int ret; + + ret = fsync(file); + + if (ret == 0) { + return(TRUE); + } + + return(FALSE); +#endif +} + + +#ifndef __WIN__ +/*********************************************************************** +Does a synchronous read operation in Posix. */ +static +ssize_t +os_file_pread( +/*==========*/ + /* out: number of bytes read, -1 if error */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read */ + ulint n, /* in: number of bytes to read */ + ulint offset) /* in: offset from where to read */ +{ +#ifdef HAVE_PREAD + return(pread(file, buf, n, (off_t) offset)); +#else + ssize_t ret; + ulint i; + + /* Protect the seek / read operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret = lseek(file, (off_t) offset, 0); + + if (ret < 0) { + os_mutex_exit(os_file_seek_mutexes[i]); + + return(ret); + } + + ret = read(file, buf, n); + + os_mutex_exit(os_file_seek_mutexes[i]); + + return(ret); +#endif +} + +/*********************************************************************** +Does a synchronous write operation in Posix. */ +static +ssize_t +os_file_pwrite( +/*===========*/ + /* out: number of bytes written, -1 if error */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer from where to write */ + ulint n, /* in: number of bytes to write */ + ulint offset) /* in: offset where to write */ +{ +#ifdef HAVE_PWRITE + return(pwrite(file, buf, n, (off_t) offset)); +#else + ssize_t ret; + ulint i; + + /* Protect the seek / write operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret = lseek(file, (off_t) offset, 0); + + if (ret < 0) { + os_mutex_exit(os_file_seek_mutexes[i]); + + return(ret); + } + + ret = write(file, buf, n); + + os_mutex_exit(os_file_seek_mutexes[i]); + + return(ret); +#endif +} +#endif + +/*********************************************************************** +Requests a synchronous positioned read operation. */ + +ibool +os_file_read( +/*=========*/ + /* out: TRUE if request was + successful, FALSE if fail */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read */ + ulint offset, /* in: least significant 32 bits of file + offset where to read */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint n) /* in: number of bytes to read */ +{ +#ifdef __WIN__ + BOOL ret; + DWORD len; + DWORD ret2; + DWORD err; + DWORD low; + DWORD high; + ibool retry; + ulint i; + +try_again: + ut_ad(file); + ut_ad(buf); + ut_ad(n > 0); + + low = offset; + high = offset_high; + + /* Protect the seek / read operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); + + if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { + err = GetLastError(); + + os_mutex_exit(os_file_seek_mutexes[i]); + + goto error_handling; + } + + ret = ReadFile(file, buf, n, &len, NULL); + + os_mutex_exit(os_file_seek_mutexes[i]); + + if (ret && len == n) { + return(TRUE); + } + + err = GetLastError(); +#else + ibool retry; + ssize_t ret; + ulint i; + +#if (UNIV_WORD_SIZE == 8) + offset = offset + (offset_high << 32); +#endif +try_again: + /* Protect the seek / read operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret = os_file_pread(file, buf, n, (off_t) offset); + + if (ret == n) { + os_mutex_exit(os_file_seek_mutexes[i]); + + return(TRUE); + } +#endif +error_handling: + retry = os_file_handle_error(file, NULL); + + if (retry) { + goto try_again; + } + + ut_error; + + return(FALSE); +} + +/*********************************************************************** +Requests a synchronous write operation. */ + +ibool +os_file_write( +/*==========*/ + /* out: TRUE if request was + successful, FALSE if fail */ + char* name, /* in: name of the file or path as a + null-terminated string */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer from which to write */ + ulint offset, /* in: least significant 32 bits of file + offset where to write */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint n) /* in: number of bytes to write */ +{ +#ifdef __WIN__ + BOOL ret; + DWORD len; + DWORD ret2; + DWORD err; + DWORD low; + DWORD high; + ibool retry; + ulint i; + +try_again: + ut_ad(file); + ut_ad(buf); + ut_ad(n > 0); + + low = offset; + high = offset_high; + + /* Protect the seek / write operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); + + if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { + err = GetLastError(); + + os_mutex_exit(os_file_seek_mutexes[i]); + + goto error_handling; + } + + ret = WriteFile(file, buf, n, &len, NULL); + + os_mutex_exit(os_file_seek_mutexes[i]); + + if (ret && len == n) { + return(TRUE); + } +#else + ibool retry; + ssize_t ret; + +#if (UNIV_WORD_SIZE == 8) + offset = offset + (offset_high << 32); +#endif +try_again: + ret = pwrite(file, buf, n, (off_t) offset); + + if (ret == n) { + return(TRUE); + } +#endif + +error_handling: + retry = os_file_handle_error(file, name); + + if (retry) { + goto try_again; + } + + ut_error; + + return(FALSE); +} + +/******************************************************************** +Returns a pointer to the nth slot in the aio array. */ +static +os_aio_slot_t* +os_aio_array_get_nth_slot( +/*======================*/ + /* out: pointer to slot */ + os_aio_array_t* array, /* in: aio array */ + ulint index) /* in: index of the slot */ +{ + ut_a(index < array->n_slots); + + return((array->slots) + index); +} + +/**************************************************************************** +Creates an aio wait array. */ +static +os_aio_array_t* +os_aio_array_create( +/*================*/ + /* out, own: aio array */ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ + ulint n_segments) /* in: number of segments in the aio array */ +{ + os_aio_array_t* array; + ulint i; + os_aio_slot_t* slot; +#ifdef WIN_ASYNC_IO + OVERLAPPED* over; +#endif + ut_a(n > 0); + ut_a(n_segments > 0); + ut_a(n % n_segments == 0); + + array = ut_malloc(sizeof(os_aio_array_t)); + + array->mutex = os_mutex_create(NULL); + array->not_full = os_event_create(NULL); + array->n_slots = n; + array->n_segments = n_segments; + array->n_reserved = 0; + array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); + array->events = ut_malloc(n * sizeof(os_event_t)); + + for (i = 0; i < n; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + slot->pos = i; + slot->reserved = FALSE; +#ifdef WIN_ASYNC_IO + over = &(slot->control); + + over->hEvent = os_event_create(NULL); + + *((array->events) + i) = over->hEvent; +#endif + } + + return(array); +} + +/**************************************************************************** +Initializes the asynchronous io system. Creates separate aio array for +non-ibuf read and write, a third aio array for the ibuf i/o, with just one +segment, two aio arrays for log reads and writes with one segment, and a +synchronous aio array of the specified size. The combined number of segments +in the three first aio arrays is the parameter n_segments given to the +function. The caller must create an i/o handler thread for each segment in +the four first arrays, but not for the sync aio array. */ + +void +os_aio_init( +/*========*/ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ + ulint n_segments, /* in: combined number of segments in the four + first aio arrays; must be >= 4 */ + ulint n_slots_sync) /* in: number of slots in the sync aio array */ +{ + ulint n_read_segs; + ulint n_write_segs; + ulint n_per_seg; + ulint i; +#ifdef POSIX_ASYNC_IO + sigset_t sigset; +#endif + ut_ad(n % n_segments == 0); + ut_ad(n_segments >= 4); + + n_per_seg = n / n_segments; + n_write_segs = (n_segments - 2) / 2; + n_read_segs = n_segments - 2 - n_write_segs; + + /* printf("Array n per seg %lu\n", n_per_seg); */ + + os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg, + n_read_segs); + os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg, + n_write_segs); + os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1); + + os_aio_log_array = os_aio_array_create(n_per_seg, 1); + + os_aio_sync_array = os_aio_array_create(n_slots_sync, 1); + + os_aio_n_segments = n_segments; + + os_aio_validate(); + + for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { + os_file_seek_mutexes[i] = os_mutex_create(NULL); + } + + os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*)); + + for (i = 0; i < n_segments; i++) { + os_aio_segment_wait_events[i] = os_event_create(NULL); + } + +#ifdef POSIX_ASYNC_IO + /* Block aio signals from the current thread and its children: + for this to work, the current thread must be the first created + in the database, so that all its children will inherit its + signal mask */ + + sigemptyset(&sigset); + sigaddset(&sigset, SIGRTMIN + 1 + 0); + sigaddset(&sigset, SIGRTMIN + 1 + 1); + sigaddset(&sigset, SIGRTMIN + 1 + 2); + sigaddset(&sigset, SIGRTMIN + 1 + 3); + + pthread_sigmask(SIG_BLOCK, &sigset, NULL); +#endif +} + +/************************************************************************** +Calculates segment number for a slot. */ +static +ulint +os_aio_get_segment_no_from_slot( +/*============================*/ + /* out: segment number (which is the number + used by, for example, i/o-handler threads) */ + os_aio_array_t* array, /* in: aio wait array */ + os_aio_slot_t* slot) /* in: slot in this array */ +{ + ulint segment; + ulint seg_len; + + if (array == os_aio_ibuf_array) { + segment = 0; + + } else if (array == os_aio_log_array) { + segment = 1; + + } else if (array == os_aio_read_array) { + seg_len = os_aio_read_array->n_slots / + os_aio_read_array->n_segments; + + segment = 2 + slot->pos / seg_len; + } else { + ut_a(array == os_aio_write_array); + seg_len = os_aio_write_array->n_slots / + os_aio_write_array->n_segments; + + segment = os_aio_read_array->n_segments + 2 + + slot->pos / seg_len; + } + + return(segment); +} + +/************************************************************************** +Calculates local segment number and aio array from global segment number. */ +static +ulint +os_aio_get_array_and_local_segment( +/*===============================*/ + /* out: local segment number within + the aio array */ + os_aio_array_t** array, /* out: aio wait array */ + ulint global_segment)/* in: global segment number */ +{ + ulint segment; + + ut_a(global_segment < os_aio_n_segments); + + if (global_segment == 0) { + *array = os_aio_ibuf_array; + segment = 0; + + } else if (global_segment == 1) { + *array = os_aio_log_array; + segment = 0; + + } else if (global_segment < os_aio_read_array->n_segments + 2) { + *array = os_aio_read_array; + + segment = global_segment - 2; + } else { + *array = os_aio_write_array; + + segment = global_segment - (os_aio_read_array->n_segments + 2); + } + + return(segment); +} + +/*********************************************************************** +Gets an integer value designating a specified aio array. This is used +to give numbers to signals in Posix aio. */ +static +ulint +os_aio_get_array_no( +/*================*/ + os_aio_array_t* array) /* in: aio array */ +{ + if (array == os_aio_ibuf_array) { + + return(0); + + } else if (array == os_aio_log_array) { + + return(1); + + } else if (array == os_aio_read_array) { + + return(2); + } else if (array == os_aio_write_array) { + + return(3); + } else { + ut_a(0); + + return(0); + } +} + +/*********************************************************************** +Gets the aio array for its number. */ +static +os_aio_array_t* +os_aio_get_array_from_no( +/*=====================*/ + /* out: aio array */ + ulint n) /* in: array number */ +{ + if (n == 0) { + return(os_aio_ibuf_array); + } else if (n == 1) { + + return(os_aio_log_array); + } else if (n == 2) { + + return(os_aio_read_array); + } else if (n == 3) { + + return(os_aio_write_array); + } else { + ut_a(0); + + return(NULL); + } +} + +/*********************************************************************** +Requests for a slot in the aio array. If no slot is available, waits until +not_full-event becomes signaled. */ +static +os_aio_slot_t* +os_aio_array_reserve_slot( +/*======================*/ + /* out: pointer to slot */ + ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */ + os_aio_array_t* array, /* in: aio array */ + void* message1,/* in: message to be passed along with + the aio operation */ + void* message2,/* in: message to be passed along with + the aio operation */ + os_file_t file, /* in: file handle */ + char* name, /* in: name of the file or path as a + null-terminated string */ + void* buf, /* in: buffer where to read or from which + to write */ + ulint offset, /* in: least significant 32 bits of file + offset */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint len) /* in: length of the block to read or write */ +{ + os_aio_slot_t* slot; +#ifdef WIN_ASYNC_IO + OVERLAPPED* control; + +#elif defined(POSIX_ASYNC_IO) + + struct aiocb* control; +#endif + ulint i; +loop: + os_mutex_enter(array->mutex); + + if (array->n_reserved == array->n_slots) { + os_mutex_exit(array->mutex); + + if (!os_aio_use_native_aio) { + /* If the handler threads are suspended, wake them + so that we get more slots */ + + os_aio_simulated_wake_handler_threads(); + } + + os_event_wait(array->not_full); + + goto loop; + } + + for (i = 0;; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + if (slot->reserved == FALSE) { + break; + } + } + + array->n_reserved++; + + if (array->n_reserved == array->n_slots) { + os_event_reset(array->not_full); + } + + slot->reserved = TRUE; + slot->message1 = message1; + slot->message2 = message2; + slot->file = file; + slot->name = name; + slot->len = len; + slot->type = type; + slot->buf = buf; + slot->offset = offset; + slot->offset_high = offset_high; + slot->io_already_done = FALSE; + +#ifdef WIN_ASYNC_IO + control = &(slot->control); + control->Offset = (DWORD)offset; + control->OffsetHigh = (DWORD)offset_high; + os_event_reset(control->hEvent); + +#elif defined(POSIX_ASYNC_IO) + +#if (UNIV_WORD_SIZE == 8) + offset = offset + (offset_high << 32); +#else + ut_a(offset_high == 0); +#endif + control = &(slot->control); + control->aio_fildes = file; + control->aio_buf = buf; + control->aio_nbytes = len; + control->aio_offset = offset; + control->aio_reqprio = 0; + control->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + control->aio_sigevent.sigev_signo = + SIGRTMIN + 1 + os_aio_get_array_no(array); + /* TODO: How to choose the signal numbers? */ +/* + printf("AIO signal number %lu\n", (ulint) control->aio_sigevent.sigev_signo); +*/ + control->aio_sigevent.sigev_value.sival_ptr = slot; +#endif + os_mutex_exit(array->mutex); + + return(slot); +} + +/*********************************************************************** +Frees a slot in the aio array. */ +static +void +os_aio_array_free_slot( +/*===================*/ + os_aio_array_t* array, /* in: aio array */ + os_aio_slot_t* slot) /* in: pointer to slot */ +{ + ut_ad(array); + ut_ad(slot); + + os_mutex_enter(array->mutex); + + ut_ad(slot->reserved); + + slot->reserved = FALSE; + + array->n_reserved--; + + if (array->n_reserved == array->n_slots - 1) { + os_event_set(array->not_full); + } + +#ifdef WIN_ASYNC_IO + os_event_reset(slot->control.hEvent); +#endif + os_mutex_exit(array->mutex); +} + +/************************************************************************** +Wakes up a simulated aio i/o-handler thread if it has something to do. */ +static +void +os_aio_simulated_wake_handler_thread( +/*=================================*/ + ulint global_segment) /* in: the number of the segment in the aio + arrays */ +{ + os_aio_array_t* array; + ulint segment; + os_aio_slot_t* slot; + ulint n; + ulint i; + + ut_ad(!os_aio_use_native_aio); + + segment = os_aio_get_array_and_local_segment(&array, global_segment); + + n = array->n_slots / array->n_segments; + + /* Look through n slots after the segment * n'th slot */ + + os_mutex_enter(array->mutex); + + for (i = 0; i < n; i++) { + slot = os_aio_array_get_nth_slot(array, i + segment * n); + + if (slot->reserved) { + /* Found an i/o request */ + + break; + } + } + + os_mutex_exit(array->mutex); + + if (i < n) { + os_event_set(os_aio_segment_wait_events[global_segment]); + } +} + +/************************************************************************** +Wakes up simulated aio i/o-handler threads if they have something to do. */ + +void +os_aio_simulated_wake_handler_threads(void) +/*=======================================*/ +{ + ulint i; + + if (os_aio_use_native_aio) { + /* We do not use simulated aio: do nothing */ + + return; + } + + for (i = 0; i < os_aio_n_segments; i++) { + os_aio_simulated_wake_handler_thread(i); + } +} + +/*********************************************************************** +Requests an asynchronous i/o operation. */ + +ibool +os_aio( +/*===*/ + /* out: TRUE if request was queued + successfully, FALSE if fail */ + ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */ + ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed + to OS_AIO_SIMULATED_WAKE_LATER: the + last flag advises this function not to wake + i/o-handler threads, but the caller will + do the waking explicitly later, in this + way the caller can post several requests in + a batch; NOTE that the batch must not be + so big that it exhausts the slots in aio + arrays! NOTE that a simulated batch + may introduce hidden chances of deadlocks, + because i/os are not actually handled until + all have been posted: use with great + caution! */ + char* name, /* in: name of the file or path as a + null-terminated string */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read or from which + to write */ + ulint offset, /* in: least significant 32 bits of file + offset where to read or write */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint n, /* in: number of bytes to read or write */ + void* message1,/* in: messages for the aio handler (these + can be used to identify a completed aio + operation); if mode is OS_AIO_SYNC, these + are ignored */ + void* message2) +{ + os_aio_array_t* array; + os_aio_slot_t* slot; +#ifdef WIN_ASYNC_IO + BOOL ret = TRUE; + DWORD len = n; + void* dummy_mess1; + void* dummy_mess2; +#endif + ulint err = 0; + ibool retry; + ulint wake_later; + + ut_ad(file); + ut_ad(buf); + ut_ad(n > 0); + ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_ad((ulint)buf % OS_FILE_LOG_BLOCK_SIZE == 0) + ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_ad(os_aio_validate()); + + wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; + mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); + + if (mode == OS_AIO_SYNC +#ifdef WIN_ASYNC_IO + && !os_aio_use_native_aio +#endif + ) { + /* This is actually an ordinary synchronous read or write: + no need to use an i/o-handler thread. NOTE that if we use + Windows async i/o, Windows does not allow us to use + ordinary synchronous os_file_read etc. on the same file, + therefore we have built a special mechanism for synchronous + wait in the Windows case. */ + + if (type == OS_FILE_READ) { + return(os_file_read(file, buf, offset, offset_high, n)); + } + + ut_a(type == OS_FILE_WRITE); + + return(os_file_write(name, file, buf, offset, offset_high, n)); + } + +try_again: + if (mode == OS_AIO_NORMAL) { + if (type == OS_FILE_READ) { + array = os_aio_read_array; + } else { + array = os_aio_write_array; + } + } else if (mode == OS_AIO_IBUF) { + ut_ad(type == OS_FILE_READ); + + array = os_aio_ibuf_array; + } else if (mode == OS_AIO_LOG) { + + array = os_aio_log_array; + } else if (mode == OS_AIO_SYNC) { + array = os_aio_sync_array; + } else { + ut_error; + } + + slot = os_aio_array_reserve_slot(type, array, message1, message2, file, + name, buf, offset, offset_high, n); + if (type == OS_FILE_READ) { + if (os_aio_use_native_aio) { +#ifdef WIN_ASYNC_IO + ret = ReadFile(file, buf, (DWORD)n, &len, + &(slot->control)); +#elif defined(POSIX_ASYNC_IO) + slot->control.aio_lio_opcode = LIO_READ; + err = (ulint) aio_read(&(slot->control)); + printf("Starting Posix aio read %lu\n", err); +#endif + } else { + if (!wake_later) { + os_aio_simulated_wake_handler_thread( + os_aio_get_segment_no_from_slot(array, slot)); + } + } + } else if (type == OS_FILE_WRITE) { + if (os_aio_use_native_aio) { +#ifdef WIN_ASYNC_IO + ret = WriteFile(file, buf, (DWORD)n, &len, + &(slot->control)); +#elif defined(POSIX_ASYNC_IO) + slot->control.aio_lio_opcode = LIO_WRITE; + err = (ulint) aio_write(&(slot->control)); + printf("Starting Posix aio write %lu\n", err); +#endif + } else { + if (!wake_later) { + os_aio_simulated_wake_handler_thread( + os_aio_get_segment_no_from_slot(array, slot)); + } + } + } else { + ut_error; + } + +#ifdef WIN_ASYNC_IO + if (os_aio_use_native_aio) { + if ((ret && len == n) + || (!ret && GetLastError() == ERROR_IO_PENDING)) { + + /* aio was queued successfully! */ + + if (mode == OS_AIO_SYNC) { + /* We want a synchronous i/o operation on a file + where we also use async i/o: in Windows we must + use the same wait mechanism as for async i/o */ + + return(os_aio_windows_handle(ULINT_UNDEFINED, + slot->pos, + &dummy_mess1, &dummy_mess2)); + } + + return(TRUE); + } + + goto error_handling; + } +#endif + if (err == 0) { + /* aio was queued successfully! */ + + return(TRUE); + } + +error_handling: + os_aio_array_free_slot(array, slot); + + retry = os_file_handle_error(file, name); + + if (retry) { + + goto try_again; + } + + ut_error; + + return(FALSE); +} + +#ifdef WIN_ASYNC_IO +/************************************************************************** +This function is only used in Windows asynchronous i/o. +Waits for an aio operation to complete. This function is used to wait the +for completed requests. The aio array of pending requests is divided +into segments. The thread specifies which segment or slot it wants to wait +for. NOTE: this function will also take care of freeing the aio slot, +therefore no other thread is allowed to do the freeing! */ + +ibool +os_aio_windows_handle( +/*==================*/ + /* out: TRUE if the aio operation succeeded */ + ulint segment, /* in: the number of the segment in the aio + arrays to wait for; segment 0 is the ibuf + i/o thread, segment 1 the log i/o thread, + then follow the non-ibuf read threads, and as + the last are the non-ibuf write threads; if + this is ULINT_UNDEFINED, then it means that + sync aio is used, and this parameter is + ignored */ + ulint pos, /* this parameter is used only in sync aio: + wait for the aio slot at this position */ + void** message1, /* out: the messages passed with the aio + request; note that also in the case where + the aio operation failed, these output + parameters are valid and can be used to + restart the operation, for example */ + void** message2) +{ + os_aio_array_t* array; + os_aio_slot_t* slot; + ulint n; + ulint i; + ibool ret_val; + ulint err; + BOOL ret; + DWORD len; + + if (segment == ULINT_UNDEFINED) { + array = os_aio_sync_array; + segment = 0; + } else { + segment = os_aio_get_array_and_local_segment(&array, segment); + } + + /* NOTE! We only access constant fields in os_aio_array. Therefore + we do not have to acquire the protecting mutex yet */ + + ut_ad(os_aio_validate()); + ut_ad(segment < array->n_segments); + + n = array->n_slots / array->n_segments; + + if (array == os_aio_sync_array) { + ut_ad(pos < array->n_slots); + os_event_wait(array->events[pos]); + i = pos; + } else { + i = os_event_wait_multiple(n, (array->events) + segment * n); + } + + os_mutex_enter(array->mutex); + + slot = os_aio_array_get_nth_slot(array, i + segment * n); + + ut_a(slot->reserved); + + ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); + + *message1 = slot->message1; + *message2 = slot->message2; + + if (ret && len == slot->len) { + ret_val = TRUE; + } else { + err = GetLastError(); + ut_error; + + ret_val = FALSE; + } + + os_mutex_exit(array->mutex); + + os_aio_array_free_slot(array, slot); + + return(ret_val); +} +#endif + +#ifdef POSIX_ASYNC_IO + +/************************************************************************** +This function is only used in Posix asynchronous i/o. Waits for an aio +operation to complete. */ + +ibool +os_aio_posix_handle( +/*================*/ + /* out: TRUE if the aio operation succeeded */ + ulint array_no, /* in: array number 0 - 3 */ + void** message1, /* out: the messages passed with the aio + request; note that also in the case where + the aio operation failed, these output + parameters are valid and can be used to + restart the operation, for example */ + void** message2) +{ + os_aio_array_t* array; + os_aio_slot_t* slot; + siginfo_t info; + sigset_t sigset; + sigset_t proc_sigset; + sigset_t thr_sigset; + int ret; + int i; + int sig; + + sigemptyset(&sigset); + sigaddset(&sigset, SIGRTMIN + 1 + array_no); + + pthread_sigmask(SIG_UNBLOCK, &sigset, NULL); + + /* + sigprocmask(0, NULL, &proc_sigset); + pthread_sigmask(0, NULL, &thr_sigset); + + for (i = 32 ; i < 40; i++) { + printf("%lu : %lu %lu\n", (ulint)i, + (ulint)sigismember(&proc_sigset, i), + (ulint)sigismember(&thr_sigset, i)); + } + */ + + ret = sigwaitinfo(&sigset, &info); + + if (sig != SIGRTMIN + 1 + array_no) { + + ut_a(0); + + return(FALSE); + } + + printf("Handling Posix aio\n"); + + array = os_aio_get_array_from_no(array_no); + + os_mutex_enter(array->mutex); + + slot = info.si_value.sival_ptr; + + ut_a(slot->reserved); + + *message1 = slot->message1; + *message2 = slot->message2; + + os_mutex_exit(array->mutex); + + os_aio_array_free_slot(array, slot); + + return(TRUE); +} +#endif + +/************************************************************************** +Does simulated aio. This function should be called by an i/o-handler +thread. */ + +ibool +os_aio_simulated_handle( +/*====================*/ + /* out: TRUE if the aio operation succeeded */ + ulint global_segment, /* in: the number of the segment in the aio + arrays to wait for; segment 0 is the ibuf + i/o thread, segment 1 the log i/o thread, + then follow the non-ibuf read threads, and as + the last are the non-ibuf write threads */ + void** message1, /* out: the messages passed with the aio + request; note that also in the case where + the aio operation failed, these output + parameters are valid and can be used to + restart the operation, for example */ + void** message2) +{ + os_aio_array_t* array; + ulint segment; + os_aio_slot_t* slot; + os_aio_slot_t* slot2; + os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE]; + ulint n_consecutive; + ulint total_len; + ulint offs; + ulint lowest_offset; + byte* combined_buf; + ibool ret; + ulint n; + ulint i; + + segment = os_aio_get_array_and_local_segment(&array, global_segment); + +restart: + /* Give other threads chance to add several i/os to the array + at once */ + + os_thread_yield(); + + /* NOTE! We only access constant fields in os_aio_array. Therefore + we do not have to acquire the protecting mutex yet */ + + ut_ad(os_aio_validate()); + ut_ad(segment < array->n_segments); + + n = array->n_slots / array->n_segments; + + /* Look through n slots after the segment * n'th slot */ + + os_mutex_enter(array->mutex); + + /* Check if there is a slot for which the i/o has already been + done */ + + for (i = 0; i < n; i++) { + slot = os_aio_array_get_nth_slot(array, i + segment * n); + + if (slot->reserved && slot->io_already_done) { + + ret = TRUE; + + goto slot_io_done; + } + } + + n_consecutive = 0; + + /* Look for an i/o request at the lowest offset in the array */ + + lowest_offset = ULINT_MAX; + + for (i = 0; i < n; i++) { + slot = os_aio_array_get_nth_slot(array, i + segment * n); + + if (slot->reserved && slot->offset < lowest_offset) { + + /* Found an i/o request */ + consecutive_ios[0] = slot; + + n_consecutive = 1; + + lowest_offset = slot->offset; + } + } + + if (n_consecutive == 0) { + + /* No i/o requested at the moment */ + + goto wait_for_io; + } + + slot = consecutive_ios[0]; + + /* Check if there are several consecutive blocks to read or write */ + +consecutive_loop: + for (i = 0; i < n; i++) { + slot2 = os_aio_array_get_nth_slot(array, i + segment * n); + + if (slot2->reserved && slot2 != slot + && slot2->offset == slot->offset + slot->len + && slot->offset + slot->len > slot->offset /* check that + sum does not wrap over */ + && slot2->offset_high == slot->offset_high + && slot2->type == slot->type + && slot2->file == slot->file) { + + /* Found a consecutive i/o request */ + + consecutive_ios[n_consecutive] = slot2; + n_consecutive++; + + slot = slot2; + + if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) { + + goto consecutive_loop; + } else { + break; + } + } + } + + /* We have now collected n_consecutive i/o requests in the array; + allocate a single buffer which can hold all data, and perform the + i/o */ + + total_len = 0; + slot = consecutive_ios[0]; + + for (i = 0; i < n_consecutive; i++) { + total_len += consecutive_ios[i]->len; + } + + if (n_consecutive == 1) { + /* We can use the buffer of the i/o request */ + combined_buf = slot->buf; + } else { + combined_buf = ut_malloc(total_len); + + ut_a(combined_buf); + } + + /* We release the array mutex for the time of the i/o: NOTE that + this assumes that there is just one i/o-handler thread serving + a single segment of slots! */ + + os_mutex_exit(array->mutex); + + if (slot->type == OS_FILE_WRITE && n_consecutive > 1) { + /* Copy the buffers to the combined buffer */ + offs = 0; + + for (i = 0; i < n_consecutive; i++) { + + ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf, + consecutive_ios[i]->len); + offs += consecutive_ios[i]->len; + } + } + + /* Do the i/o with ordinary, synchronous i/o functions: */ + if (slot->type == OS_FILE_WRITE) { + ret = os_file_write(slot->name, slot->file, combined_buf, + slot->offset, slot->offset_high, total_len); + } else { + ret = os_file_read(slot->file, combined_buf, + slot->offset, slot->offset_high, total_len); + } + + ut_a(ret); + +/* printf("aio: %lu consecutive %lu:th segment, first offs %lu blocks\n", + n_consecutive, global_segment, slot->offset + / UNIV_PAGE_SIZE); */ + + if (slot->type == OS_FILE_READ && n_consecutive > 1) { + /* Copy the combined buffer to individual buffers */ + offs = 0; + + for (i = 0; i < n_consecutive; i++) { + + ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs, + consecutive_ios[i]->len); + offs += consecutive_ios[i]->len; + } + } + + if (n_consecutive > 1) { + ut_free(combined_buf); + } + + os_mutex_enter(array->mutex); + + /* Mark the i/os done in slots */ + + for (i = 0; i < n_consecutive; i++) { + consecutive_ios[i]->io_already_done = TRUE; + } + + /* We return the messages for the first slot now, and if there were + several slots, the messages will be returned with subsequent calls + of this function */ + +slot_io_done: + + ut_a(slot->reserved); + + *message1 = slot->message1; + *message2 = slot->message2; + + os_mutex_exit(array->mutex); + + os_aio_array_free_slot(array, slot); + + return(ret); + +wait_for_io: + /* We wait here until there again can be i/os in the segment + of this thread */ + + os_event_reset(os_aio_segment_wait_events[global_segment]); + + os_mutex_exit(array->mutex); + + os_event_wait(os_aio_segment_wait_events[global_segment]); + + goto restart; +} + +/************************************************************************** +Validates the consistency of an aio array. */ +static +ibool +os_aio_array_validate( +/*==================*/ + /* out: TRUE if ok */ + os_aio_array_t* array) /* in: aio wait array */ +{ + os_aio_slot_t* slot; + ulint n_reserved = 0; + ulint i; + + ut_a(array); + + os_mutex_enter(array->mutex); + + ut_a(array->n_slots > 0); + ut_a(array->n_segments > 0); + + for (i = 0; i < array->n_slots; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + if (slot->reserved) { + n_reserved++; + ut_a(slot->len > 0); + } + } + + ut_a(array->n_reserved == n_reserved); + + os_mutex_exit(array->mutex); + + return(TRUE); +} + +/************************************************************************** +Validates the consistency the aio system. */ + +ibool +os_aio_validate(void) +/*=================*/ + /* out: TRUE if ok */ +{ + os_aio_array_validate(os_aio_read_array); + os_aio_array_validate(os_aio_write_array); + os_aio_array_validate(os_aio_ibuf_array); + os_aio_array_validate(os_aio_log_array); + os_aio_array_validate(os_aio_sync_array); + + return(TRUE); +} + +/************************************************************************** +Prints info of the aio arrays. */ + +void +os_aio_print(void) +/*==============*/ +{ + os_aio_array_t* array; + os_aio_slot_t* slot; + ulint n_reserved; + ulint i; + + array = os_aio_read_array; +loop: + ut_a(array); + + printf("INFO OF AN AIO ARRAY\n"); + + os_mutex_enter(array->mutex); + + ut_a(array->n_slots > 0); + ut_a(array->n_segments > 0); + + n_reserved = 0; + + for (i = 0; i < array->n_slots; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + if (slot->reserved) { + n_reserved++; + printf("Reserved slot, messages %lx %lx\n", + slot->message1, slot->message2); + ut_a(slot->len > 0); + } + } + + ut_a(array->n_reserved == n_reserved); + + printf("Total of %lu reserved aio slots\n", n_reserved); + + os_mutex_exit(array->mutex); + + if (array == os_aio_read_array) { + array = os_aio_write_array; + + goto loop; + } + + if (array == os_aio_write_array) { + array = os_aio_ibuf_array; + + goto loop; + } + + if (array == os_aio_ibuf_array) { + array = os_aio_log_array; + + goto loop; + } + + if (array == os_aio_log_array) { + array = os_aio_sync_array; + + goto loop; + } +} + +/************************************************************************** +Checks that all slots in the system have been freed, that is, there are +no pending io operations. */ + +ibool +os_aio_all_slots_free(void) +/*=======================*/ + /* out: TRUE if all free */ +{ + os_aio_array_t* array; + ulint n_res = 0; + + array = os_aio_read_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + array = os_aio_write_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + array = os_aio_ibuf_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + array = os_aio_log_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + array = os_aio_sync_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + if (n_res == 0) { + + return(TRUE); + } + + return(FALSE); +} diff --git a/innobase/os/os0fileold.c b/innobase/os/os0fileold.c new file mode 100644 index 00000000000..a9554727f0e --- /dev/null +++ b/innobase/os/os0fileold.c @@ -0,0 +1,1956 @@ +/****************************************************** +The interface to the operating system file i/o primitives + +(c) 1995 Innobase Oy + +Created 10/21/1995 Heikki Tuuri +*******************************************************/ + +#include "os0file.h" +#include "os0sync.h" +#include "ut0mem.h" + +#ifndef __WIN__ +#include <errno.h> +#endif + +/* We use these mutexes to protect lseek + file i/o operation, if the +OS does not provide an atomic pread or pwrite, or similar */ +#define OS_FILE_N_SEEK_MUTEXES 16 +os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES]; + +/* In simulated aio, merge at most this many consecutive i/os */ +#define OS_AIO_MERGE_N_CONSECUTIVE 32 + +/* If this flag is TRUE, then we will use the native aio of the +OS (provided we compiled Innobase with it in), otherwise we will +use simulated aio we build below with threads */ + +bool os_aio_use_native_aio = FALSE; + +/* The aio array slot structure */ +typedef struct os_aio_slot_struct os_aio_slot_t; + +struct os_aio_slot_struct{ + bool is_read; /* TRUE if a read operation */ + ulint pos; /* index of the slot in the aio + array */ + bool reserved; /* TRUE if this slot is reserved */ + ulint len; /* length of the block to read or + write */ + byte* buf; /* buffer used in i/o */ + ulint type; /* OS_FILE_READ or OS_FILE_WRITE */ + ulint offset; /* 32 low bits of file offset in + bytes */ + ulint offset_high; /* 32 high bits of file offset */ + os_file_t file; /* file where to read or write */ + char* name; /* file name or path */ + bool io_already_done;/* used only in simulated aio: + TRUE if the physical i/o already + made and only the slot message + needs to be passed to the caller + of os_aio_simulated_handle */ + void* message1; /* message which is given by the */ + void* message2; /* the requester of an aio operation + and which can be used to identify + which pending aio operation was + completed */ +#ifdef WIN_ASYNC_IO + OVERLAPPED control; /* Windows control block for the + aio request */ +#elif defined(POSIX_ASYNC_IO) + struct aiocb control; /* Posix control block for aio + request */ +#endif +}; + +/* The aio array structure */ +typedef struct os_aio_array_struct os_aio_array_t; + +struct os_aio_array_struct{ + os_mutex_t mutex; /* the mutex protecting the aio array */ + os_event_t not_full; /* The event which is set to signaled + state when there is space in the aio + outside the ibuf segment */ + ulint n_slots; /* Total number of slots in the aio array. + This must be divisible by n_threads. */ + ulint n_segments;/* Number of segments in the aio array of + pending aio requests. A thread can wait + separately for any one of the segments. */ + ulint n_reserved;/* Number of reserved slots in the + aio array outside the ibuf segment */ + os_aio_slot_t* slots; /* Pointer to the slots in the array */ + os_event_t* events; /* Pointer to an array of event handles + where we copied the handles from slots, + in the same order. This can be used in + WaitForMultipleObjects; used only in + Windows */ +}; + +/* Array of events used in simulated aio */ +os_event_t* os_aio_segment_wait_events = NULL; + +/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These +are NULL when the module has not yet been initialized. */ +os_aio_array_t* os_aio_read_array = NULL; +os_aio_array_t* os_aio_write_array = NULL; +os_aio_array_t* os_aio_ibuf_array = NULL; +os_aio_array_t* os_aio_log_array = NULL; +os_aio_array_t* os_aio_sync_array = NULL; + +ulint os_aio_n_segments = ULINT_UNDEFINED; + +/*************************************************************************** +Retrieves the last error number if an error occurs in a file io function. +The number should be retrieved before any other OS calls (because they may +overwrite the error number). If the number is not known to this program, +the OS error number + 100 is returned. */ + +ulint +os_file_get_last_error(void) +/*========================*/ + /* out: error number, or OS error number + 100 */ +{ + ulint err; + +#ifdef __WIN__ + + err = (ulint) GetLastError(); + + if (err == ERROR_FILE_NOT_FOUND) { + return(OS_FILE_NOT_FOUND); + } else if (err == ERROR_DISK_FULL) { + return(OS_FILE_DISK_FULL); + } else if (err == ERROR_FILE_EXISTS) { + return(OS_FILE_ALREADY_EXISTS); + } else { + return(100 + err); + } +#else + err = (ulint) errno; + + printf("%lu\n", err); + perror("os0file:"); + + if (err == ENOSPC ) { + return(OS_FILE_DISK_FULL); +#ifdef POSIX_ASYNC_IO + } else if (err == EAGAIN) { + return(OS_FILE_AIO_RESOURCES_RESERVED); +#endif + } else if (err == ENOENT) { + return(OS_FILE_NOT_FOUND); + } else if (err == EEXIST) { + return(OS_FILE_ALREADY_EXISTS); + } else { + return(100 + err); + } +#endif +} + +/******************************************************************** +Does error handling when a file operation fails. If we have run out +of disk space, then the user can clean the disk. If we do not find +a specified file, then the user can copy it to disk. */ +static +bool +os_file_handle_error( +/*=================*/ + /* out: TRUE if we should retry the operation */ + os_file_t file, /* in: file pointer */ + char* name) /* in: name of a file or NULL */ +{ + int input_char; + ulint err; + + err = os_file_get_last_error(); + + if (err == OS_FILE_DISK_FULL) { +ask_again: + printf("\n"); + if (name) { + printf( + "Innobase encountered a problem with file %s.\n", + name); + } + printf("Disk is full. Try to clean the disk to free space\n"); + printf("before answering the following: How to continue?\n"); + printf("(Y == freed some space: try again)\n"); + printf("(N == crash the database: will restart it)?\n"); +ask_with_no_question: + input_char = getchar(); + + if (input_char == (int) 'N') { + ut_error; + + return(FALSE); + } else if (input_char == (int) 'Y') { + + return(TRUE); + } else if (input_char == (int) '\n') { + + goto ask_with_no_question; + } else { + goto ask_again; + } + } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) { + + return(TRUE); + } else { + ut_error; + } + + return(FALSE); +} + +/******************************************************************** +Opens an existing file or creates a new. */ + +os_file_t +os_file_create( +/*===========*/ + /* out, own: handle to the file, not defined if error, + error number can be retrieved with os_get_last_error */ + char* name, /* in: name of the file or path as a null-terminated + string */ + ulint create_mode, /* in: OS_FILE_OPEN if an existing file is opened + (if does not exist, error), or OS_FILE_CREATE if a new + file is created (if exists, error), OS_FILE_OVERWRITE + if a new is created or an old overwritten */ + ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o + is desired, OS_FILE_NORMAL, if any normal file */ + bool* success)/* out: TRUE if succeed, FALSE if error */ +{ +#ifdef __WIN__ + os_file_t file; + DWORD create_flag; + DWORD attributes; + bool retry; + +try_again: + ut_a(name); + + if (create_mode == OS_FILE_OPEN) { + create_flag = OPEN_EXISTING; + } else if (create_mode == OS_FILE_CREATE) { + create_flag = CREATE_NEW; + } else if (create_mode == OS_FILE_OVERWRITE) { + create_flag = CREATE_ALWAYS; + } else { + create_flag = 0; + ut_error; + } + + if (purpose == OS_FILE_AIO) { + /* use asynchronous (overlapped) io and no buffering + of writes in the OS */ + attributes = 0; +#ifdef WIN_ASYNC_IO + if (os_aio_use_native_aio) { + attributes = attributes | FILE_FLAG_OVERLAPPED; + } +#endif +#ifdef UNIV_NON_BUFFERED_IO + attributes = attributes | FILE_FLAG_NO_BUFFERING; +#endif + } else if (purpose == OS_FILE_NORMAL) { + attributes = 0 +#ifdef UNIV_NON_BUFFERED_IO + | FILE_FLAG_NO_BUFFERING +#endif + ; + } else { + attributes = 0; + ut_error; + } + + file = CreateFile(name, + GENERIC_READ | GENERIC_WRITE, /* read and write + access */ + FILE_SHARE_READ,/* file can be read by other + processes */ + NULL, /* default security attributes */ + create_flag, + attributes, + NULL); /* no template file */ + + if (file == INVALID_HANDLE_VALUE) { + *success = FALSE; + + if (create_mode != OS_FILE_OPEN + && os_file_get_last_error() == OS_FILE_DISK_FULL) { + + retry = os_file_handle_error(file, name); + + if (retry) { + goto try_again; + } + } + } else { + *success = TRUE; + } + + return(file); +#else + os_file_t file; + int create_flag; + bool retry; + +try_again: + ut_a(name); + + if (create_mode == OS_FILE_OPEN) { + create_flag = O_RDWR; + } else if (create_mode == OS_FILE_CREATE) { + create_flag = O_RDWR | O_CREAT | O_EXCL; + } else if (create_mode == OS_FILE_OVERWRITE) { + create_flag = O_RDWR | O_CREAT | O_TRUNC; + } else { + create_flag = 0; + ut_error; + } + + UT_NOT_USED(purpose); + + if (create_mode == OS_FILE_CREATE) { + + file = open(name, create_flag, S_IRWXU | S_IRWXG | S_IRWXO); + } else { + file = open(name, create_flag); + } + + if (file == -1) { + *success = FALSE; + + printf("Error in opening file %s, errno %lu\n", name, + (ulint)errno); + perror("os0file.c:"); + + if (create_mode != OS_FILE_OPEN + && errno == ENOSPC) { + + retry = os_file_handle_error(file, name); + + if (retry) { + goto try_again; + } + } + } else { + *success = TRUE; + } + + return(file); +#endif +} + +/*************************************************************************** +Closes a file handle. In case of error, error number can be retrieved with +os_file_get_last_error. */ + +bool +os_file_close( +/*==========*/ + /* out: TRUE if success */ + os_file_t file) /* in, own: handle to a file */ +{ +#ifdef __WIN__ + BOOL ret; + + ut_a(file); + + ret = CloseHandle(file); + + if (ret) { + return(TRUE); + } + + return(FALSE); +#else + int ret; + + ret = close(file); + + if (ret == -1) { + return(FALSE); + } + + return(TRUE); +#endif +} + +/*************************************************************************** +Gets a file size. */ + +bool +os_file_get_size( +/*=============*/ + /* out: TRUE if success */ + os_file_t file, /* in: handle to a file */ + ulint* size, /* out: least significant 32 bits of file + size */ + ulint* size_high)/* out: most significant 32 bits of size */ +{ +#ifdef __WIN__ + DWORD high; + DWORD low; + + low = GetFileSize(file, &high); + + if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) { + return(FALSE); + } + + *size = low; + *size_high = high; + + return(TRUE); +#else + *size = (ulint) lseek(file, 0, SEEK_END); + *size_high = 0; + + return(TRUE); +#endif +} + +/*************************************************************************** +Sets a file size. This function can be used to extend or truncate a file. */ + +bool +os_file_set_size( +/*=============*/ + /* out: TRUE if success */ + char* name, /* in: name of the file or path as a + null-terminated string */ + os_file_t file, /* in: handle to a file */ + ulint size, /* in: least significant 32 bits of file + size */ + ulint size_high)/* in: most significant 32 bits of size */ +{ +#ifdef __WIN__ + DWORD high; + DWORD low; + DWORD ret; + BOOL ret2; + DWORD err; + bool retry; + +try_again: + low = size; + high = size_high; + + ret = SetFilePointer(file, low, &high, FILE_BEGIN); + + if (ret == 0xFFFFFFFF && GetLastError() != NO_ERROR) { + err = GetLastError(); + + goto error_handling; + } + + ret2 = SetEndOfFile(file); + + if (ret2) { + ret2 = os_file_flush(file); + } + + if (ret2) { + return(TRUE); + } +#else + ulint offset; + ulint n_bytes; + ulint low; + ssize_t ret; + bool retry; + ulint i; + byte buf[UNIV_PAGE_SIZE * 8]; + + /* Write buffer full of zeros */ + for (i = 0; i < UNIV_PAGE_SIZE * 8; i++) { + buf[i] = '\0'; + } + +try_again: + low = size; +#if (UNIV_WORD_SIZE == 8) + low = low + (size_high << 32); +#endif + while (offset < low) { + if (low - offset < UNIV_PAGE_SIZE * 8) { + n_bytes = low - offset; + } else { + n_bytes = UNIV_PAGE_SIZE * 8; + } + + ret = pwrite(file, buf, n_bytes, offset); + + if (ret != n_bytes) { + goto error_handling; + } + offset += n_bytes; + } + + ret = os_file_flush(file); + + if (ret) { + return(TRUE); + } +#endif + +error_handling: + retry = os_file_handle_error(file, name); + + if (retry) { + goto try_again; + } + + ut_error; +} + +/*************************************************************************** +Flushes the write buffers of a given file to the disk. */ + +bool +os_file_flush( +/*==========*/ + /* out: TRUE if success */ + os_file_t file) /* in, own: handle to a file */ +{ +#ifdef __WIN__ + BOOL ret; + + ut_a(file); + + ret = FlushFileBuffers(file); + + if (ret) { + return(TRUE); + } + + return(FALSE); +#else + int ret; + + ret = fsync(file); + + if (ret == 0) { + return(TRUE); + } + + return(FALSE); +#endif +} + + +#ifndef __WIN__ +/*********************************************************************** +Does a synchronous read operation in Posix. */ +static +ssize_t +os_file_pread( +/*==========*/ + /* out: number of bytes read, -1 if error */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read */ + ulint n, /* in: number of bytes to read */ + ulint offset) /* in: offset from where to read */ +{ +#ifdef HAVE_PREAD + return(pread(file, buf, n, (off_t) offset)); +#else + ssize_t ret; + ulint i; + + /* Protect the seek / read operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret = lseek(file, (off_t) offset, 0); + + if (ret < 0) { + os_mutex_exit(os_file_seek_mutexes[i]); + + return(ret); + } + + ret = read(file, buf, n); + + os_mutex_exit(os_file_seek_mutexes[i]); + + return(ret); +#endif +} + +/*********************************************************************** +Does a synchronous write operation in Posix. */ +static +ssize_t +os_file_pwrite( +/*===========*/ + /* out: number of bytes written, -1 if error */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer from where to write */ + ulint n, /* in: number of bytes to write */ + ulint offset) /* in: offset where to write */ +{ +#ifdef HAVE_PWRITE + return(pwrite(file, buf, n, (off_t) offset)); +#else + ssize_t ret; + ulint i; + + /* Protect the seek / write operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret = lseek(file, (off_t) offset, 0); + + if (ret < 0) { + os_mutex_exit(os_file_seek_mutexes[i]); + + return(ret); + } + + ret = write(file, buf, n); + + os_mutex_exit(os_file_seek_mutexes[i]); + + return(ret); +#endif +} +#endif + +/*********************************************************************** +Requests a synchronous positioned read operation. */ + +bool +os_file_read( +/*=========*/ + /* out: TRUE if request was + successful, FALSE if fail */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read */ + ulint offset, /* in: least significant 32 bits of file + offset where to read */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint n) /* in: number of bytes to read */ +{ +#ifdef __WIN__ + BOOL ret; + DWORD len; + DWORD ret2; + DWORD err; + DWORD low; + DWORD high; + bool retry; + ulint i; + +try_again: + ut_ad(file); + ut_ad(buf); + ut_ad(n > 0); + + low = offset; + high = offset_high; + + /* Protect the seek / read operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); + + if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { + err = GetLastError(); + + os_mutex_exit(os_file_seek_mutexes[i]); + + goto error_handling; + } + + ret = ReadFile(file, buf, n, &len, NULL); + + os_mutex_exit(os_file_seek_mutexes[i]); + + if (ret && len == n) { + return(TRUE); + } + + err = GetLastError(); +#else + bool retry; + ssize_t ret; + ulint i; + +#if (UNIV_WORD_SIZE == 8) + offset = offset + (offset_high << 32); +#endif +try_again: + /* Protect the seek / read operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret = os_file_pread(file, buf, n, (off_t) offset); + + if (ret == n) { + os_mutex_exit(os_file_seek_mutexes[i]); + + return(TRUE); + } +#endif +error_handling: + retry = os_file_handle_error(file, NULL); + + if (retry) { + goto try_again; + } + + ut_error; + + return(FALSE); +} + +/*********************************************************************** +Requests a synchronous write operation. */ + +bool +os_file_write( +/*==========*/ + /* out: TRUE if request was + successful, FALSE if fail */ + char* name, /* in: name of the file or path as a + null-terminated string */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer from which to write */ + ulint offset, /* in: least significant 32 bits of file + offset where to write */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint n) /* in: number of bytes to write */ +{ +#ifdef __WIN__ + BOOL ret; + DWORD len; + DWORD ret2; + DWORD err; + DWORD low; + DWORD high; + bool retry; + ulint i; + +try_again: + ut_ad(file); + ut_ad(buf); + ut_ad(n > 0); + + low = offset; + high = offset_high; + + /* Protect the seek / write operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); + + if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { + err = GetLastError(); + + os_mutex_exit(os_file_seek_mutexes[i]); + + goto error_handling; + } + + ret = WriteFile(file, buf, n, &len, NULL); + + os_mutex_exit(os_file_seek_mutexes[i]); + + if (ret && len == n) { + return(TRUE); + } +#else + bool retry; + ssize_t ret; + +#if (UNIV_WORD_SIZE == 8) + offset = offset + (offset_high << 32); +#endif +try_again: + ret = pwrite(file, buf, n, (off_t) offset); + + if (ret == n) { + return(TRUE); + } +#endif + +error_handling: + retry = os_file_handle_error(file, name); + + if (retry) { + goto try_again; + } + + ut_error; + + return(FALSE); +} + +/******************************************************************** +Returns a pointer to the nth slot in the aio array. */ +static +os_aio_slot_t* +os_aio_array_get_nth_slot( +/*======================*/ + /* out: pointer to slot */ + os_aio_array_t* array, /* in: aio array */ + ulint index) /* in: index of the slot */ +{ + ut_a(index < array->n_slots); + + return((array->slots) + index); +} + +/**************************************************************************** +Creates an aio wait array. */ +static +os_aio_array_t* +os_aio_array_create( +/*================*/ + /* out, own: aio array */ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ + ulint n_segments) /* in: number of segments in the aio array */ +{ + os_aio_array_t* array; + ulint i; + os_aio_slot_t* slot; +#ifdef WIN_ASYNC_IO + OVERLAPPED* over; +#endif + ut_a(n > 0); + ut_a(n_segments > 0); + ut_a(n % n_segments == 0); + + array = ut_malloc(sizeof(os_aio_array_t)); + + array->mutex = os_mutex_create(NULL); + array->not_full = os_event_create(NULL); + array->n_slots = n; + array->n_segments = n_segments; + array->n_reserved = 0; + array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); + array->events = ut_malloc(n * sizeof(os_event_t)); + + for (i = 0; i < n; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + slot->pos = i; + slot->reserved = FALSE; +#ifdef WIN_ASYNC_IO + over = &(slot->control); + + over->hEvent = os_event_create(NULL); + + *((array->events) + i) = over->hEvent; +#elif defined(POSIX_ASYNC_IO) + slot->ready = os_event_create(NULL); +#endif + } + + return(array); +} + +/**************************************************************************** +Initializes the asynchronous io system. Creates separate aio array for +non-ibuf read and write, a third aio array for the ibuf i/o, with just one +segment, two aio arrays for log reads and writes with one segment, and a +synchronous aio array of the specified size. The combined number of segments +in the three first aio arrays is the parameter n_segments given to the +function. The caller must create an i/o handler thread for each segment in +the four first arrays, but not for the sync aio array. */ + +void +os_aio_init( +/*========*/ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ + ulint n_segments, /* in: combined number of segments in the four + first aio arrays; must be >= 4 */ + ulint n_slots_sync) /* in: number of slots in the sync aio array */ +{ + ulint n_read_segs; + ulint n_write_segs; + ulint n_per_seg; + ulint i; + + ut_ad(n % n_segments == 0); + ut_ad(n_segments >= 4); + + n_per_seg = n / n_segments; + n_write_segs = (n_segments - 2) / 2; + n_read_segs = n_segments - 2 - n_write_segs; + + os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg, + n_read_segs); + os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg, + n_write_segs); + os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1); + + os_aio_log_array = os_aio_array_create(n_per_seg, 1); + + os_aio_sync_array = os_aio_array_create(n_slots_sync, 1); + + os_aio_n_segments = n_segments; + +#if !(defined(WIN_ASYNC_IO) || defined(POSIX_ASYNC_IO)) + os_aio_use_native_aio = FALSE; +#endif + os_aio_validate(); + + for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { + os_file_seek_mutexes[i] = os_mutex_create(NULL); + } + + os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*)); + + for (i = 0; i < n_segments; i++) { + os_aio_segment_wait_events[i] = os_event_create(NULL); + } +} + +/************************************************************************** +Calculates segment number for a slot. */ +static +ulint +os_aio_get_segment_no_from_slot( +/*============================*/ + /* out: segment number (which is the number + used by, for example, i/o-handler threads) */ + os_aio_array_t* array, /* in: aio wait array */ + os_aio_slot_t* slot) /* in: slot in this array */ +{ + ulint segment; + ulint seg_len; + + if (array == os_aio_ibuf_array) { + segment = 0; + + } else if (array == os_aio_log_array) { + segment = 1; + + } else if (array == os_aio_read_array) { + seg_len = os_aio_read_array->n_slots / + os_aio_read_array->n_segments; + + segment = 2 + slot->pos / seg_len; + } else { + ut_a(array == os_aio_write_array); + seg_len = os_aio_write_array->n_slots / + os_aio_write_array->n_segments; + + segment = os_aio_read_array->n_segments + 2 + + slot->pos / seg_len; + } + + return(segment); +} + +/************************************************************************** +Calculates local segment number and aio array from global segment number. */ +static +ulint +os_aio_get_array_and_local_segment( +/*===============================*/ + /* out: local segment number within + the aio array */ + os_aio_array_t** array, /* out: aio wait array */ + ulint global_segment)/* in: global segment number */ +{ + ulint segment; + + ut_a(global_segment < os_aio_n_segments); + + if (global_segment == 0) { + *array = os_aio_ibuf_array; + segment = 0; + + } else if (global_segment == 1) { + *array = os_aio_log_array; + segment = 0; + + } else if (global_segment < os_aio_read_array->n_segments + 2) { + *array = os_aio_read_array; + + segment = global_segment - 2; + } else { + *array = os_aio_write_array; + + segment = global_segment - (os_aio_read_array->n_segments + 2); + } + + return(segment); +} + +/*********************************************************************** +Gets an integer value designating a specified aio array. This is used +to give numbers to signals in Posix aio. */ +static +ulint +os_aio_get_array_no( +/*================*/ + os_aio_array_t* array) /* in: aio array */ +{ + if (array == os_aio_ibuf_array) { + + return(0); + + } else if (array == os_aio_log_array) { + + return(1); + + } else if (array == os_aio_read_array) { + + return(2); + } else if (array == os_aio_write_array) { + + return(3); + } else { + ut_a(0); + + return(0); + } +} + +/*********************************************************************** +Gets the aio array for its number. */ +static +os_aio_array_t* +os_aio_get_array_from_no( +/*=====================*/ + /* out: aio array */ + ulint n) /* in: array number */ +{ + if (n == 0) { + return(os_aio_ibuf_array); + } else if (n == 1) { + + return(os_aio_log_array); + } else if (n == 2) { + + return(os_aio_read_array); + } else if (n == 3) { + + return(os_aio_write_array); + } else { + ut_a(0); + + return(NULL); + } +} + +/*********************************************************************** +Requests for a slot in the aio array. If no slot is available, waits until +not_full-event becomes signaled. */ +static +os_aio_slot_t* +os_aio_array_reserve_slot( +/*======================*/ + /* out: pointer to slot */ + ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */ + os_aio_array_t* array, /* in: aio array */ + void* message1,/* in: message to be passed along with + the aio operation */ + void* message2,/* in: message to be passed along with + the aio operation */ + os_file_t file, /* in: file handle */ + char* name, /* in: name of the file or path as a + null-terminated string */ + void* buf, /* in: buffer where to read or from which + to write */ + ulint offset, /* in: least significant 32 bits of file + offset */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint len) /* in: length of the block to read or write */ +{ + os_aio_slot_t* slot; +#ifdef WIN_ASYNC_IO + OVERLAPPED* control; +#elif POSIX_ASYNC_IO + struct aiocb* control; + ulint type; +#endif + ulint i; +loop: + os_mutex_enter(array->mutex); + + if (array->n_reserved == array->n_slots) { + os_mutex_exit(array->mutex); + os_event_wait(array->not_full); + + goto loop; + } + + for (i = 0;; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + if (slot->reserved == FALSE) { + break; + } + } + + array->n_reserved++; + + if (array->n_reserved == array->n_slots) { + os_event_reset(array->not_full); + } + + slot->reserved = TRUE; + slot->message1 = message1; + slot->message2 = message2; + slot->file = file; + slot->name = name; + slot->len = len; + slot->type = type; + slot->buf = buf; + slot->offset = offset; + slot->offset_high = offset_high; + slot->io_already_done = FALSE; + +#ifdef WIN_ASYNC_IO + control = &(slot->control); + control->Offset = (DWORD)offset; + control->OffsetHigh = (DWORD)offset_high; + os_event_reset(control->hEvent); + +#elif POSIX_ASYNC_IO + +#if (UNIV_WORD_SIZE == 8) + offset = offset + (offset_high << 32); +#else + ut_a(offset_high == 0); +#endif + control = &(slot->control); + control->aio_fildes = file; + control->aio_buf = buf; + control->aio_nbytes = len; + control->aio_offset = offset; + control->aio_reqprio = 0; + control->aio_sigevent.sigev_notify = SIGEV_SIGNAL; + control->aio_sigevent.signo = + SIGRTMAX + 1 + os_aio_get_array_no(array); + /* TODO: How to choose the signal numbers? */ + control->aio_sigevent.sigev_value.sival_ptr = slot; +#endif + os_mutex_exit(array->mutex); + + return(slot); +} + +/*********************************************************************** +Frees a slot in the aio array. */ +static +void +os_aio_array_free_slot( +/*===================*/ + os_aio_array_t* array, /* in: aio array */ + os_aio_slot_t* slot) /* in: pointer to slot */ +{ + ut_ad(array); + ut_ad(slot); + + os_mutex_enter(array->mutex); + + ut_ad(slot->reserved); + + slot->reserved = FALSE; + + array->n_reserved--; + + if (array->n_reserved == array->n_slots - 1) { + os_event_set(array->not_full); + } + +#ifdef WIN_ASYNC_IO + os_event_reset(slot->control.hEvent); +#endif + os_mutex_exit(array->mutex); +} + +/************************************************************************** +Wakes up a simulated aio i/o-handler thread if it has something to do. */ +static +void +os_aio_simulated_wake_handler_thread( +/*=================================*/ + ulint global_segment) /* in: the number of the segment in the aio + arrays */ +{ + os_aio_array_t* array; + ulint segment; + os_aio_slot_t* slot; + ulint n; + ulint i; + + ut_ad(!os_aio_use_native_aio); + + segment = os_aio_get_array_and_local_segment(&array, global_segment); + + n = array->n_slots / array->n_segments; + + /* Look through n slots after the segment * n'th slot */ + + os_mutex_enter(array->mutex); + + for (i = 0; i < n; i++) { + slot = os_aio_array_get_nth_slot(array, i + segment * n); + + if (slot->reserved) { + /* Found an i/o request */ + + break; + } + } + + os_mutex_exit(array->mutex); + + if (i < n) { + os_event_set(os_aio_segment_wait_events[global_segment]); + } +} + +/************************************************************************** +Wakes up simulated aio i/o-handler threads if they have something to do. */ + +void +os_aio_simulated_wake_handler_threads(void) +/*=======================================*/ +{ + ulint i; + + if (os_aio_use_native_aio) { + /* We do not use simulated aio: do nothing */ + + return; + } + + for (i = 0; i < os_aio_n_segments; i++) { + os_aio_simulated_wake_handler_thread(i); + } +} + +/*********************************************************************** +Requests an asynchronous i/o operation. */ + +bool +os_aio( +/*===*/ + /* out: TRUE if request was queued + successfully, FALSE if fail */ + ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */ + ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed + to OS_AIO_SIMULATED_WAKE_LATER: the + last flag advises this function not to wake + i/o-handler threads, but the caller will + do the waking explicitly later, in this + way the caller can post several requests in + a batch; NOTE that the batch must not be + so big that it exhausts the slots in aio + arrays! NOTE that a simulated batch + may introduce hidden chances of deadlocks, + because i/os are not actually handled until + all have been posted: use with great + caution! */ + char* name, /* in: name of the file or path as a + null-terminated string */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read or from which + to write */ + ulint offset, /* in: least significant 32 bits of file + offset where to read or write */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint n, /* in: number of bytes to read or write */ + void* message1,/* in: messages for the aio handler (these + can be used to identify a completed aio + operation); if mode is OS_AIO_SYNC, these + are ignored */ + void* message2) +{ + os_aio_array_t* array; + os_aio_slot_t* slot; +#ifdef WIN_ASYNC_IO + BOOL ret = TRUE; + DWORD len = n; + void* dummy_mess1; + void* dummy_mess2; +#endif + ulint err = 0; + bool retry; + ulint wake_later; + + ut_ad(file); + ut_ad(buf); + ut_ad(n > 0); + ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_ad((ulint)buf % OS_FILE_LOG_BLOCK_SIZE == 0) + ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_ad(os_aio_validate()); + + wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; + mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); + + if (mode == OS_AIO_SYNC +#ifdef WIN_ASYNC_IO + && !os_aio_use_native_aio +#endif + ) { + /* This is actually an ordinary synchronous read or write: + no need to use an i/o-handler thread. NOTE that if we use + Windows async i/o, Windows does not allow us to use + ordinary synchronous os_file_read etc. on the same file, + therefore we have built a special mechanism for synchronous + wait in the Windows case. */ + + if (type == OS_FILE_READ) { + return(os_file_read(file, buf, offset, offset_high, n)); + } + + ut_a(type == OS_FILE_WRITE); + + return(os_file_write(name, file, buf, offset, offset_high, n)); + } + +try_again: + if (mode == OS_AIO_NORMAL) { + if (type == OS_FILE_READ) { + array = os_aio_read_array; + } else { + array = os_aio_write_array; + } + } else if (mode == OS_AIO_IBUF) { + ut_ad(type == OS_FILE_READ); + + array = os_aio_ibuf_array; + } else if (mode == OS_AIO_LOG) { + + array = os_aio_log_array; + } else if (mode == OS_AIO_SYNC) { + array = os_aio_sync_array; + } else { + ut_error; + } + + slot = os_aio_array_reserve_slot(type, array, message1, message2, file, + name, buf, offset, offset_high, n); + if (type == OS_FILE_READ) { + if (os_aio_use_native_aio) { +#ifdef WIN_ASYNC_IO + ret = ReadFile(file, buf, (DWORD)n, &len, + &(slot->control)); +#elif defined(POSIX_ASYNC_IO) + err = (ulint) aio_read(&(slot->control)); +#endif + } else { + if (!wake_later) { + os_aio_simulated_wake_handler_thread( + os_aio_get_segment_no_from_slot(array, slot)); + } + } + } else if (type == OS_FILE_WRITE) { + if (os_aio_use_native_aio) { +#ifdef WIN_ASYNC_IO + ret = WriteFile(file, buf, (DWORD)n, &len, + &(slot->control)); +#elif defined(POSIX_ASYNC_IO) + err = (ulint) aio_write(&(slot->control)); +#endif + } else { + if (!wake_later) { + os_aio_simulated_wake_handler_thread( + os_aio_get_segment_no_from_slot(array, slot)); + } + } + } else { + ut_error; + } + +#ifdef WIN_ASYNC_IO + if ((ret && len == n) + || (!ret && GetLastError() == ERROR_IO_PENDING)) { + + /* aio was queued successfully! */ + + if (mode == OS_AIO_SYNC) { + /* We want a synchronous i/o operation on a file + where we also use async i/o: in Windows we must + use the same wait mechanism as for async i/o */ + + return(os_aio_windows_handle(ULINT_UNDEFINED, + slot->pos, + &dummy_mess1, &dummy_mess2)); + } + + return(TRUE); + } +#else + if (err == 0) { + /* aio was queued successfully! */ + + return(TRUE); + } +#endif + os_aio_array_free_slot(array, slot); + + retry = os_file_handle_error(file, name); + + if (retry) { + + goto try_again; + } + + ut_error; + + return(FALSE); +} + +#ifdef WIN_ASYNC_IO +/************************************************************************** +This function is only used in Windows asynchronous i/o. +Waits for an aio operation to complete. This function is used to wait the +for completed requests. The aio array of pending requests is divided +into segments. The thread specifies which segment or slot it wants to wait +for. NOTE: this function will also take care of freeing the aio slot, +therefore no other thread is allowed to do the freeing! */ + +bool +os_aio_windows_handle( +/*==================*/ + /* out: TRUE if the aio operation succeeded */ + ulint segment, /* in: the number of the segment in the aio + arrays to wait for; segment 0 is the ibuf + i/o thread, segment 1 the log i/o thread, + then follow the non-ibuf read threads, and as + the last are the non-ibuf write threads; if + this is ULINT_UNDEFINED, then it means that + sync aio is used, and this parameter is + ignored */ + ulint pos, /* this parameter is used only in sync aio: + wait for the aio slot at this position */ + void** message1, /* out: the messages passed with the aio + request; note that also in the case where + the aio operation failed, these output + parameters are valid and can be used to + restart the operation, for example */ + void** message2) +{ + os_aio_array_t* array; + os_aio_slot_t* slot; + ulint n; + ulint i; + bool ret_val; + ulint err; + BOOL ret; + DWORD len; + + if (segment == ULINT_UNDEFINED) { + array = os_aio_sync_array; + segment = 0; + } else { + segment = os_aio_get_array_and_local_segment(&array, segment); + } + + /* NOTE! We only access constant fields in os_aio_array. Therefore + we do not have to acquire the protecting mutex yet */ + + ut_ad(os_aio_validate()); + ut_ad(segment < array->n_segments); + + n = array->n_slots / array->n_segments; + + if (array == os_aio_sync_array) { + ut_ad(pos < array->n_slots); + os_event_wait(array->events[pos]); + i = pos; + } else { + i = os_event_wait_multiple(n, (array->events) + segment * n); + } + + os_mutex_enter(array->mutex); + + slot = os_aio_array_get_nth_slot(array, i + segment * n); + + ut_a(slot->reserved); + + ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); + + *message1 = slot->message1; + *message2 = slot->message2; + + if (ret && len == slot->len) { + ret_val = TRUE; + } else { + err = GetLastError(); + ut_error; + + ret_val = FALSE; + } + + os_mutex_exit(array->mutex); + + os_aio_array_free_slot(array, slot); + + return(ret_val); +} +#endif + +#ifdef POSIX_ASYNC_IO +/************************************************************************** +This function is only used in Posix asynchronous i/o. Waits for an aio +operation to complete. */ + +bool +os_aio_posix_handle( +/*================*/ + /* out: TRUE if the aio operation succeeded */ + ulint array_no, /* in: array number 0 - 3 */ + void** message1, /* out: the messages passed with the aio + request; note that also in the case where + the aio operation failed, these output + parameters are valid and can be used to + restart the operation, for example */ + void** message2) +{ + os_aio_array_t* array; + os_aio_slot_t* slot; + siginfo_t info; + sigset_t sigset; + int ret; + + sigemptyset(&sigset); + sigaddset(&sigset, SIGRTMAX + 1 + array_no); + + ret = sigwaitinfo(&sigset, &info); + + if (ret != SIGRTMAX + 1 + array_no) { + + ut_a(0); + + return(FALSE); + } + + array = os_aio_get_array_from_no(array_no); + + os_mutex_enter(array->mutex); + + slot = siginfo.si_value.sival_ptr; + + ut_a(slot->reserved); + + *message1 = slot->message1; + *message2 = slot->message2; + + os_mutex_exit(array->mutex); + + os_aio_array_free_slot(array, slot); + + return(TRUE); +} +#endif + +/************************************************************************** +Does simulated aio. This function should be called by an i/o-handler +thread. */ + +bool +os_aio_simulated_handle( +/*====================*/ + /* out: TRUE if the aio operation succeeded */ + ulint global_segment, /* in: the number of the segment in the aio + arrays to wait for; segment 0 is the ibuf + i/o thread, segment 1 the log i/o thread, + then follow the non-ibuf read threads, and as + the last are the non-ibuf write threads */ + void** message1, /* out: the messages passed with the aio + request; note that also in the case where + the aio operation failed, these output + parameters are valid and can be used to + restart the operation, for example */ + void** message2) +{ + os_aio_array_t* array; + ulint segment; + os_aio_slot_t* slot; + os_aio_slot_t* slot2; + os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE]; + ulint n_consecutive; + ulint total_len; + ulint offs; + byte* combined_buf; + bool ret; + ulint n; + ulint i; + + segment = os_aio_get_array_and_local_segment(&array, global_segment); + +restart: + /* NOTE! We only access constant fields in os_aio_array. Therefore + we do not have to acquire the protecting mutex yet */ + + ut_ad(os_aio_validate()); + ut_ad(segment < array->n_segments); + + n = array->n_slots / array->n_segments; + + /* Look through n slots after the segment * n'th slot */ + + os_mutex_enter(array->mutex); + + /* Check if there is a slot for which the i/o has already been + done */ + + for (i = 0; i < n; i++) { + slot = os_aio_array_get_nth_slot(array, i + segment * n); + + if (slot->reserved && slot->io_already_done) { + + goto slot_io_done; + } + } + + n_consecutive = 0; + + for (i = 0; i < n; i++) { + slot = os_aio_array_get_nth_slot(array, i + segment * n); + + if (slot->reserved) { + /* Found an i/o request */ + consecutive_ios[n_consecutive] = slot; + n_consecutive++; + + break; + } + } + + /* Check if there are several consecutive blocks to read or write */ + +consecutive_loop: + for (i = 0; i < n; i++) { + slot2 = os_aio_array_get_nth_slot(array, i + segment * n); + + if (slot2->reserved && slot2 != slot + && slot2->offset == slot->offset + slot->len + && slot->offset + slot->len > slot->offset /* check that + sum does not wrap over */ + && slot2->offset_high == slot->offset_high + && slot2->type == slot->type + && slot2->file == slot->file) { + + /* Found a consecutive i/o request */ + + consecutive_ios[n_consecutive] = slot2; + n_consecutive++; + + slot = slot2; + + if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) { + + goto consecutive_loop; + } else { + break; + } + } + } + + if (n_consecutive == 0) { + + /* No i/o requested at the moment */ + + goto wait_for_io; + } + + /* We have now collected n_consecutive i/o requests in the array; + allocate a single buffer which can hold all data, and perform the + i/o */ + + total_len = 0; + slot = consecutive_ios[0]; + + for (i = 0; i < n_consecutive; i++) { + total_len += consecutive_ios[i]->len; + } + + if (n_consecutive == 1) { + /* We can use the buffer of the i/o request */ + combined_buf = slot->buf; + } else { + combined_buf = ut_malloc(total_len); + + ut_a(combined_buf); + } + + /* We release the array mutex for the time of the i/o: NOTE that + this assumes that there is just one i/o-handler thread serving + a single segment of slots! */ + + os_mutex_exit(array->mutex); + + if (slot->type == OS_FILE_WRITE && n_consecutive > 1) { + /* Copy the buffers to the combined buffer */ + offs = 0; + + for (i = 0; i < n_consecutive; i++) { + + ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf, + consecutive_ios[i]->len); + offs += consecutive_ios[i]->len; + } + } + + /* Do the i/o with ordinary, synchronous i/o functions: */ + if (slot->type == OS_FILE_WRITE) { + ret = os_file_write(slot->name, slot->file, combined_buf, + slot->offset, slot->offset_high, total_len); + } else { + ret = os_file_read(slot->file, combined_buf, + slot->offset, slot->offset_high, total_len); + } + + ut_a(ret); + + if (slot->type == OS_FILE_READ && n_consecutive > 1) { + /* Copy the combined buffer to individual buffers */ + offs = 0; + + for (i = 0; i < n_consecutive; i++) { + + ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs, + consecutive_ios[i]->len); + offs += consecutive_ios[i]->len; + } + } + + if (n_consecutive > 1) { + ut_free(combined_buf); + } + + os_mutex_enter(array->mutex); + + /* Mark the i/os done in slots */ + + for (i = 0; i < n_consecutive; i++) { + consecutive_ios[i]->io_already_done = TRUE; + } + + /* We return the messages for the first slot now, and if there were + several slots, the messages will be returned with subsequent calls + of this function */ + +slot_io_done: + + ut_a(slot->reserved); + + *message1 = slot->message1; + *message2 = slot->message2; + + os_mutex_exit(array->mutex); + + os_aio_array_free_slot(array, slot); + + return(ret); + +wait_for_io: + /* We wait here until there again can be i/os in the segment + of this thread */ + + os_event_reset(os_aio_segment_wait_events[global_segment]); + + os_mutex_exit(array->mutex); + + os_event_wait(os_aio_segment_wait_events[global_segment]); + + goto restart; +} + +/************************************************************************** +Validates the consistency of an aio array. */ +static +bool +os_aio_array_validate( +/*==================*/ + /* out: TRUE if ok */ + os_aio_array_t* array) /* in: aio wait array */ +{ + os_aio_slot_t* slot; + ulint n_reserved = 0; + ulint i; + + ut_a(array); + + os_mutex_enter(array->mutex); + + ut_a(array->n_slots > 0); + ut_a(array->n_segments > 0); + + for (i = 0; i < array->n_slots; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + if (slot->reserved) { + n_reserved++; + ut_a(slot->len > 0); + } + } + + ut_a(array->n_reserved == n_reserved); + + os_mutex_exit(array->mutex); + + return(TRUE); +} + +/************************************************************************** +Validates the consistency the aio system. */ + +bool +os_aio_validate(void) +/*=================*/ + /* out: TRUE if ok */ +{ + os_aio_array_validate(os_aio_read_array); + os_aio_array_validate(os_aio_write_array); + os_aio_array_validate(os_aio_ibuf_array); + os_aio_array_validate(os_aio_log_array); + os_aio_array_validate(os_aio_sync_array); + + return(TRUE); +} + +/************************************************************************** +Prints info of the aio arrays. */ + +void +os_aio_print(void) +/*==============*/ +{ + os_aio_array_t* array; + os_aio_slot_t* slot; + ulint n_reserved; + ulint i; + + array = os_aio_read_array; +loop: + ut_a(array); + + printf("INFO OF AN AIO ARRAY\n"); + + os_mutex_enter(array->mutex); + + ut_a(array->n_slots > 0); + ut_a(array->n_segments > 0); + + n_reserved = 0; + + for (i = 0; i < array->n_slots; i++) { + slot = os_aio_array_get_nth_slot(array, i); + + if (slot->reserved) { + n_reserved++; + printf("Reserved slot, messages %lx %lx\n", + slot->message1, slot->message2); + ut_a(slot->len > 0); + } + } + + ut_a(array->n_reserved == n_reserved); + + printf("Total of %lu reserved aio slots\n", n_reserved); + + os_mutex_exit(array->mutex); + + if (array == os_aio_read_array) { + array = os_aio_write_array; + + goto loop; + } + + if (array == os_aio_write_array) { + array = os_aio_ibuf_array; + + goto loop; + } + + if (array == os_aio_ibuf_array) { + array = os_aio_log_array; + + goto loop; + } + + if (array == os_aio_log_array) { + array = os_aio_sync_array; + + goto loop; + } +} + +/************************************************************************** +Checks that all slots in the system have been freed, that is, there are +no pending io operations. */ + +bool +os_aio_all_slots_free(void) +/*=======================*/ + /* out: TRUE if all free */ +{ + os_aio_array_t* array; + ulint n_res = 0; + + array = os_aio_read_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + array = os_aio_write_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + array = os_aio_ibuf_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + array = os_aio_log_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + array = os_aio_sync_array; + + os_mutex_enter(array->mutex); + + n_res += array->n_reserved; + + os_mutex_exit(array->mutex); + + if (n_res == 0) { + + return(TRUE); + } + + return(FALSE); +} diff --git a/innobase/os/os0proc.c b/innobase/os/os0proc.c new file mode 100644 index 00000000000..43a2db4d306 --- /dev/null +++ b/innobase/os/os0proc.c @@ -0,0 +1,150 @@ +/****************************************************** +The interface to the operating system +process control primitives + +(c) 1995 Innobase Oy + +Created 9/30/1995 Heikki Tuuri +*******************************************************/ + +#include "os0proc.h" +#ifdef UNIV_NONINL +#include "os0proc.ic" +#endif + +#ifdef __WIN__ +#include <windows.h> +#endif + +#include "ut0mem.h" + +/******************************************************************** +Allocates non-cacheable memory. */ + +void* +os_mem_alloc_nocache( +/*=================*/ + /* out: allocated memory */ + ulint n) /* in: number of bytes */ +{ +#ifdef __WIN__ + void* ptr; + + ptr = VirtualAlloc(NULL, n, MEM_COMMIT, + PAGE_READWRITE | PAGE_NOCACHE); + ut_a(ptr); + + return(ptr); +#else + return(ut_malloc(n)); +#endif +} + +#ifdef notdefined +/******************************************************************** +Creates a new process. */ + +ibool +os_process_create( +/*==============*/ + char* name, /* in: name of the executable to start + or its full path name */ + char* cmd, /* in: command line for the starting + process, or NULL if no command line + specified */ + os_process_t* proc, /* out: handle to the process */ + os_process_id_t* id) /* out: process id */ + +{ + BOOL ret; + PROCESS_INFORMATION pinfo; + STARTUPINFO sinfo; + + /* The following assignments are default for the startupinfo + structure */ + sinfo.cb = sizeof(STARTUPINFO); + sinfo.lpReserved = NULL; + sinfo.lpDesktop = NULL; + sinfo.cbReserved2 = 0; + sinfo.lpReserved = NULL; + + ret = CreateProcess(name, + cmd, + NULL, /* No security attributes */ + NULL, /* No thread security attrs */ + FALSE, /* Do not inherit handles */ + 0, /* No creation flags */ + NULL, /* No environment */ + NULL, /* Same current directory */ + &sinfo, + &pinfo); + + *proc = pinfo.hProcess; + *id = pinfo.dwProcessId; + + return(ret); +} + +/************************************************************************** +Exits a process. */ + +void +os_process_exit( +/*============*/ + ulint code) /* in: exit code */ +{ + ExitProcess((UINT)code); +} + +/************************************************************************** +Gets a process exit code. */ + +ibool +os_process_get_exit_code( +/*=====================*/ + /* out: TRUE if succeed, FALSE if fail */ + os_process_t proc, /* in: handle to the process */ + ulint* code) /* out: exit code */ +{ + DWORD ex_code; + BOOL ret; + + ret = GetExitCodeProcess(proc, &ex_code); + + *code = (ulint)ex_code; + + return(ret); +} +#endif /* notdedfined */ + +/******************************************************************** +Sets the priority boost for threads released from waiting within the current +process. */ + +void +os_process_set_priority_boost( +/*==========================*/ + ibool do_boost) /* in: TRUE if priority boost should be done, + FALSE if not */ +{ +#ifdef __WIN__ + ibool no_boost; + + if (do_boost) { + no_boost = FALSE; + } else { + no_boost = TRUE; + } + + ut_a(TRUE == 1); + +/* Does not do anything currently! + SetProcessPriorityBoost(GetCurrentProcess(), no_boost); +*/ + printf( + "Warning: process priority boost setting currently not functional!\n" + ); +#else + UT_NOT_USED(do_boost); +#endif +} diff --git a/innobase/os/os0shm.c b/innobase/os/os0shm.c new file mode 100644 index 00000000000..e03440cd4f4 --- /dev/null +++ b/innobase/os/os0shm.c @@ -0,0 +1,146 @@ +/****************************************************** +The interface to the operating system +shared memory primitives + +(c) 1995 Innobase Oy + +Created 9/23/1995 Heikki Tuuri +*******************************************************/ + +#include "os0shm.h" +#ifdef UNIV_NONINL +#include "os0shm.ic" +#endif + +#ifdef __WIN__ +#include "windows.h" + +typedef HANDLE os_shm_t; +#endif + +/******************************************************************** +Creates an area of shared memory. It can be named so that +different processes may access it in the same computer. +If an area with the same name already exists, returns +a handle to that area (where the size of the area is +not changed even if this call requests a different size). +To use the area, it first has to be mapped to the process +address space by os_shm_map. */ + +os_shm_t +os_shm_create( +/*==========*/ + /* out, own: handle to the shared + memory area, NULL if error */ + ulint size, /* in: area size < 4 GB */ + char* name) /* in: name of the area as a null-terminated + string */ +{ +#ifdef __WIN__ + os_shm_t shm; + + ut_a(name); + ut_a(size > 0); + ut_a(size < 0xFFFFFFFF); + + /* In Windows NT shared memory is created as a memory mapped + file */ + shm = CreateFileMapping((HANDLE)0xFFFFFFFF, /* use operating system + swap file as the backing + file */ + NULL, /* default security + descriptor */ + PAGE_READWRITE, /* allow reading and + writing */ + 0, /* size must be less + than 4 GB */ + (DWORD)size, + name); + return(shm); +#else + UT_NOT_USED(size); + UT_NOT_USED(name); + + return(NULL); +#endif +} + +/*************************************************************************** +Frees a shared memory area. The area can be freed only after it +has been unmapped in all the processes where it was mapped. */ + +ibool +os_shm_free( +/*========*/ + /* out: TRUE if success */ + os_shm_t shm) /* in, own: handle to a shared memory area */ +{ +#ifdef __WIN__ + + BOOL ret; + + ut_a(shm); + + ret = CloseHandle(shm); + + if (ret) { + return(TRUE); + } else { + return(FALSE); + } +#else + UT_NOT_USED(shm); +#endif +} + +/*************************************************************************** +Maps a shared memory area in the address space of a process. */ + +void* +os_shm_map( +/*=======*/ + /* out: address of the area, NULL if error */ + os_shm_t shm) /* in: handle to a shared memory area */ +{ +#ifdef __WIN__ + void* mem; + + ut_a(shm); + + mem = MapViewOfFile(shm, + FILE_MAP_ALL_ACCESS, /* read and write access + allowed */ + 0, /* map from start of */ + 0, /* area */ + 0); /* map the whole area */ + return(mem); +#else + UT_NOT_USED(shm); +#endif +} + +/*************************************************************************** +Unmaps a shared memory area from the address space of a process. */ + +ibool +os_shm_unmap( +/*=========*/ + /* out: TRUE if succeed */ + void* addr) /* in: address of the area */ +{ +#ifdef __WIN__ + BOOL ret; + + ut_a(addr); + + ret = UnmapViewOfFile(addr); + + if (ret) { + return(TRUE); + } else { + return(FALSE); + } +#else + UT_NOT_USED(addr); +#endif +} diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c new file mode 100644 index 00000000000..1647dd982f3 --- /dev/null +++ b/innobase/os/os0sync.c @@ -0,0 +1,461 @@ +/****************************************************** +The interface to the operating system +synchronization primitives. + +(c) 1995 Innobase Oy + +Created 9/6/1995 Heikki Tuuri +*******************************************************/ + +#include "os0sync.h" +#ifdef UNIV_NONINL +#include "os0sync.ic" +#endif + +#ifdef __WIN__ +#include <windows.h> +#endif + +#include "ut0mem.h" + +/* Type definition for an operating system mutex struct */ +struct os_mutex_struct{ + void* handle; /* OS handle to mutex */ + ulint count; /* we use this counter to check + that the same thread does not + recursively lock the mutex: we + do not assume that the OS mutex + supports recursive locking, though + NT seems to do that */ +}; + +/************************************************************* +Creates an event semaphore, i.e., a semaphore which may +just have two states: signaled and nonsignaled. +The created event is manual reset: it must be reset +explicitly by calling sync_os_reset_event. */ + +os_event_t +os_event_create( +/*============*/ + /* out: the event handle */ + char* name) /* in: the name of the event, if NULL + the event is created without a name */ +{ +#ifdef __WIN__ + HANDLE event; + + event = CreateEvent(NULL, /* No security attributes */ + TRUE, /* Manual reset */ + FALSE, /* Initial state nonsignaled */ + name); + ut_a(event); + + return(event); +#else + os_event_t event; + + UT_NOT_USED(name); + + event = ut_malloc(sizeof(struct os_event_struct)); + + os_fast_mutex_init(&(event->os_mutex)); + os_fast_mutex_init(&(event->wait_mutex)); + + event->is_set = TRUE; + + return(event); +#endif +} + +/************************************************************* +Creates an auto-reset event semaphore, i.e., an event +which is automatically reset when a single thread is +released. */ + +os_event_t +os_event_create_auto( +/*=================*/ + /* out: the event handle */ + char* name) /* in: the name of the event, if NULL + the event is created without a name */ +{ +#ifdef __WIN__ + HANDLE event; + + event = CreateEvent(NULL, /* No security attributes */ + FALSE, /* Auto-reset */ + FALSE, /* Initial state nonsignaled */ + name); + ut_a(event); + + return(event); +#else + /* Does nothing in Posix because we do not need this with MySQL */ + + UT_NOT_USED(name); + + return(NULL); +#endif +} + +/************************************************************** +Sets an event semaphore to the signaled state: lets waiting threads +proceed. */ + +void +os_event_set( +/*=========*/ + os_event_t event) /* in: event to set */ +{ +#ifdef __WIN__ + ut_a(event); + ut_a(SetEvent(event)); +#else + ut_a(event); + + os_fast_mutex_lock(&(event->os_mutex)); + + if (event->is_set) { + /* Do nothing */ + } else { + os_fast_mutex_unlock(&(event->wait_mutex)); + event->is_set = TRUE; + } + + os_fast_mutex_unlock(&(event->os_mutex)); +#endif +} + +/************************************************************** +Resets an event semaphore to the nonsignaled state. Waiting threads will +stop to wait for the event. */ + +void +os_event_reset( +/*===========*/ + os_event_t event) /* in: event to reset */ +{ +#ifdef __WIN__ + ut_a(event); + + ut_a(ResetEvent(event)); +#else + ut_a(event); + + os_fast_mutex_lock(&(event->os_mutex)); + + if (!event->is_set) { + /* Do nothing */ + } else { + os_fast_mutex_lock(&(event->wait_mutex)); + event->is_set = FALSE; + } + + os_fast_mutex_unlock(&(event->os_mutex)); +#endif +} + +/************************************************************** +Frees an event object. */ + +void +os_event_free( +/*==========*/ + os_event_t event) /* in: event to free */ + +{ +#ifdef __WIN__ + ut_a(event); + + ut_a(CloseHandle(event)); +#else + ut_a(event); + + os_fast_mutex_free(&(event->os_mutex)); + os_fast_mutex_free(&(event->wait_mutex)); + + ut_free(event); +#endif +} + +/************************************************************** +Waits for an event object until it is in the signaled state. */ + +void +os_event_wait( +/*==========*/ + os_event_t event) /* in: event to wait */ +{ +#ifdef __WIN__ + DWORD err; + + ut_a(event); + + /* Specify an infinite time limit for waiting */ + err = WaitForSingleObject(event, INFINITE); + + ut_a(err == WAIT_OBJECT_0); +#else + os_fast_mutex_lock(&(event->wait_mutex)); + os_fast_mutex_unlock(&(event->wait_mutex)); +#endif +} + +/************************************************************** +Waits for an event object until it is in the signaled state or +a timeout is exceeded. */ + +ulint +os_event_wait_time( +/*===============*/ + /* out: 0 if success, OS_SYNC_TIME_EXCEEDED if + timeout was exceeded */ + os_event_t event, /* in: event to wait */ + ulint time) /* in: timeout in microseconds, or + OS_SYNC_INFINITE_TIME */ +{ +#ifdef __WIN__ + DWORD err; + + ut_a(event); + + if (time != OS_SYNC_INFINITE_TIME) { + err = WaitForSingleObject(event, time / 1000); + } else { + err = WaitForSingleObject(event, INFINITE); + } + + if (err == WAIT_OBJECT_0) { + + return(0); + } else if (err == WAIT_TIMEOUT) { + + return(OS_SYNC_TIME_EXCEEDED); + } else { + ut_error; + } +#else + UT_NOT_USED(time); + + /* In Posix this is just an ordinary, infinite wait */ + + os_event_wait(event); + + return(0); +#endif +} + +/************************************************************** +Waits for any event in an event array. Returns if even a single +one is signaled or becomes signaled. */ + +ulint +os_event_wait_multiple( +/*===================*/ + /* out: index of the event + which was signaled */ + ulint n, /* in: number of events in the + array */ + os_event_t* event_array) /* in: pointer to an array of event + handles */ +{ +#ifdef __WIN__ + DWORD index; + + ut_a(event_array); + ut_a(n > 0); + + index = WaitForMultipleObjects(n, + event_array, + FALSE, /* Wait for any 1 event */ + INFINITE); /* Infinite wait time + limit */ + ut_a(index >= WAIT_OBJECT_0); + ut_a(index < WAIT_OBJECT_0 + n); + + return(index - WAIT_OBJECT_0); +#else + ut_a(n == 0); + + /* In Posix we can only wait for a single event */ + + os_event_wait(*event_array); + + return(0); +#endif +} + +/************************************************************* +Creates an operating system mutex semaphore. +Because these are slow, the mutex semaphore of the database +itself (sync_mutex_t) should be used where possible. */ + +os_mutex_t +os_mutex_create( +/*============*/ + /* out: the mutex handle */ + char* name) /* in: the name of the mutex, if NULL + the mutex is created without a name */ +{ +#ifdef __WIN__ + HANDLE mutex; + os_mutex_t mutex_str; + + mutex = CreateMutex(NULL, /* No security attributes */ + FALSE, /* Initial state: no owner */ + name); + ut_a(mutex); + + mutex_str = ut_malloc(sizeof(os_mutex_str_t)); + + mutex_str->handle = mutex; + mutex_str->count = 0; + + return(mutex_str); +#else + os_fast_mutex_t* os_mutex; + os_mutex_t mutex_str; + + UT_NOT_USED(name); + + os_mutex = ut_malloc(sizeof(os_fast_mutex_t)); + + os_fast_mutex_init(os_mutex); + + mutex_str = ut_malloc(sizeof(os_mutex_str_t)); + + mutex_str->handle = os_mutex; + mutex_str->count = 0; + + return(mutex_str); +#endif +} + +/************************************************************** +Acquires ownership of a mutex semaphore. */ + +void +os_mutex_enter( +/*===========*/ + os_mutex_t mutex) /* in: mutex to acquire */ +{ +#ifdef __WIN__ + DWORD err; + + ut_a(mutex); + + /* Specify infinite time limit for waiting */ + err = WaitForSingleObject(mutex->handle, INFINITE); + + ut_a(err == WAIT_OBJECT_0); + + (mutex->count)++; + ut_a(mutex->count == 1); +#else + os_fast_mutex_lock(mutex->handle); + + (mutex->count)++; + + ut_a(mutex->count == 1); +#endif +} + +/************************************************************** +Releases ownership of a mutex. */ + +void +os_mutex_exit( +/*==========*/ + os_mutex_t mutex) /* in: mutex to release */ +{ +#ifdef __WIN__ + ut_a(mutex); + + ut_a(mutex->count == 1); + + (mutex->count)--; + + ut_a(ReleaseMutex(mutex->handle)); +#else + ut_a(mutex); + + ut_a(mutex->count == 1); + + (mutex->count)--; + + os_fast_mutex_unlock(mutex->handle); +#endif +} + +/************************************************************** +Frees a mutex object. */ + +void +os_mutex_free( +/*==========*/ + os_mutex_t mutex) /* in: mutex to free */ +{ +#ifdef __WIN__ + ut_a(mutex); + + ut_a(CloseHandle(mutex->handle)); + ut_free(mutex); +#else + os_fast_mutex_free(mutex->handle); + ut_free(mutex->handle); + ut_free(mutex); +#endif +} + +#ifndef _WIN32 +/************************************************************* +Initializes an operating system fast mutex semaphore. */ + +void +os_fast_mutex_init( +/*===============*/ + os_fast_mutex_t* fast_mutex) /* in: fast mutex */ +{ +#ifdef __WIN__ + ut_a(fast_mutex); + + InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex); +#else + pthread_mutex_init(fast_mutex, NULL); +#endif +} + +/************************************************************** +Acquires ownership of a fast mutex. */ + +void +os_fast_mutex_lock( +/*===============*/ + os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */ +{ +#ifdef __WIN__ + EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex); +#else + pthread_mutex_lock(fast_mutex); +#endif +} + +/************************************************************** +Frees a mutex object. */ + +void +os_fast_mutex_free( +/*===============*/ + os_fast_mutex_t* fast_mutex) /* in: mutex to free */ +{ +#ifdef __WIN__ + ut_a(fast_mutex); + + DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex); +#else + UT_NOT_USED(fast_mutex); + +#endif +} +#endif diff --git a/innobase/os/os0thread.c b/innobase/os/os0thread.c new file mode 100644 index 00000000000..a33613267ac --- /dev/null +++ b/innobase/os/os0thread.c @@ -0,0 +1,210 @@ +/****************************************************** +The interface to the operating system +process and thread control primitives + +(c) 1995 Innobase Oy + +Created 9/8/1995 Heikki Tuuri +*******************************************************/ + +#include "os0thread.h" +#ifdef UNIV_NONINL +#include "os0thread.ic" +#endif + +#ifdef __WIN__ +#include <windows.h> +#endif + +/********************************************************************* +Returns the thread identifier of current thread. */ + +os_thread_id_t +os_thread_get_curr_id(void) +/*=======================*/ +{ +#ifdef __WIN__ + return(GetCurrentThreadId()); +#else + return((os_thread_id_t) pthread_self()); +#endif +} + +/* Define a function pointer type to use in a typecast */ +typedef void* (*os_posix_f_t) (void*); + +/******************************************************************** +Creates a new thread of execution. The execution starts from +the function given. The start function takes a void* parameter +and returns an ulint. */ + +os_thread_t +os_thread_create( +/*=============*/ + /* out: handle to the thread */ + ulint (*start_f)(void*), /* in: pointer to function + from which to start */ + void* arg, /* in: argument to start + function */ + os_thread_id_t* thread_id) /* out: id of created + thread */ +{ +#ifdef __WIN__ + os_thread_t thread; + + thread = CreateThread(NULL, /* no security attributes */ + 0, /* default size stack */ + (LPTHREAD_START_ROUTINE)start_f, + arg, + 0, /* thread runs immediately */ + thread_id); + ut_a(thread); + + return(thread); +#else + int ret; + os_thread_t pthread; + + /* Note that below we cast the start function returning an integer + to a function returning a pointer: this may cause error + if the return value is used somewhere! */ + + ret = pthread_create(&pthread, NULL, (os_posix_f_t) start_f, arg); + + return(pthread); +#endif +} + +/********************************************************************* +Returns handle to the current thread. */ + +os_thread_t +os_thread_get_curr(void) +/*=======================*/ +{ +#ifdef __WIN__ + return(GetCurrentThread()); +#else + return(pthread_self()); +#endif +} + +/********************************************************************* +Converts a thread id to a ulint. */ + +ulint +os_thread_conv_id_to_ulint( +/*=======================*/ + /* out: converted to ulint */ + os_thread_id_t id) /* in: thread id */ +{ + return((ulint)id); +} + +/********************************************************************* +Advises the os to give up remainder of the thread's time slice. */ + +void +os_thread_yield(void) +/*=================*/ +{ +#ifdef __WIN__ + Sleep(0); +#else + os_thread_sleep(0); +#endif +} + +/********************************************************************* +The thread sleeps at least the time given in microseconds. */ + +void +os_thread_sleep( +/*============*/ + ulint tm) /* in: time in microseconds */ +{ +#ifdef __WIN__ + Sleep(tm / 1000); +#else + struct timeval t; + + t.tv_sec = 0; + t.tv_usec = tm; + + select(0, NULL, NULL, NULL, &t); +#endif +} + +/********************************************************************** +Sets a thread priority. */ + +void +os_thread_set_priority( +/*===================*/ + os_thread_t handle, /* in: OS handle to the thread */ + ulint pri) /* in: priority */ +{ +#ifdef __WIN__ + int os_pri; + + if (pri == OS_THREAD_PRIORITY_BACKGROUND) { + os_pri = THREAD_PRIORITY_BELOW_NORMAL; + } else if (pri == OS_THREAD_PRIORITY_NORMAL) { + os_pri = THREAD_PRIORITY_NORMAL; + } else if (pri == OS_THREAD_PRIORITY_ABOVE_NORMAL) { + os_pri = THREAD_PRIORITY_HIGHEST; + } else { + ut_error; + } + + ut_a(SetThreadPriority(handle, os_pri)); +#else + UT_NOT_USED(handle); + UT_NOT_USED(pri); +#endif +} + +/********************************************************************** +Gets a thread priority. */ + +ulint +os_thread_get_priority( +/*===================*/ + /* out: priority */ + os_thread_t handle) /* in: OS handle to the thread */ +{ +#ifdef __WIN__ + int os_pri; + ulint pri; + + os_pri = GetThreadPriority(handle); + + if (os_pri == THREAD_PRIORITY_BELOW_NORMAL) { + pri = OS_THREAD_PRIORITY_BACKGROUND; + } else if (os_pri == THREAD_PRIORITY_NORMAL) { + pri = OS_THREAD_PRIORITY_NORMAL; + } else if (os_pri == THREAD_PRIORITY_HIGHEST) { + pri = OS_THREAD_PRIORITY_ABOVE_NORMAL; + } else { + ut_error; + } + + return(pri); +#else + return(0); +#endif +} + +/********************************************************************** +Gets the last operating system error code for the calling thread. */ + +ulint +os_thread_get_last_error(void) +/*==========================*/ +{ +#ifdef __WIN__ + return(GetLastError()); +#else + return(0); +#endif +} diff --git a/innobase/os/os0trash.c b/innobase/os/os0trash.c new file mode 100644 index 00000000000..e896ac9f083 --- /dev/null +++ b/innobase/os/os0trash.c @@ -0,0 +1,43 @@ +/* Stores the old console mode when echo is turned off */ +ulint os_old_console_mode; + +/******************************************************************** +Turns off echo from console input. */ + +void +os_console_echo_off(void) +/*=====================*/ +{ + GetConsoleMode(stdio, &os_old_console_mode); + SetConsoleMode(stdio, ENABLE_PROCESSED_INPUT); +} + +/******************************************************************** +Turns on echo in console input. */ + +void +os_console_echo_on(void) +/*====================*/ +{ + SetConsoleMode(stdio, &os_old_console_mode); +} + +/******************************************************************** +Reads a character from the console. */ + +char +os_read_console(void) +/*=================*/ +{ + char input_char; + ulint n_chars; + + n_chars = 0; + + while (n_chars == 0) { + ReadConsole(stdio, &input_char, 1, &n_chars, NULL); + } + + return(input_char); +} + diff --git a/innobase/os/ts/makefile b/innobase/os/ts/makefile new file mode 100644 index 00000000000..0e145a14e7f --- /dev/null +++ b/innobase/os/ts/makefile @@ -0,0 +1,20 @@ + + + +include ..\..\makefile.i + +doall: tsos tsosaux + + +tsos: ..\os.lib tsos.c + $(CCOM) $(CFL) -I.. -I..\.. ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\os.lib tsos.c $(LFL) + +tsosaux: tsosaux.c + $(CCOM) $(CFL) -I.. -I..\.. ..\..\ut.lib ..\os.lib tsosaux.c $(LFL) + + + + + + + diff --git a/innobase/os/ts/tsos.c b/innobase/os/ts/tsos.c new file mode 100644 index 00000000000..ecc10b86d2f --- /dev/null +++ b/innobase/os/ts/tsos.c @@ -0,0 +1,793 @@ +/************************************************************************ +The test module for the operating system interface + +(c) 1995 Innobase Oy + +Created 9/27/1995 Heikki Tuuri +*************************************************************************/ + + +#include "../os0thread.h" +#include "../os0shm.h" +#include "../os0proc.h" +#include "../os0sync.h" +#include "../os0file.h" +#include "ut0ut.h" +#include "ut0mem.h" +#include "sync0sync.h" +#include "mem0mem.h" + +#define _WIN32_WINNT 0x0400 +#include "n:\program files\devstudio\vc\include\windows.h" +#include "n:\program files\devstudio\vc\include\winbase.h" + +ulint last_thr = 1; + +byte global_buf[4000000]; + +ulint* cache_buf; + +os_file_t file; +os_file_t file2; + +os_event_t gl_ready; + +mutex_t ios_mutex; +ulint ios; +ulint rnd = 9837497; + +/******************************************************************** +Start function for threads in test1. */ + +ulint +thread(void* arg) +/*==============*/ +{ + ulint i; + void* arg2; + ulint count = 0; + ulint n; + ulint rnd_loc; + byte local_buf[2000]; + + arg2 = arg; + + n = *((ulint*)arg); + +/* printf("Thread %lu started!\n", n); */ + + for (i = 0; i < 8000; i++) { + + rnd_loc = rnd; + rnd += 763482469; + + ut_memcpy(global_buf + (rnd_loc % 1500000) + 8200, local_buf, + 2000); + if (last_thr != n) { + count++; + last_thr = n; + } + + if (i % 32 == 0) { + os_thread_yield(); + } + } + + printf("Thread %lu exits: %lu thread switches noticed\n", n, count); + + return(0); +} + +/********************************************************************* +Test of the speed of wait for multiple events. */ + +void +testa1(void) +/*========*/ +{ + ulint i; + os_event_t arr[64]; + ulint tm, oldtm; + + printf("-------------------------------------------------\n"); + printf("TEST A1. Speed of waits\n"); + + for (i = 0; i < 64; i++) { + arr[i] = os_event_create(NULL); + ut_a(arr[i]); + } + + os_event_set(arr[1]); + + oldtm = ut_clock(); + + for (i = 0; i < 10000; i++) { + os_event_wait_multiple(4, arr); + } + + tm = ut_clock(); + + printf("Wall clock time for %lu multiple waits %lu millisecs\n", + i, tm - oldtm); + + oldtm = ut_clock(); + + for (i = 0; i < 10000; i++) { + os_event_wait(arr[1]); + } + + tm = ut_clock(); + + printf("Wall clock time for %lu single waits %lu millisecs\n", + i, tm - oldtm); + + + for (i = 0; i < 64; i++) { + os_event_free(arr[i]); + } +} + +/********************************************************************* +Test for threads. */ + +void +test1(void) +/*=======*/ +{ + os_thread_t thr[64]; + os_thread_id_t id[64]; + ulint n[64]; + ulint tm, oldtm; + ulint i, j; + + printf("-------------------------------------------\n"); + printf("OS-TEST 1. Test of thread switching through yield\n"); + + printf("Main thread %lu starts!\n", os_thread_get_curr_id()); + + for (j = 0; j < 2; j++) { + + oldtm = ut_clock(); + + for (i = 0; i < 64; i++) { + n[i] = i; + + thr[i] = os_thread_create(thread, n + i, id + i); +/* printf("Thread %lu created, id %lu \n", i, id[i]); */ + } + + for (i = 0; i < 64; i++) { + os_thread_wait(thr[i]); + } + + tm = ut_clock(); + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); + + oldtm = ut_clock(); + + for (i = 0; i < 64; i++) { + + thr[5] = os_thread_create(thread, n + 5, id + 5); + +/* printf("Thread created, id %lu \n", id[5]); */ + + os_thread_wait(thr[5]); + } + + tm = ut_clock(); + printf("Wall clock time for single thread test %lu milliseconds\n", + tm - oldtm); + } +} + +/********************************************************************* +Test for shared memory and process switching through yield. */ + +void +test2(void) +/*=======*/ +{ + os_shm_t shm; + ulint tm, oldtm; + ulint* pr_no; + ulint count; + ulint i; + bool ret; + os_process_t proc; + os_process_id_t proc_id; + + printf("-------------------------------------------\n"); + printf("OS-TEST 2. Test of process switching through yield\n"); + + shm = os_shm_create(1000, "TSOS_SHM"); + + pr_no = os_shm_map(shm); + + *pr_no = 1; + count = 0; + + ret = os_process_create("tsosaux.exe", NULL, &proc, &proc_id); + + printf("Last error: %lu\n", os_thread_get_last_error()); + + ut_a(ret); + + printf("Process 1 starts test!\n"); + + oldtm = ut_clock(); + + for (i = 0; i < 500000; i++) { + if (*pr_no != 1) { + count++; + *pr_no = 1; + } + + os_thread_yield(); + } + + tm = ut_clock(); + + printf("Process 1 finishes test: %lu process switches noticed\n", + count); + + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); + + os_shm_unmap(shm); + + os_shm_free(shm); +} + +#ifdef notdefined + +/********************************************************************* +Test for asynchronous file io. */ + +void +test3(void) +/*=======*/ +{ + ulint i; + ulint j; + void* mess; + bool ret; + void* buf; + ulint rnd; + ulint addr[64]; + ulint serv[64]; + ulint tm, oldtm; + + printf("-------------------------------------------\n"); + printf("OS-TEST 3. Test of asynchronous file io\n"); + + /* Align the buffer for file io */ + + buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF)); + + rnd = ut_time(); + + rnd = rnd * 3416133; + + printf("rnd seed %lu\n", rnd % 4900); + + oldtm = ut_clock(); + + for (i = 0; i < 32; i++) { + + ret = os_aio_read(file, buf, 8192 * (rnd % 4900), 0, + 8192, (void*)i); + ut_a(ret); + rnd += 1; + ret = os_aio_wait(0, &mess); + ut_a(ret); + } + + tm = ut_clock(); + printf("Wall clock time for synchr. io %lu milliseconds\n", + tm - oldtm); + + rnd = rnd * 3416133; + + printf("rnd seed %lu\n", rnd % 5000); + + oldtm = ut_clock(); + + for (j = 0; j < 5; j++) { + + rnd = rnd + 3416133; + + for (i = 0; i < 16; i++) { + ret = os_aio_read(file, buf, 8192 * (rnd % 5000), 0, 8192, + (void*)i); + addr[i] = rnd % 5000; + ut_a(ret); + rnd += 1; + } + + + for (i = 0; i < 16; i++) { + ret = os_aio_read(file, buf, 8192 * (rnd % 5000), 0, 8192, + (void*)i); + addr[i] = rnd % 5000; + ut_a(ret); + rnd += 1; + } + + rnd = rnd + 3416133; + + for (i = 0; i < 32; i++) { + ret = os_aio_wait(0, &mess); + ut_a(ret); + ut_a((ulint)mess < 64); + serv[(ulint)mess] = i; + } + } + tm = ut_clock(); + printf("Wall clock time for aio %lu milliseconds\n", tm - oldtm); + + rnd = rnd * 3416133; + + printf("rnd seed %lu\n", rnd % 4900); + + oldtm = ut_clock(); + +for (j = 0; j < 5; j++) { + + rnd = rnd + 3416133; + + for (i = 0; i < 1; i++) { + ret = os_aio_read(file, buf, 8192 * (rnd % 4900), 0, + 64 * 8192, (void*)i); + ut_a(ret); + rnd += 4; + ret = os_aio_wait(0, &mess); + ut_a(ret); + ut_a((ulint)mess < 64); + } +} + tm = ut_clock(); + printf("Wall clock time for synchr. io %lu milliseconds\n", + tm - oldtm); + + +/* + for (i = 0; i < 63; i++) { + printf("read %lu addr %lu served as %lu\n", + i, addr[i], serv[i]); + } +*/ + + ut_a(ret); +} + +/************************************************************************ +Io-handler thread function. */ + +ulint +handler_thread( +/*===========*/ + void* arg) +{ + ulint segment; + void* mess; + ulint i; + bool ret; + + segment = *((ulint*)arg); + + printf("Thread %lu starts\n", segment); + + for (i = 0;; i++) { + ret = os_aio_wait(segment, &mess); + + mutex_enter(&ios_mutex); + ios++; + mutex_exit(&ios_mutex); + + ut_a(ret); +/* printf("Message for thread %lu %lu\n", segment, + (ulint)mess); */ + if ((ulint)mess == 3333) { + os_event_set(gl_ready); + } + } + + return(0); +} + +/************************************************************************ +Test of io-handler threads */ + +void +test4(void) +/*=======*/ +{ + ulint i; + ulint j; + bool ret; + void* buf; + ulint rnd; + ulint tm, oldtm; + os_thread_t thr[5]; + os_thread_id_t id[5]; + ulint n[5]; + + printf("-------------------------------------------\n"); + printf("OS-TEST 4. Test of asynchronous file io\n"); + + /* Align the buffer for file io */ + + buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF)); + + gl_ready = os_event_create(NULL); + ios = 0; + + sync_init(); + mem_init(); + + mutex_create(&ios_mutex); + + for (i = 0; i < 5; i++) { + n[i] = i; + + thr[i] = os_thread_create(handler_thread, n + i, id + i); + } + + rnd = 0; + + oldtm = ut_clock(); + +for (j = 0; j < 128; j++) { + + + for (i = 0; i < 32; i++) { + ret = os_aio_read(file, (byte*)buf + 8192 * (rnd % 100), + 8192 * (rnd % 4096), 0, + 8192, (void*)i); + ut_a(ret); + rnd += 1; + } + +/* + rnd += 67475941; + + for (i = 0; i < 1; i++) { + ret = os_aio_read(file2, buf, 8192 * (rnd % 5000), 0, + 8192, (void*)i); + ut_a(ret); + rnd += 1; + } +*/ +} + ret = os_aio_read(file, buf, 8192 * (rnd % 4096), 0, 8192, + (void*)3333); + ut_a(ret); + + ut_a(!os_aio_all_slots_free()); +/* + printf("Starting flush!\n"); + ret = os_file_flush(file); + ut_a(ret); + printf("Ending flush!\n"); +*/ + tm = ut_clock(); + + printf("All ios queued! N ios: %lu\n", ios); + + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); + + os_event_wait(gl_ready); + + tm = ut_clock(); + printf("N ios: %lu\n", ios); + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); + + os_thread_sleep(2000000); + + printf("N ios: %lu\n", ios); + + ut_a(os_aio_all_slots_free()); +} + +/************************************************************************* +Initializes the asyncronous io system for tests. */ + +void +init_aio(void) +/*==========*/ +{ + bool ret; + ulint i; + void* buf; + void* mess; + + buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF)); + + os_aio_init(160, 5); + file = os_file_create("j:\\tsfile2", OS_FILE_CREATE, OS_FILE_TABLESPACE, + &ret); + + if (ret == FALSE) { + ut_a(os_file_get_last_error() == OS_FILE_ALREADY_EXISTS); + + file = os_file_create("j:\\tsfile2", OS_FILE_OPEN, + OS_FILE_TABLESPACE, &ret); + + ut_a(ret); + } else { + + for (i = 0; i < 4100; i++) { + ret = os_aio_write(file, buf, 8192 * i, 0, 8192, NULL); + ut_a(ret); + + ret = os_aio_wait(0, &mess); + + ut_a(ret); + ut_a(mess == NULL); + } + } + + file2 = os_file_create("F:\\tmp\\tsfile", OS_FILE_CREATE, + OS_FILE_TABLESPACE, + &ret); + + if (ret == FALSE) { + ut_a(os_file_get_last_error() == OS_FILE_ALREADY_EXISTS); + + file2 = os_file_create("F:\\tmp\\tsfile", OS_FILE_OPEN, + OS_FILE_TABLESPACE, &ret); + + ut_a(ret); + } else { + + for (i = 0; i < 5000; i++) { + ret = os_aio_write(file2, buf, 8192 * i, 0, 8192, NULL); + ut_a(ret); + + ret = os_aio_wait(0, &mess); + + ut_a(ret); + ut_a(mess == NULL); + } + } +} + +/************************************************************************ +Test of synchronous io */ + +void +test5(void) +/*=======*/ +{ + ulint i, j, k; + bool ret; + void* buf; + ulint rnd = 0; + ulint tm = 0; + ulint oldtm = 0; + os_file_t files[1000]; + char name[5]; + ulint err; + + printf("-------------------------------------------\n"); + printf("OS-TEST 5. Test of creating and opening of many files\n"); + + /* Align the buffer for file io */ + + buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF)); + + name[2] = '.'; + name[3] = 'd'; + name[4] = '\0'; + + oldtm = ut_clock(); + + for (j = 0; j < 20; j++) { + for (i = 0; i < 20; i++) { + name[0] = (char)(i + (ulint)'A'); + name[1] = (char)(j + (ulint)'A'); + files[j * 20 + i] = os_file_create(name, OS_FILE_CREATE, + OS_FILE_NORMAL, &ret); + if (!ret) { + err = os_file_get_last_error(); + } + ut_a(ret); + } + } + + for (k = 0; k < i * j; k++) { + ret = os_file_close(files[k]); + ut_a(ret); + } + + for (j = 0; j < 20; j++) { + for (i = 0; i < 20; i++) { + name[0] = (char)(i + (ulint)'A'); + name[1] = (char)(j + (ulint)'A'); + ret = os_file_delete(name); + ut_a(ret); + } + } + + tm = ut_clock(); + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); +} + +/************************************************************************ +Test of synchronous io */ + +void +test6(void) +/*=======*/ +{ + ulint i, j; + bool ret; + void* buf; + ulint rnd = 0; + ulint tm = 0; + ulint oldtm = 0; + os_file_t s_file; + + printf("-------------------------------------------\n"); + printf("OS-TEST 6. Test of synchronous io\n"); + + buf = (void*)(((ulint)global_buf + 6300) & (~0xFFF)); + + ret = os_file_close(file); + ut_a(ret); + + ret = os_file_close(file2); + ut_a(ret); + + s_file = os_file_create("tsfile", OS_FILE_OPEN, + OS_FILE_NORMAL, &ret); + if (!ret) { + printf("Error no %lu\n", os_file_get_last_error()); + } + + ut_a(ret); + + rnd = ut_time() * 6346353; + + oldtm = ut_clock(); + + for (j = 0; j < 100; j++) { + + rnd += 8072791; + + for (i = 0; i < 32; i++) { + ret = os_file_read(s_file, buf, 8192 * (rnd % 5000), 0, + 8192); + ut_a(ret); + rnd += 1; + } + } + + tm = ut_clock(); + + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); +} + +/************************************************************************ +Test of file size operations. */ + +void +test7(void) +/*=======*/ +{ + bool ret; + os_file_t f; + ulint len; + ulint high; + + printf("-------------------------------------------\n"); + printf("OS-TEST 7. Test of setting and getting file size\n"); + + + f = os_file_create("sizefile", OS_FILE_CREATE, OS_FILE_TABLESPACE, + &ret); + ut_a(ret); + + ret = os_file_get_size(f, &len, &high); + ut_a(ret); + + ut_a(len == 0); + ut_a(high == 0); + + ret = os_file_set_size(f, 5000000, 0); + ut_a(ret); + + ret = os_file_get_size(f, &len, &high); + ut_a(ret); + + ut_a(len == 5000000); + ut_a(high == 0); + + ret = os_file_set_size(f, 4000000, 0); + ut_a(ret); + + ret = os_file_get_size(f, &len, &high); + ut_a(ret); + + ut_a(len == 4000000); + ut_a(high == 0); + + ret = os_file_close(f); + ut_a(ret); + + ret = os_file_delete("sizefile"); + ut_a(ret); +} +#endif + +/************************************************************************ +Main test function. */ + +void +main(void) +/*======*/ +{ + ulint tm, oldtm; + ulint i; + CRITICAL_SECTION cs; + ulint sum; + ulint rnd; + + cache_buf = VirtualAlloc(NULL, 4 * 1024, MEM_COMMIT, + PAGE_READWRITE /* | PAGE_NOCACHE */); + oldtm = ut_clock(); + + sum = 0; + rnd = 0; + + for (i = 0; i < 1000000; i++) { + + sum += cache_buf[rnd * (16)]; + + rnd += 1; + + if (rnd > 7) { + rnd = 0; + } + } + + tm = ut_clock(); + + printf("Wall clock time for cache test %lu milliseconds\n", tm - oldtm); + + InterlockedExchange(&i, 5); + + InitializeCriticalSection(&cs); + + oldtm = ut_clock(); + + for (i = 0; i < 10000000; i++) { + + TryEnterCriticalSection(&cs); + + LeaveCriticalSection(&cs); + } + + tm = ut_clock(); + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); + + testa1(); + + test1(); + +/* test2(); */ + +/* init_aio(); */ +/* + test3(); +*/ +/* test4(); + + test5(); + + test6(); + + test7(); */ + + printf("TESTS COMPLETED SUCCESSFULLY!\n"); +} + diff --git a/innobase/os/ts/tsosaux.c b/innobase/os/ts/tsosaux.c new file mode 100644 index 00000000000..8f7780844e9 --- /dev/null +++ b/innobase/os/ts/tsosaux.c @@ -0,0 +1,83 @@ +/************************************************************************ +The test module for the operating system interface +Auxiliary executable run alongside tsos.exe to test +process switching speed + +(c) 1995 Innobase Oy + +Created 9/27/1995 Heikki Tuuri +*************************************************************************/ + + +#include "../os0thread.h" +#include "../os0shm.h" +#include "../os0proc.h" +#include "ut0ut.h" + +/********************************************************************* +Test for shared memory and process switching through yield. */ + +void +test2(void) +/*=======*/ +{ + os_shm_t shm; + ulint tm, oldtm; + ulint* pr_no; + ulint count; + ulint i; + + printf("-------------------------------------------\n"); + printf("OS-TEST 2. Test of process switching through yield\n"); + + + shm = os_shm_create(1000, "TSOS_SHM"); + + pr_no = os_shm_map(shm); + + count = 0; + + printf("Process 2 starts test!\n"); + + oldtm = ut_clock(); + + for (i = 0; i < 100000; i++) { + if (*pr_no != 2) { + count++; + *pr_no = 2; + } + os_thread_yield(); + } + + tm = ut_clock(); + + printf("Process 2 finishes test: %lu process switches noticed\n", + count); + + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); + + + os_shm_unmap(shm); + + os_shm_free(shm); +} + +/************************************************************************ +Main test function. */ + +void +main(void) +/*======*/ +{ + ulint tm, oldtm; + + oldtm = ut_clock(); + + test2(); + + tm = ut_clock(); + printf("Wall clock time for test %lu milliseconds\n", tm - oldtm); + printf("TESTS COMPLETED SUCCESSFULLY!\n"); + + os_process_exit(0); +} |