summaryrefslogtreecommitdiff
path: root/storage/innobase/os
diff options
context:
space:
mode:
authorGuilhem Bichot <guilhem@mysql.com>2009-08-04 13:25:19 +0200
committerGuilhem Bichot <guilhem@mysql.com>2009-08-04 13:25:19 +0200
commitb57e4dbd88671df86e2cf39aff5178976d710b64 (patch)
tree32be2bfec3ca062c65566c60ecf59b673d1f97e9 /storage/innobase/os
parent1a0c2153a036296785dcdfa7b5f4974515616e11 (diff)
parent94efc1c6b084ed531b513e70fb66e7b7a1186b56 (diff)
downloadmariadb-git-b57e4dbd88671df86e2cf39aff5178976d710b64.tar.gz
Creation of mysql-trunk = {summit + "Innodb plugin replacing the builtin"}:
bzr branch mysql-5.1-performance-version mysql-trunk # Summit
cd mysql-trunk
bzr merge mysql-5.1-innodb_plugin # which is 5.1 + Innodb plugin
bzr rm innobase # remove the builtin
Next step: build, test fixes.
Diffstat (limited to 'storage/innobase/os')
-rw-r--r-- storage/innobase/os/os0file.c 4550
-rw-r--r-- storage/innobase/os/os0proc.c 674
-rw-r--r-- storage/innobase/os/os0sync.c 753
-rw-r--r-- storage/innobase/os/os0thread.c 358
4 files changed, 0 insertions, 6335 deletions
diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
deleted file mode 100644
index 4a9d3334e7d..00000000000
--- a/storage/innobase/os/os0file.c
+++ /dev/null
@@ -1,4550 +0,0 @@
-/******************************************************
-The interface to the operating system file i/o primitives
-
-(c) 1995 Innobase Oy
-
-Created 10/21/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0file.h"
-#include "os0sync.h"
-#include "os0thread.h"
-#include "ut0mem.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-
-#if defined(UNIV_HOTBACKUP) && defined(__WIN__)
-/* Add includes for the _stat() call to compile on Windows */
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <errno.h>
-#endif /* UNIV_HOTBACKUP */
-
-#ifdef POSIX_ASYNC_IO
-/* We assume in this case that the OS has standard Posix aio (at least SunOS
-2.6, HP-UX 11i and AIX 4.3 have it) */
-
-#endif
-
-/* This specifies the file permissions InnoDB uses when it creates files in
-Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
-my_umask */
-
-#ifndef __WIN__
-ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
-#else
-ulint os_innodb_umask = 0;
-#endif
-
-#ifdef UNIV_DO_FLUSH
-/* If the following is set to TRUE, we do not call os_file_flush in every
-os_file_write. We can set this TRUE when the doublewrite buffer is used. */
-ibool os_do_not_call_flush_at_each_write = FALSE;
-#else
-/* We do not call os_file_flush in every os_file_write. */
-#endif /* UNIV_DO_FLUSH */
-
-/* We use these mutexes to protect lseek + file i/o operation, if the
-OS does not provide an atomic pread or pwrite, or similar */
-#define OS_FILE_N_SEEK_MUTEXES 16
-os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
-
-/* In simulated aio, merge at most this many consecutive i/os */
-#define OS_AIO_MERGE_N_CONSECUTIVE 64
-
-/* If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use simulated aio we build below with threads */
-
-ibool os_aio_use_native_aio = FALSE;
-
-ibool os_aio_print_debug = FALSE;
-
-/* Status of an IO request in simulated AIO.
- Protocol for simulated aio:
- client requests IO: find slot with reserved = FALSE. Add entry with
- status = OS_AIO_NOT_ISSUED.
- IO thread wakes: find adjacent slots with reserved = TRUE and status =
- OS_AIO_NOT_ISSUED. Change status for slots to
- OS_AIO_ISSUED.
- IO operation completes: set status for slots to OS_AIO_DONE. set status
- for the first slot to OS_AIO_CLAIMED and return
- result for that slot.
- When there are multiple read and write threads, they all compete to execute
- the requests in the array (os_aio_array_t). This avoids the need to load
- balance requests at the time the request is made at the cost of waking all
- threads when a request is available.
-*/
-typedef enum {
- OS_AIO_NOT_ISSUED, /* Available to be processed by an IO thread. */
- OS_AIO_ISSUED, /* Being processed by an IO thread. */
- OS_AIO_DONE, /* Request processed. */
- OS_AIO_CLAIMED /* Result being returned to client. */
-} os_aio_status;
-
-/* The aio array slot structure */
-typedef struct os_aio_slot_struct os_aio_slot_t;
-
-struct os_aio_slot_struct{
- ibool is_read; /* TRUE if a read operation */
- ulint pos; /* index of the slot in the aio
- array */
- ibool reserved; /* TRUE if this slot is reserved */
- os_aio_status status; /* Status for current request. Valid when reserved
- is TRUE. Used only in simulated aio. */
- time_t reservation_time;/* time when reserved */
- ulint len; /* length of the block to read or
- write */
- byte* buf; /* buffer used in i/o */
- ulint type; /* OS_FILE_READ or OS_FILE_WRITE */
- ulint offset; /* 32 low bits of file offset in
- bytes */
- ulint offset_high; /* 32 high bits of file offset */
- os_file_t file; /* file where to read or write */
- const char* name; /* file name or path */
- fil_node_t* message1; /* message which is given by */
- void* message2; /* the requester of an aio operation
- and which can be used to identify
- which pending aio operation was
- completed */
-#ifdef WIN_ASYNC_IO
- os_event_t event; /* event object we need in the
- OVERLAPPED struct */
- OVERLAPPED control; /* Windows control block for the
- aio request */
-#elif defined(POSIX_ASYNC_IO)
- struct aiocb control; /* Posix control block for aio
- request */
-#endif
-};
-
-/* The aio array structure */
-typedef struct os_aio_array_struct os_aio_array_t;
-
-struct os_aio_array_struct{
- os_mutex_t mutex; /* the mutex protecting the aio array */
- os_event_t not_full; /* The event which is set to the signaled
- state when there is space in the aio
- outside the ibuf segment */
- os_event_t is_empty; /* The event which is set to the signaled
- state when there are no pending i/os
- in this array */
- ulint n_slots; /* Total number of slots in the aio array.
- This must be divisible by n_threads. */
- ulint n_reserved;/* Number of reserved slots in the
- aio array outside the ibuf segment */
- os_aio_slot_t* slots; /* Pointer to the slots in the array */
-#ifdef __WIN__
- os_native_event_t* native_events;
- /* Pointer to an array of OS native event
- handles where we copied the handles from
- slots, in the same order. This can be used
- in WaitForMultipleObjects; used only in
- Windows */
-#endif
-};
-
-/* Array of events used in simulated aio */
-os_event_t* os_aio_segment_wait_events = NULL;
-
-/* Number of threads for reading and writing. */
-ulint os_aio_read_threads = 0;
-ulint os_aio_write_threads = 0;
-
-/* Number for the first global segment for reading. */
-const ulint os_aio_first_read_segment = 2;
-
-/* Number for the first global segment for writing. Set to
-2 + os_aio_read_threads. */
-ulint os_aio_first_write_segment = 0;
-
-/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
-are NULL when the module has not yet been initialized. */
-static os_aio_array_t* os_aio_read_array = NULL;
-static os_aio_array_t* os_aio_write_array = NULL;
-static os_aio_array_t* os_aio_ibuf_array = NULL;
-static os_aio_array_t* os_aio_log_array = NULL;
-static os_aio_array_t* os_aio_sync_array = NULL;
-
-/* Per thread buffer used for merged IO requests. Used by
-os_aio_simulated_handle so that a buffer doesn't have to be allocated
-for each request. */
-static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS];
-static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS];
-
-/* Count pages read and written per thread */
-static ulint os_aio_thread_io_reads[SRV_MAX_N_IO_THREADS];
-static ulint os_aio_thread_io_writes[SRV_MAX_N_IO_THREADS];
-
-/* Number of IO operations done. One request can be for N pages. */
-static ulint os_aio_thread_io_requests[SRV_MAX_N_IO_THREADS];
-
-/* usecs spent blocked on an IO request */
-static double os_aio_thread_io_wait[SRV_MAX_N_IO_THREADS];
-/* max usecs spent blocked on an IO request */
-static double os_aio_thread_max_io_wait[SRV_MAX_N_IO_THREADS];
-
-/* Number of IO global segments. An IO handler thread is created for each
-global segment, except for the segment associated with os_aio_sync_array.
-Several segments can be associated with os_aio_{read,write}_array. One
-segment is created for each of the other arrays. This is also the number
-of valid entries in srv_io_thread_reads, srv_io_thread_writes,
-srv_io_thread_op_info, srv_io_thread_function and os_aio_segment_wait_events. */
-static ulint os_aio_n_segments = ULINT_UNDEFINED;
-
-/* Set to TRUE to temporarily block reads from being scheduled while a batch
-of read requests is added to allow them to be merged by the IO handler thread
-if they are adjacent. Declared volatile because we don't want this to be
-read from a register in a loop when another thread may change the value in
-memory.
-*/
-static volatile ibool os_aio_recommend_sleep_for_read_threads = FALSE;
-
-ulint os_n_file_reads = 0;
-ulint os_bytes_read_since_printout = 0;
-ulint os_n_file_writes = 0;
-ulint os_n_fsyncs = 0;
-ulint os_n_file_reads_old = 0;
-ulint os_n_file_writes_old = 0;
-ulint os_n_fsyncs_old = 0;
-time_t os_last_printout;
-
-ibool os_has_said_disk_full = FALSE;
-
-/* The mutex protecting the following counts of pending I/O operations */
-static os_mutex_t os_file_count_mutex;
-ulint os_file_n_pending_preads = 0;
-ulint os_file_n_pending_pwrites = 0;
-ulint os_n_pending_writes = 0;
-ulint os_n_pending_reads = 0;
-
-static double time_usecs() {
- ulint sec, ms;
- if (ut_usectime(&sec, &ms))
- return 0;
- else
- return sec * 1000000.0 + ms;
-}
-
-/***************************************************************************
-Gets the operating system version. Currently works only on Windows. */
-
-ulint
-os_get_os_version(void)
-/*===================*/
- /* out: OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
-{
-#ifdef __WIN__
- OSVERSIONINFO os_info;
-
- os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
-
- ut_a(GetVersionEx(&os_info));
-
- if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
- return(OS_WIN31);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
- return(OS_WIN95);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
- if (os_info.dwMajorVersion <= 4) {
- return(OS_WINNT);
- } else {
- return(OS_WIN2000);
- }
- } else {
- ut_error;
- return(0);
- }
-#else
- ut_error;
-
- return(0);
-#endif
-}
-
-/***************************************************************************
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned. */
-
-ulint
-os_file_get_last_error(
-/*===================*/
- /* out: error number, or OS error
- number + 100 */
- ibool report_all_errors) /* in: TRUE if we want an error message
- printed for all errors */
-{
- ulint err;
-
-#ifdef __WIN__
-
- err = (ulint) GetLastError();
-
- if (report_all_errors
- || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
-
- if (err == ERROR_PATH_NOT_FOUND) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
-
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == ERROR_ACCESS_DENIED) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory. It may also be"
- " you have created a subdirectory\n"
- "InnoDB: of the same name as a data file.\n");
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- fprintf(stderr,
- "InnoDB: The error means that another program"
- " is using InnoDB's files.\n"
- "InnoDB: This might be a backup or antivirus"
- " software or another instance\n"
- "InnoDB: of MySQL."
- " Please close it to get rid of this error.\n");
- } else {
- fprintf(stderr,
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n");
- }
- }
-
- fflush(stderr);
-
- if (err == ERROR_FILE_NOT_FOUND) {
- return(OS_FILE_NOT_FOUND);
- } else if (err == ERROR_DISK_FULL) {
- return(OS_FILE_DISK_FULL);
- } else if (err == ERROR_FILE_EXISTS) {
- return(OS_FILE_ALREADY_EXISTS);
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- return(OS_FILE_SHARING_VIOLATION);
- } else {
- return(100 + err);
- }
-#else
- err = (ulint) errno;
-
- if (report_all_errors
- || (err != ENOSPC && err != EEXIST)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
-
- if (err == ENOENT) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
-
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == EACCES) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory.\n");
- } else {
- if (strerror((int)err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %lu"
- " means '%s'.\n",
- err, strerror((int)err));
- }
-
- fprintf(stderr,
- "InnoDB: Some operating system"
- " error numbers are described at\n"
- "InnoDB: "
- "http://dev.mysql.com/doc/refman/5.1/en/"
- "operating-system-error-codes.html\n");
- }
- }
-
- fflush(stderr);
-
- if (err == ENOSPC) {
- return(OS_FILE_DISK_FULL);
-#ifdef POSIX_ASYNC_IO
- } else if (err == EAGAIN) {
- return(OS_FILE_AIO_RESOURCES_RESERVED);
-#endif
- } else if (err == ENOENT) {
- return(OS_FILE_NOT_FOUND);
- } else if (err == EEXIST) {
- return(OS_FILE_ALREADY_EXISTS);
- } else if (err == EXDEV || err == ENOTDIR || err == EISDIR) {
- return(OS_FILE_PATH_ERROR);
- } else {
- return(100 + err);
- }
-#endif
-}
-
-/********************************************************************
-Does error handling when a file operation fails.
-Conditionally exits (calling exit(3)) based on should_exit value and the
-error type */
-
-static
-ibool
-os_file_handle_error_cond_exit(
-/*===========================*/
- /* out: TRUE if we should retry the
- operation */
- const char* name, /* in: name of a file or NULL */
- const char* operation, /* in: operation */
- ibool should_exit) /* in: call exit(3) if unknown error
- and this parameter is TRUE */
-{
- ulint err;
-
- err = os_file_get_last_error(FALSE);
-
- if (err == OS_FILE_DISK_FULL) {
- /* We only print a warning about disk full once */
-
- if (os_has_said_disk_full) {
-
- return(FALSE);
- }
-
- if (name) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Encountered a problem with"
- " file %s\n", name);
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Disk is full. Try to clean the disk"
- " to free space.\n");
-
- os_has_said_disk_full = TRUE;
-
- fflush(stderr);
-
- return(FALSE);
- } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
-
- return(TRUE);
- } else if (err == OS_FILE_ALREADY_EXISTS
- || err == OS_FILE_PATH_ERROR) {
-
- return(FALSE);
- } else if (err == OS_FILE_SHARING_VIOLATION) {
-
- os_thread_sleep(10000000); /* 10 sec */
- return(TRUE);
- } else {
- if (name) {
- fprintf(stderr, "InnoDB: File name %s\n", name);
- }
-
- fprintf(stderr, "InnoDB: File operation call: '%s'.\n",
- operation);
-
- if (should_exit) {
- fprintf(stderr, "InnoDB: Cannot continue operation.\n");
-
- fflush(stderr);
-
- exit(1);
- }
- }
-
- return(FALSE);
-}
-
-/********************************************************************
-Does error handling when a file operation fails. */
-static
-ibool
-os_file_handle_error(
-/*=================*/
- /* out: TRUE if we should retry the
- operation */
- const char* name, /* in: name of a file or NULL */
- const char* operation)/* in: operation */
-{
- /* exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, TRUE));
-}
-
-/********************************************************************
-Does error handling when a file operation fails. */
-static
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- /* out: TRUE if we should retry the
- operation */
- const char* name, /* in: name of a file or NULL */
- const char* operation)/* in: operation */
-{
- /* don't exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, FALSE));
-}
-
-#undef USE_FILE_LOCK
-#define USE_FILE_LOCK
-#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__)
-/* InnoDB Hot Backup does not lock the data files.
- * On Windows, mandatory locking is used.
- */
-# undef USE_FILE_LOCK
-#endif
-#ifdef USE_FILE_LOCK
-/********************************************************************
-Obtain an exclusive lock on a file. */
-static
-int
-os_file_lock(
-/*=========*/
- /* out: 0 on success */
- int fd, /* in: file descriptor */
- const char* name) /* in: file name */
-{
- struct flock lk;
- lk.l_type = F_WRLCK;
- lk.l_whence = SEEK_SET;
- lk.l_start = lk.l_len = 0;
- if (fcntl(fd, F_SETLK, &lk) == -1) {
- fprintf(stderr,
- "InnoDB: Unable to lock %s, error: %d\n", name, errno);
-
- if (errno == EAGAIN || errno == EACCES) {
- fprintf(stderr,
- "InnoDB: Check that you do not already have"
- " another mysqld process\n"
- "InnoDB: using the same InnoDB data"
- " or log files.\n");
- }
-
- return(-1);
- }
-
- return(0);
-}
-#endif /* USE_FILE_LOCK */
-
-/********************************************************************
-Creates the seek mutexes used in positioned reads and writes. */
-
-void
-os_io_init_simple(void)
-/*===================*/
-{
- ulint i;
-
- os_file_count_mutex = os_mutex_create(NULL);
-
- for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
- os_file_seek_mutexes[i] = os_mutex_create(NULL);
- }
-}
-
-#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__)
-/*************************************************************************
-Creates a temporary file that will be deleted on close.
-This function is defined in ha_innodb.cc. */
-
-int
-innobase_mysql_tmpfile(void);
-/*========================*/
- /* out: temporary file descriptor, or < 0 on error */
-#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
-
-/***************************************************************************
-Creates a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the MySQL temporary directory.
-On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag. */
-
-FILE*
-os_file_create_tmpfile(void)
-/*========================*/
- /* out: temporary file handle, or NULL on error */
-{
-#ifdef UNIV_HOTBACKUP
- ut_error;
-
- return(NULL);
-#else
-# ifdef __NETWARE__
- FILE* file = tmpfile();
-# else /* __NETWARE__ */
- FILE* file = NULL;
- int fd = innobase_mysql_tmpfile();
-
- if (fd >= 0) {
- file = fdopen(fd, "w+b");
- }
-# endif /* __NETWARE__ */
-
- if (!file) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: unable to create temporary file;"
- " errno: %d\n", errno);
-# ifndef __NETWARE__
- if (fd >= 0) {
- close(fd);
- }
-# endif /* !__NETWARE__ */
- }
-
- return(file);
-#endif /* UNIV_HOTBACKUP */
-}
-
-/***************************************************************************
-The os_file_opendir() function opens a directory stream corresponding to the
-directory named by the dirname argument. The directory stream is positioned
-at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing. */
-
-os_file_dir_t
-os_file_opendir(
-/*============*/
- /* out: directory stream, NULL if
- error */
- const char* dirname, /* in: directory name; it must not
- contain a trailing '\' or '/' */
- ibool error_is_fatal) /* in: TRUE if we should treat an
- error as a fatal error; if we try to
- open symlinks then we do not wish a
- fatal error if it happens not to be
- a directory */
-{
- os_file_dir_t dir;
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- char path[OS_FILE_MAX_PATH + 3];
-
- ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
-
- strcpy(path, dirname);
- strcpy(path + strlen(path), "\\*");
-
- /* Note that in Windows opening the 'directory stream' also retrieves
- the first entry in the directory. Since it is '.', that is no problem,
- as we will skip over the '.' and '..' entries anyway. */
-
- lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
-
- dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
-
- ut_free(lpFindFileData);
-
- if (dir == INVALID_HANDLE_VALUE) {
-
- if (error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
- }
-
- return(NULL);
- }
-
- return(dir);
-#else
- dir = opendir(dirname);
-
- if (dir == NULL && error_is_fatal) {
- os_file_handle_error(dirname, "opendir");
- }
-
- return(dir);
-#endif
-}
-
-/***************************************************************************
-Closes a directory stream. */
-
-int
-os_file_closedir(
-/*=============*/
- /* out: 0 if success, -1 if failure */
- os_file_dir_t dir) /* in: directory stream */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ret = FindClose(dir);
-
- if (!ret) {
- os_file_handle_error_no_exit(NULL, "closedir");
-
- return(-1);
- }
-
- return(0);
-#else
- int ret;
-
- ret = closedir(dir);
-
- if (ret) {
- os_file_handle_error_no_exit(NULL, "closedir");
- }
-
- return(ret);
-#endif
-}
-
-/***************************************************************************
-This function returns information about the next file in the directory. We jump
-over the '.' and '..' entries in the directory. */
-
-int
-os_file_readdir_next_file(
-/*======================*/
- /* out: 0 if ok, -1 if error, 1 if at the end
- of the directory */
- const char* dirname,/* in: directory name or path */
- os_file_dir_t dir, /* in: directory stream */
- os_file_stat_t* info) /* in/out: buffer where the info is returned */
-{
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- BOOL ret;
-
- lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
-next_file:
- ret = FindNextFile(dir, lpFindFileData);
-
- if (ret) {
- ut_a(strlen((char *) lpFindFileData->cFileName)
- < OS_FILE_MAX_PATH);
-
- if (strcmp((char *) lpFindFileData->cFileName, ".") == 0
- || strcmp((char *) lpFindFileData->cFileName, "..") == 0) {
-
- goto next_file;
- }
-
- strcpy(info->name, (char *) lpFindFileData->cFileName);
-
- info->size = (ib_longlong)(lpFindFileData->nFileSizeLow)
- + (((ib_longlong)(lpFindFileData->nFileSizeHigh))
- << 32);
-
- if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_REPARSE_POINT) {
- /* TODO: test Windows symlinks */
- /* TODO: MySQL has apparently its own symlink
- implementation in Windows, dbname.sym can
- redirect a database directory:
- http://dev.mysql.com/doc/refman/5.1/en/
- windows-symbolic-links.html */
- info->type = OS_FILE_TYPE_LINK;
- } else if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_DIRECTORY) {
- info->type = OS_FILE_TYPE_DIR;
- } else {
- /* It is probably safest to assume that all other
- file types are normal. Better to check them rather
- than blindly skip them. */
-
- info->type = OS_FILE_TYPE_FILE;
- }
- }
-
- ut_free(lpFindFileData);
-
- if (ret) {
- return(0);
- } else if (GetLastError() == ERROR_NO_MORE_FILES) {
-
- return(1);
- } else {
- os_file_handle_error_no_exit(dirname,
- "readdir_next_file");
- return(-1);
- }
-#else
- struct dirent* ent;
- char* full_path;
- int ret;
- struct stat statinfo;
-#ifdef HAVE_READDIR_R
- char dirent_buf[sizeof(struct dirent)
- + _POSIX_PATH_MAX + 100];
- /* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
- the max file name len; but in most standards, the
- length is NAME_MAX; we add 100 to be even safer */
-#endif
-
-next_file:
-
-#ifdef HAVE_READDIR_R
- ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
-
- if (ret != 0) {
- fprintf(stderr,
- "InnoDB: cannot read directory %s, error %lu\n",
- dirname, (ulong)ret);
-
- return(-1);
- }
-
- if (ent == NULL) {
- /* End of directory */
-
- return(1);
- }
-
- ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
-#else
- ent = readdir(dir);
-
- if (ent == NULL) {
-
- return(1);
- }
-#endif
- ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
-
- if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
-
- goto next_file;
- }
-
- strcpy(info->name, ent->d_name);
-
- full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10);
-
- sprintf(full_path, "%s/%s", dirname, ent->d_name);
-
- ret = stat(full_path, &statinfo);
-
- if (ret) {
- os_file_handle_error_no_exit(full_path, "stat");
-
- ut_free(full_path);
-
- return(-1);
- }
-
- info->size = (ib_longlong)statinfo.st_size;
-
- if (S_ISDIR(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_DIR;
- } else if (S_ISLNK(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_LINK;
- } else if (S_ISREG(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_FILE;
- } else {
- info->type = OS_FILE_TYPE_UNKNOWN;
- }
-
- ut_free(full_path);
-
- return(0);
-#endif
-}
-
-/*********************************************************************
-This function attempts to create a directory named pathname. The new directory
-gets default permissions. On Unix the permissions are (0770 & ~umask). If the
-directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists argument is true. */
-
-ibool
-os_file_create_directory(
-/*=====================*/
- /* out: TRUE if call succeeds,
- FALSE on error */
- const char* pathname, /* in: directory name as
- null-terminated string */
- ibool fail_if_exists) /* in: if TRUE, pre-existing directory
- is treated as an error. */
-{
-#ifdef __WIN__
- BOOL rcode;
-
- rcode = CreateDirectory((LPCTSTR) pathname, NULL);
- if (!(rcode != 0
- || (GetLastError() == ERROR_ALREADY_EXISTS
- && !fail_if_exists))) {
- /* failure */
- os_file_handle_error(pathname, "CreateDirectory");
-
- return(FALSE);
- }
-
- return (TRUE);
-#else
- int rcode;
-
- rcode = mkdir(pathname, 0770);
-
- if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
- /* failure */
- os_file_handle_error(pathname, "mkdir");
-
- return(FALSE);
- }
-
- return (TRUE);
-#endif
-}
-
-/********************************************************************
-A simple function to open or create a file. */
-
-os_file_t
-os_file_create_simple(
-/*==================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
- opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error), or
- OS_FILE_CREATE_PATH if new file
- (if exists, error) and subdirectories along
- its path are created (if needed)*/
- ulint access_type,/* in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
-{
-#ifdef __WIN__
- os_file_t file;
- DWORD create_flag;
- DWORD access;
- DWORD attributes = 0;
- ibool retry;
-
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
- *success = os_file_create_subdirs_if_needed(name);
- if (!*success) {
- ut_error;
- }
- create_flag = CREATE_NEW;
- create_mode = OS_FILE_CREATE;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (access_type == OS_FILE_READ_WRITE) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else {
- access = 0;
- ut_error;
- }
-
- file = CreateFile((LPCTSTR) name,
- access,
- FILE_SHARE_READ | FILE_SHARE_WRITE,
- /* file can be read and written also
- by other processes */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /* no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
-
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
- }
- } else {
- *success = TRUE;
- }
-
- return(file);
-#else /* __WIN__ */
- os_file_t file;
- int create_flag;
- ibool retry;
-
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- if (access_type == OS_FILE_READ_ONLY) {
- create_flag = O_RDONLY;
- } else {
- create_flag = O_RDWR;
- }
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
- *success = os_file_create_subdirs_if_needed(name);
- if (!*success) {
- return (-1);
- }
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- create_mode = OS_FILE_CREATE;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
- } else {
- file = open(name, create_flag);
- }
-
- if (file == -1) {
- *success = FALSE;
-
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
- }
-#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
- *success = FALSE;
- close(file);
- file = -1;
-#endif
- } else {
- *success = TRUE;
- }
-
- return(file);
-#endif /* __WIN__ */
-}
-
-/********************************************************************
-A simple function to open or create a file. Unlike os_file_create_simple(), it does not call os_file_handle_error() on failure. */
-
-os_file_t
-os_file_create_simple_no_error_handling(
-/*====================================*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error) */
- ulint access_type,/* in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
-{
-#ifdef __WIN__
- os_file_t file;
- DWORD create_flag;
- DWORD access;
- DWORD attributes = 0;
- DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
-
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (access_type == OS_FILE_READ_WRITE) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
- access = GENERIC_READ;
- share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
- | FILE_SHARE_WRITE; /* A backup program has to give
- mysqld the maximum freedom to
- do what it likes with the
- file */
- } else {
- access = 0;
- ut_error;
- }
-
- file = CreateFile((LPCTSTR) name,
- access,
- share_mode,
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /* no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
- } else {
- *success = TRUE;
- }
-
- return(file);
-#else /* __WIN__ */
- os_file_t file;
- int create_flag;
-
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN) {
- if (access_type == OS_FILE_READ_ONLY) {
- create_flag = O_RDONLY;
- } else {
- create_flag = O_RDWR;
- }
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
- } else {
- file = open(name, create_flag);
- }
-
- if (file == -1) {
- *success = FALSE;
-#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
- *success = FALSE;
- close(file);
- file = -1;
-#endif
- } else {
- *success = TRUE;
- }
-
- return(file);
-#endif /* __WIN__ */
-}
-
-/********************************************************************
-Tries to disable OS caching on an opened file descriptor. */
-
-void
-os_file_set_nocache(
-/*================*/
- int fd, /* in: file descriptor to alter */
- const char* file_name, /* in: used in the diagnostic message */
- const char* operation_name) /* in: used in the diagnostic message,
- we call os_file_set_nocache()
- immediately after opening or creating
- a file, so this is either "open" or
- "create" */
-{
- /* some versions of Solaris may not have DIRECTIO_ON */
-#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
- if (directio(fd, DIRECTIO_ON) == -1) {
- int errno_save;
- errno_save = (int)errno;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Failed to set DIRECTIO_ON "
- "on file %s: %s: %s, continuing anyway\n",
- file_name, operation_name, strerror(errno_save));
- }
-#elif defined(O_DIRECT)
- if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
- int errno_save;
- errno_save = (int)errno;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Failed to set O_DIRECT "
- "on file %s: %s: %s, continuing anyway\n",
- file_name, operation_name, strerror(errno_save));
- if (errno_save == EINVAL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: O_DIRECT is known to result in "
- "'Invalid argument' on Linux on tmpfs, "
- "see MySQL Bug#26662\n");
- }
- }
-#endif
-}
-
-/********************************************************************
-Opens an existing file or creates a new. */
-
-os_file_t
-os_file_create(
-/*===========*/
- /* out, own: handle to the file, not defined
- if error, error number can be retrieved with
- os_file_get_last_error */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file
- is opened (if does not exist, error), or
- OS_FILE_CREATE if a new file is created
- (if exists, error),
- OS_FILE_OVERWRITE if a new file is created
- or an old overwritten;
- OS_FILE_OPEN_RAW, if a raw device or disk
- partition should be opened */
- ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
-{
-#ifdef __WIN__
- os_file_t file;
- DWORD share_mode = FILE_SHARE_READ;
- DWORD create_flag;
- DWORD attributes;
- ibool retry;
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN_RAW) {
- create_flag = OPEN_EXISTING;
- share_mode = FILE_SHARE_WRITE;
- } else if (create_mode == OS_FILE_OPEN
- || create_mode == OS_FILE_OPEN_RETRY) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else if (create_mode == OS_FILE_OVERWRITE) {
- create_flag = CREATE_ALWAYS;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (purpose == OS_FILE_AIO) {
- /* If specified, use asynchronous (overlapped) io and no
- buffering of writes in the OS */
- attributes = 0;
-#ifdef WIN_ASYNC_IO
- if (os_aio_use_native_aio) {
- attributes = attributes | FILE_FLAG_OVERLAPPED;
- }
-#endif
-#ifdef UNIV_NON_BUFFERED_IO
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-#endif
- } else if (purpose == OS_FILE_NORMAL) {
- attributes = 0;
-#ifdef UNIV_NON_BUFFERED_IO
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-#endif
- } else {
- attributes = 0;
- ut_error;
- }
-
- file = CreateFile((LPCTSTR) name,
- GENERIC_READ | GENERIC_WRITE, /* read and write
- access */
- share_mode, /* File can be read also by other
- processes; we must give the read
- permission because of ibbackup. We do
- not give the write permission to
- others because if one would succeed to
- start 2 instances of mysqld on the
- SAME files, that could cause severe
- database corruption! When opening
- raw disk partitions, Microsoft manuals
- say that we must give also the write
- permission. */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /* no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
-
- /* When srv_file_per_table is on, file creation failure may not
- be critical to the whole instance. Do not crash the server in
- case of unknown errors. */
- if (srv_file_per_table) {
- retry = os_file_handle_error_no_exit(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- } else {
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- }
-
- if (retry) {
- goto try_again;
- }
- } else {
- *success = TRUE;
- }
-
- return(file);
-#else /* __WIN__ */
- os_file_t file;
- int create_flag;
- ibool retry;
- const char* mode_str = NULL;
- const char* type_str = NULL;
- const char* purpose_str = NULL;
-
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
- || create_mode == OS_FILE_OPEN_RETRY) {
- mode_str = "OPEN";
- create_flag = O_RDWR;
- } else if (create_mode == OS_FILE_CREATE) {
- mode_str = "CREATE";
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else if (create_mode == OS_FILE_OVERWRITE) {
- mode_str = "OVERWRITE";
- create_flag = O_RDWR | O_CREAT | O_TRUNC;
- } else {
- create_flag = 0;
- ut_error;
- }
-
- if (type == OS_LOG_FILE) {
- type_str = "LOG";
- } else if (type == OS_DATA_FILE) {
- type_str = "DATA";
- } else {
- ut_error;
- }
-
- if (purpose == OS_FILE_AIO) {
- purpose_str = "AIO";
- } else if (purpose == OS_FILE_NORMAL) {
- purpose_str = "NORMAL";
- } else {
- ut_error;
- }
-
-#if 0
- fprintf(stderr, "Opening file %s, mode %s, type %s, purpose %s\n",
- name, mode_str, type_str, purpose_str);
-#endif
-#ifdef O_SYNC
- /* We let O_SYNC only affect log files; note that we map O_DSYNC to
- O_SYNC because the datasync options seemed to corrupt files in 2001
- in both Linux and Solaris */
- if (type == OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
-
-# if 0
- fprintf(stderr, "Using O_SYNC for file %s\n", name);
-# endif
-
- create_flag = create_flag | O_SYNC;
- }
-#endif /* O_SYNC */
-
- file = open(name, create_flag, os_innodb_umask);
-
- if (file == -1) {
- *success = FALSE;
-
- /* When srv_file_per_table is on, file creation failure may not
- be critical to the whole instance. Do not crash the server in
- case of unknown errors. */
- if (srv_file_per_table) {
- retry = os_file_handle_error_no_exit(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- } else {
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_CREATE ?
- "create" : "open");
- }
-
- if (retry) {
- goto try_again;
- } else {
- return(file /* -1 */);
- }
- }
- /* else */
-
- *success = TRUE;
-
- /* We disable OS caching (O_DIRECT) only on data files */
- if (type != OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
-
- os_file_set_nocache(file, name, mode_str);
- }
-
-#ifdef USE_FILE_LOCK
- if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
-
- if (create_mode == OS_FILE_OPEN_RETRY) {
- int i;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Retrying to lock"
- " the first data file\n",
- stderr);
- for (i = 0; i < 100; i++) {
- os_thread_sleep(1000000);
- if (!os_file_lock(file, name)) {
- *success = TRUE;
- return(file);
- }
- }
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to open the first data file\n",
- stderr);
- }
-
- *success = FALSE;
- close(file);
- file = -1;
- }
-#endif /* USE_FILE_LOCK */
-
- return(file);
-#endif /* __WIN__ */
-}
-
-/***************************************************************************
-Deletes a file if it exists. The file has to be closed before calling this. */
-
-ibool
-os_file_delete_if_exists(
-/*=====================*/
- /* out: TRUE if success */
- const char* name) /* in: file path as a null-terminated string */
-{
-#ifdef __WIN__
- BOOL ret;
- ulint count = 0;
-loop:
- /* In Windows, deleting an .ibd file may fail if ibbackup is copying
- it */
-
- ret = DeleteFile((LPCTSTR)name);
-
- if (ret) {
- return(TRUE);
- }
-
- if (GetLastError() == ERROR_FILE_NOT_FOUND) {
- /* the file does not exist, this not an error */
-
- return(TRUE);
- }
-
- count++;
-
- if (count > 100 && 0 == (count % 10)) {
- fprintf(stderr,
- "InnoDB: Warning: cannot delete file %s\n"
- "InnoDB: Are you running ibbackup"
- " to back up the file?\n", name);
-
- os_file_get_last_error(TRUE); /* print error information */
- }
-
- os_thread_sleep(1000000); /* sleep for a second */
-
- if (count > 2000) {
-
- return(FALSE);
- }
-
- goto loop;
-#else
- int ret;
-
- ret = unlink((const char*)name);
-
- if (ret != 0 && errno != ENOENT) {
- os_file_handle_error_no_exit(name, "delete");
-
- return(FALSE);
- }
-
- return(TRUE);
-#endif
-}
-
-/***************************************************************************
-Deletes a file. The file has to be closed before calling this. */
-
-ibool
-os_file_delete(
-/*===========*/
- /* out: TRUE if success */
- const char* name) /* in: file path as a null-terminated string */
-{
-#ifdef __WIN__
- BOOL ret;
- ulint count = 0;
-loop:
- /* In Windows, deleting an .ibd file may fail if ibbackup is copying
- it */
-
- ret = DeleteFile((LPCTSTR)name);
-
- if (ret) {
- return(TRUE);
- }
-
- if (GetLastError() == ERROR_FILE_NOT_FOUND) {
- /* If the file does not exist, we classify this as a 'mild'
- error and return */
-
- return(FALSE);
- }
-
- count++;
-
- if (count > 100 && 0 == (count % 10)) {
- fprintf(stderr,
- "InnoDB: Warning: cannot delete file %s\n"
- "InnoDB: Are you running ibbackup"
- " to back up the file?\n", name);
-
- os_file_get_last_error(TRUE); /* print error information */
- }
-
- os_thread_sleep(1000000); /* sleep for a second */
-
- if (count > 2000) {
-
- return(FALSE);
- }
-
- goto loop;
-#else
- int ret;
-
- ret = unlink((const char*)name);
-
- if (ret != 0) {
- os_file_handle_error_no_exit(name, "delete");
-
- return(FALSE);
- }
-
- return(TRUE);
-#endif
-}
-
-/***************************************************************************
-Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function. */
-
-ibool
-os_file_rename(
-/*===========*/
- /* out: TRUE if success */
- const char* oldpath,/* in: old file path as a null-terminated
- string */
- const char* newpath)/* in: new file path */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ret = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath);
-
- if (ret) {
- return(TRUE);
- }
-
- os_file_handle_error_no_exit(oldpath, "rename");
-
- return(FALSE);
-#else
- int ret;
-
- ret = rename((const char*)oldpath, (const char*)newpath);
-
- if (ret != 0) {
- os_file_handle_error_no_exit(oldpath, "rename");
-
- return(FALSE);
- }
-
- return(TRUE);
-#endif
-}
-
-/***************************************************************************
-Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error. */
-
-ibool
-os_file_close(
-/*==========*/
- /* out: TRUE if success */
- os_file_t file) /* in, own: handle to a file */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ut_a(file);
-
- ret = CloseHandle(file);
-
- if (ret) {
- return(TRUE);
- }
-
- os_file_handle_error(NULL, "close");
-
- return(FALSE);
-#else
- int ret;
-
- ret = close(file);
-
- if (ret == -1) {
- os_file_handle_error(NULL, "close");
-
- return(FALSE);
- }
-
- return(TRUE);
-#endif
-}
-
-/***************************************************************************
-Closes a file handle. */
-
-ibool
-os_file_close_no_error_handling(
-/*============================*/
- /* out: TRUE if success */
- os_file_t file) /* in, own: handle to a file */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ut_a(file);
-
- ret = CloseHandle(file);
-
- if (ret) {
- return(TRUE);
- }
-
- return(FALSE);
-#else
- int ret;
-
- ret = close(file);
-
- if (ret == -1) {
-
- return(FALSE);
- }
-
- return(TRUE);
-#endif
-}
-
-/***************************************************************************
-Gets a file size. */
-
-ibool
-os_file_get_size(
-/*=============*/
- /* out: TRUE if success */
- os_file_t file, /* in: handle to a file */
- ulint* size, /* out: least significant 32 bits of file
- size */
- ulint* size_high)/* out: most significant 32 bits of size */
-{
-#ifdef __WIN__
- DWORD high;
- DWORD low;
-
- low = GetFileSize(file, &high);
-
- if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
- return(FALSE);
- }
-
- *size = low;
- *size_high = high;
-
- return(TRUE);
-#else
- off_t offs;
-
- offs = lseek(file, 0, SEEK_END);
-
- if (offs == ((off_t)-1)) {
-
- return(FALSE);
- }
-
- if (sizeof(off_t) > 4) {
- *size = (ulint)(offs & 0xFFFFFFFFUL);
- *size_high = (ulint)(offs >> 32);
- } else {
- *size = (ulint) offs;
- *size_high = 0;
- }
-
- return(TRUE);
-#endif
-}
-
-/***************************************************************************
-Gets file size as a 64-bit integer ib_longlong. */
-
-ib_longlong
-os_file_get_size_as_iblonglong(
-/*===========================*/
- /* out: size in bytes, -1 if error */
- os_file_t file) /* in: handle to a file */
-{
- ulint size;
- ulint size_high;
- ibool success;
-
- success = os_file_get_size(file, &size, &size_high);
-
- if (!success) {
-
- return(-1);
- }
-
- return((((ib_longlong)size_high) << 32) + (ib_longlong)size);
-}
-
-/***************************************************************************
-Write the specified number of zeros to a newly created file. */
-
-ibool
-os_file_set_size(
-/*=============*/
- /* out: TRUE if success */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- os_file_t file, /* in: handle to a file */
- ulint size, /* in: least significant 32 bits of file
- size */
- ulint size_high)/* in: most significant 32 bits of size */
-{
- ib_longlong current_size;
- ib_longlong desired_size;
- ibool ret;
- byte* buf;
- byte* buf2;
- ulint buf_size;
-
- ut_a(size == (size & 0xFFFFFFFF));
-
- current_size = 0;
- desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32);
-
- /* Write up to 1 megabyte at a time. */
- buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
- * UNIV_PAGE_SIZE;
- buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
-
- /* Align the buffer for possible raw i/o */
- buf = ut_align(buf2, UNIV_PAGE_SIZE);
-
- /* Write buffer full of zeros */
- memset(buf, 0, buf_size);
-
- if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
-
- fprintf(stderr, "InnoDB: Progress in MB:");
- }
-
- while (current_size < desired_size) {
- ulint n_bytes;
-
- if (desired_size - current_size < (ib_longlong) buf_size) {
- n_bytes = (ulint) (desired_size - current_size);
- } else {
- n_bytes = buf_size;
- }
-
- ret = os_file_write(name, file, buf,
- (ulint)(current_size & 0xFFFFFFFF),
- (ulint)(current_size >> 32),
- n_bytes);
- if (!ret) {
- ut_free(buf2);
- goto error_handling;
- }
-
- /* Print about progress for each 100 MB written */
- if ((ib_longlong) (current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024)
- != current_size / (ib_longlong)(100 * 1024 * 1024)) {
-
- fprintf(stderr, " %lu00",
- (ulong) ((current_size + n_bytes)
- / (ib_longlong)(100 * 1024 * 1024)));
- }
-
- current_size += n_bytes;
- }
-
- if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
-
- fprintf(stderr, "\n");
- }
-
- ut_free(buf2);
-
- ret = os_file_flush(file);
-
- if (ret) {
- return(TRUE);
- }
-
-error_handling:
- return(FALSE);
-}
-
-/***************************************************************************
-Truncates a file at its current position. */
-
-ibool
-os_file_set_eof(
-/*============*/
- /* out: TRUE if success */
- FILE* file) /* in: file to be truncated */
-{
-#ifdef __WIN__
- HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
- return(SetEndOfFile(h));
-#else /* __WIN__ */
- return(!ftruncate(fileno(file), ftell(file)));
-#endif /* __WIN__ */
-}
-
-#ifndef __WIN__
-/***************************************************************************
-Wrapper to fsync(2) that retries the call on some errors.
-Returns the value 0 if successful; otherwise the value -1 is returned and
-the global variable errno is set to indicate the error. */
-
-static
-int
-os_file_fsync(
-/*==========*/
- /* out: 0 if success, -1 otherwise */
- os_file_t file) /* in: handle to a file */
-{
- int ret;
- int failures;
- ibool retry;
-
- failures = 0;
-
- do {
- ret = fsync(file);
-
- os_n_fsyncs++;
-
- if (ret == -1 && errno == ENOLCK) {
-
- if (failures % 100 == 0) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: fsync(): "
- "No locks available; retrying\n");
- }
-
- os_thread_sleep(200000 /* 0.2 sec */);
-
- failures++;
-
- retry = TRUE;
- } else {
-
- retry = FALSE;
- }
- } while (retry);
-
- return(ret);
-}
-#endif /* !__WIN__ */
-
-/***************************************************************************
-Flushes the write buffers of a given file to the disk. */
-
-ibool
-os_file_flush(
-/*==========*/
- /* out: TRUE if success */
- os_file_t file) /* in, own: handle to a file */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ut_a(file);
-
- os_n_fsyncs++;
-
- ret = FlushFileBuffers(file);
-
- if (ret) {
- return(TRUE);
- }
-
- /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
- actually a raw device, we choose to ignore that error if we are using
- raw disks */
-
- if (srv_start_raw_disk_in_use && GetLastError()
- == ERROR_INVALID_FUNCTION) {
- return(TRUE);
- }
-
- os_file_handle_error(NULL, "flush");
-
- /* It is a fatal error if a file flush does not succeed, because then
- the database can get corrupt on disk */
- ut_error;
-
- return(FALSE);
-#else
- int ret;
-
-#if defined(HAVE_DARWIN_THREADS)
-# ifndef F_FULLFSYNC
- /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */
-# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */
-# elif F_FULLFSYNC != 51
-# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
-# endif
- /* Apple has disabled fsync() for internal disk drives in OS X. That
- caused corruption for a user when he tested a power outage. Let us in
- OS X use a nonstandard flush method recommended by an Apple
- engineer. */
-
- if (!srv_have_fullfsync) {
- /* If we are not on an operating system that supports this,
- then fall back to a plain fsync. */
-
- ret = os_file_fsync(file);
- } else {
- ret = fcntl(file, F_FULLFSYNC, NULL);
-
- if (ret) {
- /* If we are not on a file system that supports this,
- then fall back to a plain fsync. */
- ret = os_file_fsync(file);
- }
- }
-#else
- ret = os_file_fsync(file);
-#endif
-
- if (ret == 0) {
- return(TRUE);
- }
-
- /* Since Linux returns EINVAL if the 'file' is actually a raw device,
- we choose to ignore that error if we are using raw disks */
-
- if (srv_start_raw_disk_in_use && errno == EINVAL) {
-
- return(TRUE);
- }
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: the OS said file flush did not succeed\n");
-
- os_file_handle_error(NULL, "flush");
-
- /* It is a fatal error if a file flush does not succeed, because then
- the database can get corrupt on disk */
- ut_error;
-
- return(FALSE);
-#endif
-}
-
-#ifndef __WIN__
-/***********************************************************************
-Does a synchronous read operation in Posix. */
-static
-ssize_t
-os_file_pread(
-/*==========*/
- /* out: number of bytes read, -1 if error */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint n, /* in: number of bytes to read */
- ulint offset, /* in: least significant 32 bits of file
- offset from where to read */
- ulint offset_high) /* in: most significant 32 bits of
- offset */
-{
- off_t offs;
- ssize_t n_bytes;
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- /* If off_t is > 4 bytes in size, then we assume we can pass a
- 64-bit address */
-
- if (sizeof(off_t) > 4) {
- offs = (off_t)offset + (((off_t)offset_high) << 32);
-
- } else {
- offs = (off_t)offset;
-
- if (offset_high > 0) {
- fprintf(stderr,
- "InnoDB: Error: file read at offset > 4 GB\n");
- }
- }
-
- os_n_file_reads++;
-
-#if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
- os_mutex_enter(os_file_count_mutex);
- os_file_n_pending_preads++;
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
- n_bytes = pread(file, buf, (ssize_t)n, offs);
-
- os_mutex_enter(os_file_count_mutex);
- os_file_n_pending_preads--;
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- return(n_bytes);
-#else
- {
- off_t ret_offset;
- ssize_t ret;
- ulint i;
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-
- ret_offset = lseek(file, offs, SEEK_SET);
-
- if (ret_offset < 0) {
- ret = -1;
- } else {
- ret = read(file, buf, (ssize_t)n);
- }
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- return(ret);
- }
-#endif
-}
-
-/***********************************************************************
-Does a synchronous write operation in Posix. */
-static
-ssize_t
-os_file_pwrite(
-/*===========*/
- /* out: number of bytes written, -1 if error */
- os_file_t file, /* in: handle to a file */
- const void* buf, /* in: buffer from where to write */
- ulint n, /* in: number of bytes to write */
- ulint offset, /* in: least significant 32 bits of file
- offset where to write */
- ulint offset_high) /* in: most significant 32 bits of
- offset */
-{
- ssize_t ret;
- off_t offs;
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- /* If off_t is > 4 bytes in size, then we assume we can pass a
- 64-bit address */
-
- if (sizeof(off_t) > 4) {
- offs = (off_t)offset + (((off_t)offset_high) << 32);
- } else {
- offs = (off_t)offset;
-
- if (offset_high > 0) {
- fprintf(stderr,
- "InnoDB: Error: file write"
- " at offset > 4 GB\n");
- }
- }
-
- os_n_file_writes++;
-
-#if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD)
- os_mutex_enter(os_file_count_mutex);
- os_file_n_pending_pwrites++;
- os_n_pending_writes++;
- os_mutex_exit(os_file_count_mutex);
-
- ret = pwrite(file, buf, (ssize_t)n, offs);
-
- os_mutex_enter(os_file_count_mutex);
- os_file_n_pending_pwrites--;
- os_n_pending_writes--;
- os_mutex_exit(os_file_count_mutex);
-
-# ifdef UNIV_DO_FLUSH
- if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
- && !os_do_not_call_flush_at_each_write) {
-
- /* Always do fsync to reduce the probability that when
- the OS crashes, a database page is only partially
- physically written to disk. */
-
- ut_a(TRUE == os_file_flush(file));
- }
-# endif /* UNIV_DO_FLUSH */
-
- return(ret);
-#else
- {
- off_t ret_offset;
- ulint i;
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_writes++;
- os_mutex_exit(os_file_count_mutex);
-
- /* Protect the seek / write operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-
- ret_offset = lseek(file, offs, SEEK_SET);
-
- if (ret_offset < 0) {
- ret = -1;
-
- goto func_exit;
- }
-
- ret = write(file, buf, (ssize_t)n);
-
-# ifdef UNIV_DO_FLUSH
- if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
- && !os_do_not_call_flush_at_each_write) {
-
- /* Always do fsync to reduce the probability that when
- the OS crashes, a database page is only partially
- physically written to disk. */
-
- ut_a(TRUE == os_file_flush(file));
- }
-# endif /* UNIV_DO_FLUSH */
-
-func_exit:
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_writes--;
- os_mutex_exit(os_file_count_mutex);
-
- return(ret);
- }
-#endif
-}
-#endif
-
-/***********************************************************************
-Requests a synchronous positioned read operation. */
-
-ibool
-os_file_read(
-/*=========*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
- offset where to read */
- ulint offset_high, /* in: most significant 32 bits of
- offset */
- ulint n) /* in: number of bytes to read */
-{
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ibool retry;
- ulint i;
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
-
-try_again:
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
-
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
- }
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- if (ret && len == n) {
- return(TRUE);
- }
-#else
- ibool retry;
- ssize_t ret;
-
- os_bytes_read_since_printout += n;
-
-try_again:
- ret = os_file_pread(file, buf, n, offset, offset_high);
-
- if ((ulint)ret == n) {
-
- return(TRUE);
- }
-
- fprintf(stderr,
- "InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n"
- "InnoDB: Was only able to read %ld.\n",
- (ulong)n, (ulong)offset_high,
- (ulong)offset, (long)ret);
-#endif
-#ifdef __WIN__
-error_handling:
-#endif
- retry = os_file_handle_error(NULL, "read");
-
- if (retry) {
- goto try_again;
- }
-
- fprintf(stderr,
- "InnoDB: Fatal error: cannot read from file."
- " OS error number %lu.\n",
-#ifdef __WIN__
- (ulong) GetLastError()
-#else
- (ulong) errno
-#endif
- );
- fflush(stderr);
-
- ut_error;
-
- return(FALSE);
-}
-
-/***********************************************************************
-Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE. */
-
-ibool
-os_file_read_no_error_handling(
-/*===========================*/
- /* out: TRUE if request was
- successful, FALSE if fail */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read */
- ulint offset, /* in: least significant 32 bits of file
- offset where to read */
- ulint offset_high, /* in: most significant 32 bits of
- offset */
- ulint n) /* in: number of bytes to read */
-{
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- DWORD ret2;
- DWORD low;
- DWORD high;
- ibool retry;
- ulint i;
-
- ut_a((offset & 0xFFFFFFFFUL) == offset);
-
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
-
-try_again:
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
-
- low = (DWORD) offset;
- high = (DWORD) offset_high;
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads++;
- os_mutex_exit(os_file_count_mutex);
-
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-
- ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
- if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- goto error_handling;
- }
-
- ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
-
- os_mutex_exit(os_file_seek_mutexes[i]);
-
- os_mutex_enter(os_file_count_mutex);
- os_n_pending_reads--;
- os_mutex_exit(os_file_count_mutex);
-
- if (ret && len == n) {
- return(TRUE);
- }
-#else
- ibool retry;
- ssize_t ret;
-
- os_bytes_read_since_printout += n;
-
-try_again:
- ret = os_file_pread(file, buf, n, offset, offset_high);
-
- if ((ulint)ret == n) {
-
- return(TRUE);
- }
-#endif
-#ifdef __WIN__
-error_handling:
-#endif
- retry = os_file_handle_error_no_exit(NULL, "read");
-
- if (retry) {
- goto try_again;
- }
-
- return(FALSE);
-}
-
-/***********************************************************************
-Rewind file to its start, read at most size - 1 bytes from it to str, and
-NUL-terminate str. All errors are silently ignored. This function is
-mostly meant to be used with temporary files. */
-
-void
-os_file_read_string(
-/*================*/
- FILE* file, /* in: file to read from */
- char* str, /* in: buffer where to read */
- ulint size) /* in: size of buffer */
-{
- size_t flen;
-
- if (size == 0) {
- return;
- }
-
- rewind(file);
- flen = fread(str, 1, size - 1, file);
- str[flen] = '\0';
-}
-
-/***********************************************************************
-Requests a synchronous write operation.
-
-The operating system error code is captured immediately after the call
-that may have failed. The bookkeeping that follows (mutex release, the
-optional flush, timestamp printing) can itself change GetLastError() or
-errno, so reading the code later could report, or retry on, the wrong
-error. */
-
-ibool
-os_file_write(
-/*==========*/
-				/* out: TRUE if request was
-				successful, FALSE if fail */
-	const char*	name,	/* in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/* in: handle to a file */
-	const void*	buf,	/* in: buffer from which to write */
-	ulint		offset,	/* in: least significant 32 bits of file
-				offset where to write */
-	ulint		offset_high, /* in: most significant 32 bits of
-				offset */
-	ulint		n)	/* in: number of bytes to write */
-{
-#ifdef __WIN__
-	BOOL		ret;
-	DWORD		len;
-	DWORD		ret2;
-	DWORD		low;
-	DWORD		high;
-	ulint		i;
-	ulint		n_retries	= 0;
-	ulint		err;
-
-	ut_a((offset & 0xFFFFFFFF) == offset);
-
-	os_n_file_writes++;
-
-	ut_ad(file);
-	ut_ad(buf);
-	ut_ad(n > 0);
-retry:
-	low = (DWORD) offset;
-	high = (DWORD) offset_high;
-
-	os_mutex_enter(os_file_count_mutex);
-	os_n_pending_writes++;
-	os_mutex_exit(os_file_count_mutex);
-
-	/* Protect the seek / write operation with a mutex */
-	i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
-	os_mutex_enter(os_file_seek_mutexes[i]);
-
-	ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
-
-	/* Read the error code before any other call can overwrite it */
-	err = (ulint) GetLastError();
-
-	if (ret2 == 0xFFFFFFFF && err != NO_ERROR) {
-
-		os_mutex_exit(os_file_seek_mutexes[i]);
-
-		os_mutex_enter(os_file_count_mutex);
-		os_n_pending_writes--;
-		os_mutex_exit(os_file_count_mutex);
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" InnoDB: Error: File pointer positioning to"
-			" file %s failed at\n"
-			"InnoDB: offset %lu %lu. Operating system"
-			" error number %lu.\n"
-			"InnoDB: Some operating system error numbers"
-			" are described at\n"
-			"InnoDB: "
-			"http://dev.mysql.com/doc/refman/5.1/en/"
-			"operating-system-error-codes.html\n",
-			name, (ulong) offset_high, (ulong) offset,
-			(ulong) err);
-
-		return(FALSE);
-	}
-
-	ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
-
-	/* Remember the outcome of WriteFile itself: the flush and the
-	mutex operations below may change the thread's last-error value */
-	err = (ulint) GetLastError();
-
-	/* Always do fsync to reduce the probability that when the OS crashes,
-	a database page is only partially physically written to disk. */
-
-# ifdef UNIV_DO_FLUSH
-	if (!os_do_not_call_flush_at_each_write) {
-		ut_a(TRUE == os_file_flush(file));
-	}
-# endif /* UNIV_DO_FLUSH */
-
-	os_mutex_exit(os_file_seek_mutexes[i]);
-
-	os_mutex_enter(os_file_count_mutex);
-	os_n_pending_writes--;
-	os_mutex_exit(os_file_count_mutex);
-
-	if (ret && len == n) {
-
-		return(TRUE);
-	}
-
-	/* If some background file system backup tool is running, then, at
-	least in Windows 2000, we may get here a specific error. Let us
-	retry the operation 100 times, with 1 second waits. */
-
-	if (err == ERROR_LOCK_VIOLATION && n_retries < 100) {
-
-		os_thread_sleep(1000000);
-
-		n_retries++;
-
-		goto retry;
-	}
-
-	/* The detailed message is printed only once per server run */
-	if (!os_has_said_disk_full) {
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" InnoDB: Error: Write to file %s failed"
-			" at offset %lu %lu.\n"
-			"InnoDB: %lu bytes should have been written,"
-			" only %lu were written.\n"
-			"InnoDB: Operating system error number %lu.\n"
-			"InnoDB: Check that your OS and file system"
-			" support files of this size.\n"
-			"InnoDB: Check also that the disk is not full"
-			" or a disk quota exceeded.\n",
-			name, (ulong) offset_high, (ulong) offset,
-			(ulong) n, (ulong) len, (ulong) err);
-
-		/* NOTE(review): err is a Windows error code, while strerror()
-		interprets errno values; the text may not match - confirm */
-		if (strerror((int)err) != NULL) {
-			fprintf(stderr,
-				"InnoDB: Error number %lu means '%s'.\n",
-				(ulong) err, strerror((int)err));
-		}
-
-		fprintf(stderr,
-			"InnoDB: Some operating system error numbers"
-			" are described at\n"
-			"InnoDB: "
-			"http://dev.mysql.com/doc/refman/5.1/en/"
-			"operating-system-error-codes.html\n");
-
-		os_has_said_disk_full = TRUE;
-	}
-
-	return(FALSE);
-#else
-	ssize_t	ret;
-	int	write_errno;
-
-	ret = os_file_pwrite(file, buf, n, offset, offset_high);
-
-	/* Save errno before ut_print_timestamp() or fprintf() can change it */
-	write_errno = errno;
-
-	if ((ulint)ret == n) {
-
-		return(TRUE);
-	}
-
-	/* The detailed message is printed only once per server run */
-	if (!os_has_said_disk_full) {
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" InnoDB: Error: Write to file %s failed"
-			" at offset %lu %lu.\n"
-			"InnoDB: %lu bytes should have been written,"
-			" only %ld were written.\n"
-			"InnoDB: Operating system error number %lu.\n"
-			"InnoDB: Check that your OS and file system"
-			" support files of this size.\n"
-			"InnoDB: Check also that the disk is not full"
-			" or a disk quota exceeded.\n",
-			name, offset_high, offset, n, (long int)ret,
-			(ulint)write_errno);
-		if (strerror(write_errno) != NULL) {
-			fprintf(stderr,
-				"InnoDB: Error number %lu means '%s'.\n",
-				(ulint)write_errno, strerror(write_errno));
-		}
-
-		fprintf(stderr,
-			"InnoDB: Some operating system error numbers"
-			" are described at\n"
-			"InnoDB: "
-			"http://dev.mysql.com/doc/refman/5.1/en/"
-			"operating-system-error-codes.html\n");
-
-		os_has_said_disk_full = TRUE;
-	}
-
-	return(FALSE);
-#endif
-}
-
-/***********************************************************************
-Determines whether a file exists and, if it does, what kind of file it is.
-When TRUE is returned *exists has been set; *type is set only when the
-file exists. */
-
-ibool
-os_file_status(
-/*===========*/
-				/* out: TRUE if call succeeded */
-	const char*	path,	/* in: pathname of the file */
-	ibool*		exists,	/* out: TRUE if file exists */
-	os_file_type_t* type)	/* out: type of the file (if it exists) */
-{
-#ifdef __WIN__
-	struct _stat	st;
-
-	if (_stat(path, &st) != 0) {
-		if (errno == ENOENT || errno == ENOTDIR) {
-			/* nothing there: still a successful answer */
-			*exists = FALSE;
-
-			return(TRUE);
-		}
-
-		/* _stat failed for some other reason: report it */
-		os_file_handle_error_no_exit(path, "stat");
-
-		return(FALSE);
-	}
-
-	*type = (_S_IFDIR & st.st_mode)
-		? OS_FILE_TYPE_DIR
-		: (_S_IFREG & st.st_mode)
-		? OS_FILE_TYPE_FILE
-		: OS_FILE_TYPE_UNKNOWN;
-
-	*exists = TRUE;
-
-	return(TRUE);
-#else
-	struct stat	st;
-
-	if (stat(path, &st) != 0) {
-		if (errno == ENOENT || errno == ENOTDIR) {
-			/* nothing there: still a successful answer */
-			*exists = FALSE;
-
-			return(TRUE);
-		}
-
-		/* stat failed for some other reason: report it */
-		os_file_handle_error_no_exit(path, "stat");
-
-		return(FALSE);
-	}
-
-	/* The tests are made in the order directory, link, regular file */
-	*type = S_ISDIR(st.st_mode)
-		? OS_FILE_TYPE_DIR
-		: S_ISLNK(st.st_mode)
-		? OS_FILE_TYPE_LINK
-		: S_ISREG(st.st_mode)
-		? OS_FILE_TYPE_FILE
-		: OS_FILE_TYPE_UNKNOWN;
-
-	*exists = TRUE;
-
-	return(TRUE);
-#endif
-}
-
-/***********************************************************************
-Fills in the type, the three timestamps and the size of the named file.
-A missing file is not reported as an error; any other stat failure is
-passed to os_file_handle_error_no_exit(). */
-
-ibool
-os_file_get_status(
-/*===============*/
-					/* out: TRUE if stat
-					information found */
-	const char*	path,		/* in: pathname of the file */
-	os_file_stat_t* stat_info)	/* information of a file in a
-					directory */
-{
-#ifdef __WIN__
-	struct _stat	st;
-
-	if (_stat(path, &st) != 0) {
-		if (errno != ENOENT && errno != ENOTDIR) {
-			/* _stat failed although the path may exist */
-			os_file_handle_error_no_exit(path, "stat");
-		}
-
-		return(FALSE);
-	}
-
-	stat_info->type = (_S_IFDIR & st.st_mode)
-		? OS_FILE_TYPE_DIR
-		: (_S_IFREG & st.st_mode)
-		? OS_FILE_TYPE_FILE
-		: OS_FILE_TYPE_UNKNOWN;
-
-	stat_info->ctime = st.st_ctime;
-	stat_info->atime = st.st_atime;
-	stat_info->mtime = st.st_mtime;
-	stat_info->size = st.st_size;
-
-	return(TRUE);
-#else
-	struct stat	st;
-
-	if (stat(path, &st) != 0) {
-		if (errno != ENOENT && errno != ENOTDIR) {
-			/* stat failed although the path may exist */
-			os_file_handle_error_no_exit(path, "stat");
-		}
-
-		return(FALSE);
-	}
-
-	/* The tests are made in the order directory, link, regular file */
-	stat_info->type = S_ISDIR(st.st_mode)
-		? OS_FILE_TYPE_DIR
-		: S_ISLNK(st.st_mode)
-		? OS_FILE_TYPE_LINK
-		: S_ISREG(st.st_mode)
-		? OS_FILE_TYPE_FILE
-		: OS_FILE_TYPE_UNKNOWN;
-
-	stat_info->ctime = st.st_ctime;
-	stat_info->atime = st.st_atime;
-	stat_info->mtime = st.st_mtime;
-	stat_info->size = st.st_size;
-
-	return(TRUE);
-#endif
-}
-
-/* Path name separator character: a backslash when __WIN__ is defined,
-a forward slash otherwise. */
-#ifdef __WIN__
-# define OS_FILE_PATH_SEPARATOR '\\'
-#else
-# define OS_FILE_PATH_SEPARATOR '/'
-#endif
-
-/********************************************************************
-The function os_file_dirname returns a directory component of a
-null-terminated pathname string. In the usual case, dirname returns
-the string up to, but not including, the final path separator, and
-basename is the component following the final separator.
-
-If path does not contain a separator, dirname returns the string ".".
-
-Concatenating the string returned by dirname, a "/", and the basename
-yields a complete pathname.
-
-The return value is a copy of the directory component of the pathname.
-The copy is allocated from heap. It is the caller's responsibility
-to free it after it is no longer needed.
-
-The interface is modelled on dirname from SUSv2. Examples of what this
-implementation returns:
-
-	path		dirname
-	"/usr/lib"	"/usr"
-	"usr"		"."
-	"/"		"/"
-	"."		"."
-	".."		"."
-
-NOTE: trailing separators are not stripped here: only the last separator
-is looked for, so "/usr/" yields "/usr". A path whose only separator is
-its first character always yields the literal string "/", whatever
-OS_FILE_PATH_SEPARATOR is.
-*/
-
-char*
-os_file_dirname(
-/*============*/
-				/* out, own: directory component of the
-				pathname */
-	const char*	path)	/* in: pathname */
-{
-	const char*	sep = strrchr(path, OS_FILE_PATH_SEPARATOR);
-
-	if (sep == NULL) {
-		/* no separator at all: the directory is the current one */
-
-		return(mem_strdup("."));
-	}
-
-	if (sep != path) {
-		/* copy everything that precedes the last separator */
-
-		return(mem_strdupl(path, sep - path));
-	}
-
-	/* the only separator is the very first character */
-
-	return(mem_strdup("/"));
-}
-
-/********************************************************************
-Creates all missing subdirectories along the given path. The directory
-part of path is obtained with os_file_dirname(); missing parents are
-created first, by recursion. */
-
-ibool
-os_file_create_subdirs_if_needed(
-/*=============================*/
-				/* out: TRUE if call succeeded
-				FALSE otherwise */
-	const char*	path)	/* in: path name */
-{
-	char*		dir;
-	ibool		ok;
-	ibool		dir_exists;
-	os_file_type_t	dir_type;
-
-	dir = os_file_dirname(path);
-
-	if (dir[0] != '\0' && dir[1] == '\0'
-	    && (dir[0] == OS_FILE_PATH_SEPARATOR || dir[0] == '.')) {
-		/* the directory is root or cwd, nothing to do */
-		ok = TRUE;
-	} else {
-		ok = os_file_status(dir, &dir_exists, &dir_type);
-
-		if (ok && !dir_exists) {
-			/* make sure the parents exist before the
-			directory itself is created */
-			if (os_file_create_subdirs_if_needed(dir)) {
-				ok = os_file_create_directory(dir, FALSE);
-			} else {
-				ok = FALSE;
-			}
-		}
-	}
-
-	mem_free(dir);
-
-	return(ok);
-}
-
-/********************************************************************
-Returns a pointer to the nth slot in the aio array. An index that is not
-below array->n_slots fails the ut_a assertion. */
-static
-os_aio_slot_t*
-os_aio_array_get_nth_slot(
-/*======================*/
-				/* out: pointer to slot */
-	os_aio_array_t*	array,	/* in: aio array */
-	ulint		index)	/* in: index of the slot */
-{
-	ut_a(index < array->n_slots);
-
-	return(&array->slots[index]);
-}
-
-/****************************************************************************
-Creates an aio wait array with room for n pending requests. Every slot
-starts out unreserved, and the is_empty event starts out set. */
-static
-os_aio_array_t*
-os_aio_array_create(
-/*================*/
-			/* out, own: aio array */
-	ulint	n)	/* in: maximum number of pending aio operations
-			allowed */
-{
-	os_aio_array_t*	arr;
-	ulint		pos;
-
-	ut_a(n > 0);
-
-	arr = ut_malloc(sizeof(os_aio_array_t));
-
-	arr->mutex = os_mutex_create(NULL);
-	arr->not_full = os_event_create(NULL);
-	arr->is_empty = os_event_create(NULL);
-
-	/* nothing is pending in a brand new array */
-	os_event_set(arr->is_empty);
-
-	arr->n_slots = n;
-	arr->n_reserved = 0;
-	arr->slots = ut_malloc(n * sizeof(os_aio_slot_t));
-#ifdef __WIN__
-	arr->native_events = ut_malloc(n * sizeof(os_native_event_t));
-#endif
-	for (pos = 0; pos < n; pos++) {
-		os_aio_slot_t*	cur = &arr->slots[pos];
-
-		cur->pos = pos;
-		cur->reserved = FALSE;
-#ifdef WIN_ASYNC_IO
-		/* each slot gets its own event; its handle is stored both
-		in the OVERLAPPED block and in the native_events array */
-		cur->event = os_event_create(NULL);
-		cur->control.hEvent = cur->event->handle;
-		arr->native_events[pos] = cur->control.hEvent;
-#endif
-	}
-
-	return(arr);
-}
-
-/****************************************************************************
-Initializes the asynchronous io system. Calls also os_io_init_simple.
-Creates an aio array for each of non-ibuf read, non-ibuf write, ibuf IO,
-log IO, and synchronous IO. The caller must create i/o handler thread for all
-but the synchronous aio array. Multiple threads can access the same array for
-the non-ibuf read (prefetch) and write (flush dirty buffer pages) arrays.
-Return the number of AIO handler threads.
-
-Segment 0 is labelled as the insert buffer thread and segment 1 as the log
-thread; the read segments follow, and the write segments come last. */
-
-ulint
-os_aio_init(
-/*========*/
-	ulint	ios_per_array,	/* in: maximum number of pending aio operations
-				allowed per array */
-	ulint	n_read_threads,	/* in: number of read threads */
-	ulint	n_write_threads, /* in: number of write threads */
-	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
-{
-	ulint	i;
-	ulint	n_segments = 2 + n_read_threads + n_write_threads;
-#ifdef POSIX_ASYNC_IO
-	sigset_t	sigset;
-#endif
-	ut_a(ios_per_array >= OS_AIO_N_PENDING_IOS_PER_THREAD);
-	ut_a(n_read_threads >= 1 && n_read_threads <= 64);
-	ut_a(n_write_threads >= 1 && n_write_threads <= 64);
-	ut_a(n_segments < SRV_MAX_N_IO_THREADS);
-
-	os_io_init_simple();
-
-	/* Reset the per-segment statistics and status text */
-	for (i = 0; i < n_segments; i++) {
-		srv_set_io_thread_op_info(i, "not started yet");
-		os_aio_thread_io_reads[i] = 0;
-		os_aio_thread_io_writes[i] = 0;
-		os_aio_thread_io_requests[i] = 0;
-		os_aio_thread_buffer[i] = 0;
-		os_aio_thread_buffer_size[i] = 0;
-		os_aio_thread_io_wait[i] = 0;
-		os_aio_thread_max_io_wait[i] = 0;
-	}
-
-	os_aio_read_threads = n_read_threads;
-	os_aio_write_threads = n_write_threads;
-	os_aio_first_write_segment = os_aio_first_read_segment + os_aio_read_threads;
-
-	fprintf(stderr,
-		"InnoDB: ios_per_array %lu read threads %lu write threads %lu\n",
-		ios_per_array, os_aio_read_threads, os_aio_write_threads);
-
-	os_aio_ibuf_array = os_aio_array_create(ios_per_array);
-
-	srv_io_thread_function[0] = "insert buffer thread";
-
-	os_aio_log_array = os_aio_array_create(ios_per_array);
-
-	srv_io_thread_function[1] = "log thread";
-
-	os_aio_read_array = os_aio_array_create(ios_per_array);
-	for (i = os_aio_first_read_segment; i < os_aio_first_write_segment; i++) {
-		ut_a(i < SRV_MAX_N_IO_THREADS);
-		srv_io_thread_function[i] = "read thread";
-	}
-
-	os_aio_write_array = os_aio_array_create(ios_per_array);
-	for (i = os_aio_first_write_segment; i < n_segments; i++) {
-		ut_a(i < SRV_MAX_N_IO_THREADS);
-		srv_io_thread_function[i] = "write thread";
-	}
-
-	os_aio_sync_array = os_aio_array_create(n_slots_sync);
-
-	os_aio_n_segments = 2 + os_aio_read_threads + os_aio_write_threads;
-
-	os_aio_validate();
-
-	/* One wait event per segment */
-	os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
-
-	for (i = 0; i < n_segments; i++) {
-		os_aio_segment_wait_events[i] = os_event_create(NULL);
-	}
-
-	os_last_printout = time(NULL);
-
-#ifdef POSIX_ASYNC_IO
-	/* Block aio signals from the current thread and its children:
-	for this to work, the current thread must be the first created
-	in the database, so that all its children will inherit its
-	signal mask */
-
-	/* TODO: to work MySQL needs the SIGALARM signal; the following
-	will not work yet! */
-	sigemptyset(&sigset);
-	sigaddset(&sigset, SIGRTMIN + 1 + 0);
-	sigaddset(&sigset, SIGRTMIN + 1 + 1);
-	sigaddset(&sigset, SIGRTMIN + 1 + 2);
-	sigaddset(&sigset, SIGRTMIN + 1 + 3);
-
-	pthread_sigmask(SIG_BLOCK, &sigset, NULL);
-#endif
-	return os_aio_n_segments;
-}
-
-#ifdef WIN_ASYNC_IO
-/****************************************************************************
-Sets the event of every slot in the array, so that the Windows async i/o
-threads waiting on those events wake up at shutdown. */
-static
-void
-os_aio_array_wake_win_aio_at_shutdown(
-/*==================================*/
-	os_aio_array_t*	array)	/* in: aio array */
-{
-	ulint	pos = 0;
-
-	while (pos < array->n_slots) {
-		os_event_set(array->slots[pos].event);
-		pos++;
-	}
-}
-#endif
-
-/****************************************************************************
-Wakes up all async i/o threads so that they know to exit themselves in
-shutdown. With Windows native aio the slot events of the read, write, ibuf
-and log arrays are set first; the segment wait events are set in every
-configuration. */
-
-void
-os_aio_wake_all_threads_at_shutdown(void)
-/*=====================================*/
-{
-	ulint	seg;
-
-#ifdef WIN_ASYNC_IO
-	/* Windows native aio: wake the threads waiting on slot events */
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
-#endif
-	/* Simulated aio: wake the threads waiting on segment events */
-	seg = 0;
-
-	while (seg < os_aio_n_segments) {
-		os_event_set(os_aio_segment_wait_events[seg]);
-		seg++;
-	}
-}
-
-/****************************************************************************
-Blocks on the is_empty event of os_aio_write_array, i.e. until that array
-has no pending writes. Synchronous writes are not tracked there, so some
-of those may still be pending when this returns. */
-
-void
-os_aio_wait_until_no_pending_writes(void)
-/*=====================================*/
-{
-	os_aio_array_t*	writes = os_aio_write_array;
-
-	os_event_wait(writes->is_empty);
-}
-
-/**************************************************************************
-Maps a global segment number to its aio array: segment 0 is the ibuf
-array, segment 1 the log array, the segments below
-os_aio_first_write_segment belong to the read array, and all the rest to
-the write array. */
-static
-os_aio_array_t*
-os_aio_get_array(
-/*=============*/
-				/* out: aio wait array */
-	ulint	global_segment)	/* in: global segment number */
-{
-	ut_a(global_segment < os_aio_n_segments);
-
-	if (global_segment == 0) {
-		return os_aio_ibuf_array;
-	}
-
-	if (global_segment == 1) {
-		return os_aio_log_array;
-	}
-
-	if (global_segment >= os_aio_first_write_segment) {
-		return os_aio_write_array;
-	}
-
-	return os_aio_read_array;
-}
-
-/***********************************************************************
-Gets an integer value designating a specified aio array. This is used
-to give numbers to signals in Posix aio: 0 = ibuf, 1 = log, 2 = read,
-3 = write. */
-
-#if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO)
-static
-ulint
-os_aio_get_array_no(
-/*================*/
-	os_aio_array_t*	array)	/* in: aio array */
-{
-	if (array == os_aio_ibuf_array) {
-		return(0);
-	}
-
-	if (array == os_aio_log_array) {
-		return(1);
-	}
-
-	if (array == os_aio_read_array) {
-		return(2);
-	}
-
-	if (array == os_aio_write_array) {
-		return(3);
-	}
-
-	/* not one of the four numbered arrays */
-	ut_error;
-
-	return(0);
-}
-
-/***********************************************************************
-Gets the aio array for its number; the inverse of os_aio_get_array_no. */
-static
-os_aio_array_t*
-os_aio_get_array_from_no(
-/*=====================*/
-			/* out: aio array */
-	ulint	n)	/* in: array number */
-{
-	switch (n) {
-	case 0:
-		return(os_aio_ibuf_array);
-	case 1:
-		return(os_aio_log_array);
-	case 2:
-		return(os_aio_read_array);
-	case 3:
-		return(os_aio_write_array);
-	default:
-		break;
-	}
-
-	/* only the numbers 0 - 3 designate an array */
-	ut_error;
-
-	return(NULL);
-}
-#endif /* if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO) */
-
-/***********************************************************************
-Requests for a slot in the aio array. If no slot is available, waits until
-not_full-event becomes signaled. The slot that is returned has been marked
-reserved and filled in with the request parameters, and its status is
-OS_AIO_NOT_ISSUED. */
-static
-os_aio_slot_t*
-os_aio_array_reserve_slot(
-/*======================*/
- /* out: pointer to slot */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
- os_aio_array_t* array, /* in: aio array */
- fil_node_t* message1,/* in: message to be passed along with
- the aio operation */
- void* message2,/* in: message to be passed along with
- the aio operation */
- os_file_t file, /* in: file handle */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- void* buf, /* in: buffer where to read or from which
- to write */
- ulint offset, /* in: least significant 32 bits of file
- offset */
- ulint offset_high, /* in: most significant 32 bits of
- offset */
- ulint len) /* in: length of the block to read or write */
-{
- os_aio_slot_t* slot;
-#ifdef WIN_ASYNC_IO
- OVERLAPPED* control;
-
-#elif defined(POSIX_ASYNC_IO)
-
- struct aiocb* control;
-#endif
- ulint i;
-loop:
- os_mutex_enter(array->mutex);
-
- /* Every slot is taken: release the mutex, wait for the not_full
- event and then start over from the top */
- if (array->n_reserved == array->n_slots) {
- os_mutex_exit(array->mutex);
-
- if (!os_aio_use_native_aio) {
- /* If the handler threads are suspended, wake them
- so that we get more slots */
-
- os_aio_simulated_wake_handler_threads();
- }
-
- os_event_wait(array->not_full);
-
- goto loop;
- }
-
- /* n_reserved < n_slots here, so an unreserved slot is expected to
- exist and the scan has no upper bound of its own;
- os_aio_array_get_nth_slot() asserts that the index stays within
- the array */
- for (i = 0;; i++) {
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved == FALSE) {
- break;
- }
- }
- ut_a(i < array->n_slots);
- array->n_reserved++;
-
- /* First reservation: the array is no longer empty */
- if (array->n_reserved == 1) {
- os_event_reset(array->is_empty);
- }
-
- /* Last free slot taken: later callers must wait on not_full */
- if (array->n_reserved == array->n_slots) {
- os_event_reset(array->not_full);
- }
-
- slot->reserved = TRUE;
- slot->reservation_time = time(NULL);
- slot->message1 = message1;
- slot->message2 = message2;
- slot->file = file;
- slot->name = name;
- slot->len = len;
- slot->type = type;
- slot->buf = buf;
- slot->offset = offset;
- slot->offset_high = offset_high;
- slot->status = OS_AIO_NOT_ISSUED;
-
-#ifdef WIN_ASYNC_IO
- /* Store the file offset in the OVERLAPPED block of the slot */
- control = &(slot->control);
- control->Offset = (DWORD)offset;
- control->OffsetHigh = (DWORD)offset_high;
- os_event_reset(slot->event);
-
-#elif defined(POSIX_ASYNC_IO)
-
-#if (UNIV_WORD_SIZE == 8)
- /* Combine the two 32-bit halves into a single offset */
- offset = offset + (offset_high << 32);
-#else
- ut_a(offset_high == 0);
-#endif
- control = &(slot->control);
- control->aio_fildes = file;
- control->aio_buf = buf;
- control->aio_nbytes = len;
- control->aio_offset = offset;
- control->aio_reqprio = 0;
- control->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
- control->aio_sigevent.sigev_signo
- = SIGRTMIN + 1 + os_aio_get_array_no(array);
- /* TODO: How to choose the signal numbers? */
- /*
- fprintf(stderr, "AIO signal number %lu\n",
- (ulint) control->aio_sigevent.sigev_signo);
- */
- /* The slot pointer travels with the completion signal */
- control->aio_sigevent.sigev_value.sival_ptr = slot;
-#endif
- os_mutex_exit(array->mutex);
-
- return(slot);
-}
-
-/***********************************************************************
-Returns a slot to the aio array. Sets the not_full event when the array
-stops being full and the is_empty event when no reserved slot is left. */
-static
-void
-os_aio_array_free_slot(
-/*===================*/
-	os_aio_array_t*	array,	/* in: aio array */
-	os_aio_slot_t*	slot)	/* in: pointer to slot */
-{
-	ut_ad(array);
-	ut_ad(slot);
-
-	os_mutex_enter(array->mutex);
-
-	ut_ad(slot->reserved);
-
-	slot->reserved = FALSE;
-	slot->status = OS_AIO_NOT_ISSUED;
-
-	--array->n_reserved;
-
-	if (array->n_reserved + 1 == array->n_slots) {
-		/* the array was full until this slot was released */
-		os_event_set(array->not_full);
-	}
-
-	if (!array->n_reserved) {
-		/* that was the last reserved slot */
-		os_event_set(array->is_empty);
-	}
-
-#ifdef WIN_ASYNC_IO
-	os_event_reset(slot->event);
-#endif
-	os_mutex_exit(array->mutex);
-}
-
-/**************************************************************************
-Wakes up the simulated aio i/o-handler threads of one array if the array
-holds a request that still needs handling. */
-static
-void
-os_aio_simulated_wake_handler_thread(
-/*=================================*/
-	os_aio_array_t*	array)	/* in: aio array for which wakeup is done */
-{
-	ulint	n_slots;
-	ulint	pos;
-	ulint	seg;
-	ibool	has_work	= FALSE;
-
-	ut_ad(!os_aio_use_native_aio);
-	n_slots = array->n_slots;
-
-	os_mutex_enter(array->mutex);
-
-	/* A reserved slot counts as work when it is OS_AIO_NOT_ISSUED
-	(the read or write has yet to be done) or OS_AIO_DONE (the i/o was
-	done as part of a merged request in which this block was not the
-	first, so its completion has not been handled yet) */
-
-	for (pos = 0; pos < n_slots && !has_work; pos++) {
-		os_aio_slot_t*	cur = os_aio_array_get_nth_slot(array, pos);
-
-		if (!cur->reserved) {
-			continue;
-		}
-
-		if (cur->status == OS_AIO_NOT_ISSUED
-		    || cur->status == OS_AIO_DONE) {
-			has_work = TRUE;
-		}
-	}
-
-	os_mutex_exit(array->mutex);
-
-	if (!has_work) {
-
-		return;
-	}
-
-	if (array == os_aio_ibuf_array) {
-		os_event_set(os_aio_segment_wait_events[0]);
-
-	} else if (array == os_aio_log_array) {
-		os_event_set(os_aio_segment_wait_events[1]);
-
-	} else if (array == os_aio_read_array) {
-		/* every read segment waits on its own event */
-		seg = os_aio_first_read_segment;
-
-		while (seg < os_aio_first_write_segment) {
-			os_event_set(os_aio_segment_wait_events[seg]);
-			seg++;
-		}
-
-	} else if (array == os_aio_write_array) {
-		/* the write segments are the last ones */
-		seg = os_aio_first_write_segment;
-
-		while (seg < os_aio_n_segments) {
-			os_event_set(os_aio_segment_wait_events[seg]);
-			seg++;
-		}
-
-	} else {
-		ut_a(0);
-	}
-}
-
-/**************************************************************************
-Wakes up simulated aio i/o-handler threads if they have something to do.
-Does nothing when native aio is in use. Also clears the recommendation
-that read threads should sleep. */
-
-void
-os_aio_simulated_wake_handler_threads(void)
-/*=======================================*/
-{
-	if (!os_aio_use_native_aio) {
-		os_aio_recommend_sleep_for_read_threads = FALSE;
-
-		os_aio_simulated_wake_handler_thread(os_aio_ibuf_array);
-		os_aio_simulated_wake_handler_thread(os_aio_log_array);
-		os_aio_simulated_wake_handler_thread(os_aio_read_array);
-		os_aio_simulated_wake_handler_thread(os_aio_write_array);
-	}
-}
-
-/**************************************************************************
-This function can be called if one wants to post a batch of reads and
-prefers an i/o-handler thread to handle them all at once later. It raises
-the sleep recommendation for read threads and resets the wait event of
-every read segment. You must call os_aio_simulated_wake_handler_threads
-later to ensure the threads are not left sleeping! */
-
-void
-os_aio_simulated_put_read_threads_to_sleep(void)
-/*============================================*/
-{
-	ulint	seg;
-
-	/* TODO(mcallaghan): provide similar function for write? */
-	os_aio_recommend_sleep_for_read_threads = TRUE;
-
-	seg = os_aio_first_read_segment;
-
-	while (seg < os_aio_first_write_segment) {
-		os_event_reset(os_aio_segment_wait_events[seg]);
-		seg++;
-	}
-}
-
-/***********************************************************************
-Requests an asynchronous i/o operation. An OS_AIO_SYNC request is carried
-out directly with os_file_read() or os_file_write(), except when Windows
-native aio is in use, where it goes through the sync aio array and is
-waited for with os_aio_windows_handle(). Every other request reserves a
-slot in the array selected by mode and is either issued natively or left
-in the slot for a simulated aio handler thread. */
-
-ibool
-os_aio(
-/*===*/
- /* out: TRUE if request was queued
- successfully, FALSE if fail */
- ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
- ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
- to OS_AIO_SIMULATED_WAKE_LATER: the
- last flag advises this function not to wake
- i/o-handler threads, but the caller will
- do the waking explicitly later, in this
- way the caller can post several requests in
- a batch; NOTE that the batch must not be
- so big that it exhausts the slots in aio
- arrays! NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- const char* name, /* in: name of the file or path as a
- null-terminated string */
- os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer where to read or from which
- to write */
- ulint offset, /* in: least significant 32 bits of file
- offset where to read or write */
- ulint offset_high, /* in: most significant 32 bits of
- offset */
- ulint n, /* in: number of bytes to read or write */
- fil_node_t* message1,/* in: messages for the aio handler (these
- can be used to identify a completed aio
- operation); if mode is OS_AIO_SYNC, these
- are ignored */
- void* message2)
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
-#ifdef WIN_ASYNC_IO
- ibool retval;
- BOOL ret = TRUE;
- DWORD len = (DWORD) n;
- struct fil_node_struct * dummy_mess1;
- void* dummy_mess2;
- ulint dummy_type;
-#endif
- ulint err = 0;
- ibool retry;
- ulint wake_later;
-
- ut_ad(file);
- ut_ad(buf);
- ut_ad(n > 0);
- ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(os_aio_validate());
-
- /* Split the wake-later flag off from the mode proper */
- wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
- mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
-
- if (mode == OS_AIO_SYNC
-#ifdef WIN_ASYNC_IO
- && !os_aio_use_native_aio
-#endif
- ) {
- /* This is actually an ordinary synchronous read or write:
- no need to use an i/o-handler thread. NOTE that if we use
- Windows async i/o, Windows does not allow us to use
- ordinary synchronous os_file_read etc. on the same file,
- therefore we have built a special mechanism for synchronous
- wait in the Windows case. */
-
- if (type == OS_FILE_READ) {
- return(os_file_read(file, buf, offset,
- offset_high, n));
- }
-
- ut_a(type == OS_FILE_WRITE);
-
- return(os_file_write(name, file, buf, offset, offset_high, n));
- }
-
-try_again:
- /* Choose the aio array that matches the mode */
- if (mode == OS_AIO_NORMAL) {
- if (type == OS_FILE_READ) {
- array = os_aio_read_array;
- } else {
- array = os_aio_write_array;
- }
- } else if (mode == OS_AIO_IBUF) {
- ut_ad(type == OS_FILE_READ);
- /* Reduce probability of deadlock bugs in connection with ibuf:
- do not let the ibuf i/o handler sleep */
-
- wake_later = FALSE;
-
- array = os_aio_ibuf_array;
- } else if (mode == OS_AIO_LOG) {
-
- array = os_aio_log_array;
- } else if (mode == OS_AIO_SYNC) {
- array = os_aio_sync_array;
- } else {
- array = NULL; /* Eliminate compiler warning */
- ut_error;
- }
-
- /* This may block until a slot becomes free in the chosen array */
- slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
- name, buf, offset, offset_high, n);
- if (type == OS_FILE_READ) {
- if (os_aio_use_native_aio) {
-#ifdef WIN_ASYNC_IO
- os_n_file_reads++;
- os_bytes_read_since_printout += len;
-
- ret = ReadFile(file, buf, (DWORD)n, &len,
- &(slot->control));
-#elif defined(POSIX_ASYNC_IO)
- slot->control.aio_lio_opcode = LIO_READ;
- err = (ulint) aio_read(&(slot->control));
- fprintf(stderr, "Starting POSIX aio read %lu\n", err);
-#endif
- } else {
- /* Simulated aio: the request stays in the slot; wake
- the handler threads unless the caller asked to defer
- the wakeup */
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(array);
- }
- }
- } else if (type == OS_FILE_WRITE) {
- if (os_aio_use_native_aio) {
-#ifdef WIN_ASYNC_IO
- os_n_file_writes++;
- ret = WriteFile(file, buf, (DWORD)n, &len,
- &(slot->control));
-#elif defined(POSIX_ASYNC_IO)
- slot->control.aio_lio_opcode = LIO_WRITE;
- err = (ulint) aio_write(&(slot->control));
- fprintf(stderr, "Starting POSIX aio write %lu\n", err);
-#endif
- } else {
- /* Simulated aio: same handling as for reads above */
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(array);
- }
- }
- } else {
- ut_error;
- }
-
-#ifdef WIN_ASYNC_IO
- if (os_aio_use_native_aio) {
- /* Either the call completed at once with the full length, or
- it is still in progress (ERROR_IO_PENDING) */
- if ((ret && len == n)
- || (!ret && GetLastError() == ERROR_IO_PENDING)) {
- /* aio was queued successfully! */
-
- if (mode == OS_AIO_SYNC) {
- /* We want a synchronous i/o operation on a
- file where we also use async i/o: in Windows
- we must use the same wait mechanism as for
- async i/o */
-
- retval = os_aio_windows_handle(ULINT_UNDEFINED,
- slot->pos,
- &dummy_mess1,
- &dummy_mess2,
- &dummy_type);
-
- return(retval);
- }
-
- return(TRUE);
- }
-
- err = 1; /* Fall through the next if */
- }
-#endif
- /* err is still 0 when simulated aio is used, because nothing was
- issued in that case */
- if (err == 0) {
- /* aio was queued successfully! */
-
- return(TRUE);
- }
-
- /* Issuing the request failed: give the slot back before the error
- is handled */
- os_aio_array_free_slot(array, slot);
-
- retry = os_file_handle_error(name,
- type == OS_FILE_READ
- ? "aio read" : "aio write");
- if (retry) {
-
- goto try_again;
- }
-
- return(FALSE);
-}
-
-#ifdef WIN_ASYNC_IO
-/**************************************************************************
-This function is only used in Windows asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait
-for completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing! */
-
-ibool
-os_aio_windows_handle(
-/*==================*/
- /* out: TRUE if the aio operation succeeded */
- ulint global_segment, /* in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads; if
- this is ULINT_UNDEFINED, then it means that
- sync aio is used, and this parameter is
- ignored */
- ulint pos, /* this parameter is used only in sync aio:
- wait for the aio slot at this position */
- fil_node_t**message1, /* out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type) /* out: OS_FILE_WRITE or ..._READ */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n;
- ulint i;
- ibool ret_val;
- BOOL ret;
- DWORD len;
-
- if (global_segment == ULINT_UNDEFINED) {
- array = os_aio_sync_array;
- } else {
- array = os_aio_get_array(global_segment);
- }
-
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
-
- ut_ad(os_aio_validate());
-
- n = array->n_slots;
-
- /* Sync aio waits for the one slot at pos; otherwise wait on all the
- native events of the array and use the index that is returned */
- if (array == os_aio_sync_array) {
- os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
- i = pos;
- } else {
- srv_set_io_thread_op_info(global_segment, "wait Windows aio");
- i = os_event_wait_multiple(n, (array->native_events));
- }
-
- os_mutex_enter(array->mutex);
-
- slot = os_aio_array_get_nth_slot(array, i);
-
- ut_a(slot->reserved);
-
- if (global_segment != ULINT_UNDEFINED) {
- srv_set_io_thread_op_info(global_segment,
- "get windows aio return value");
- }
-
- ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
-
- /* The messages and the type are handed back whether or not the
- operation succeeded */
- *message1 = slot->message1;
- *message2 = slot->message2;
-
- *type = slot->type;
-
- /* Success requires that the whole requested length was transferred */
- if (ret && len == slot->len) {
- ret_val = TRUE;
-
-# ifdef UNIV_DO_FLUSH
- if (slot->type == OS_FILE_WRITE
- && !os_do_not_call_flush_at_each_write) {
- ut_a(TRUE == os_file_flush(slot->file));
- }
-# endif /* UNIV_DO_FLUSH */
- } else {
- os_file_handle_error(slot->name, "Windows aio");
-
- ret_val = FALSE;
- }
-
- os_mutex_exit(array->mutex);
-
- /* os_aio_array_free_slot() acquires array->mutex itself, so the
- mutex is released before the call */
- os_aio_array_free_slot(array, slot);
-
- return(ret_val);
-}
-#endif
-
-#ifdef POSIX_ASYNC_IO
-
-/**************************************************************************
-This function is only used in Posix asynchronous i/o. Waits for an aio
-operation of the given array to complete: the real-time signal
-SIGRTMIN + 1 + array_no is unblocked for the calling thread and then
-waited for with sigwaitinfo(). The slot whose address arrives in the signal
-value is handed back through message1 and message2, and is freed before
-the function returns. */
-
-ibool
-os_aio_posix_handle(
-/*================*/
-				/* out: TRUE if the aio operation succeeded */
-	ulint	array_no,	/* in: array number 0 - 3 */
-	fil_node_t**message1,	/* out: the messages passed with the aio
-				request; note that also in the case where
-				the aio operation failed, these output
-				parameters are valid and can be used to
-				restart the operation, for example */
-	void**	message2)
-{
-	os_aio_array_t*	array;
-	os_aio_slot_t*	slot;
-	siginfo_t	info;
-	sigset_t	sigset;
-	sigset_t	proc_sigset;	/* only used by the disabled debug print */
-	sigset_t	thr_sigset;	/* only used by the disabled debug print */
-	int		i;		/* only used by the disabled debug print */
-	int		sig;
-
-	sigemptyset(&sigset);
-	sigaddset(&sigset, SIGRTMIN + 1 + array_no);
-
-	pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
-
-#if 0
-	sigprocmask(0, NULL, &proc_sigset);
-	pthread_sigmask(0, NULL, &thr_sigset);
-
-	for (i = 32 ; i < 40; i++) {
-		fprintf(stderr, "%lu : %lu %lu\n", (ulint)i,
-			(ulint) sigismember(&proc_sigset, i),
-			(ulint) sigismember(&thr_sigset, i));
-	}
-#endif
-
-	/* sigwaitinfo() returns the number of the accepted signal, or -1 on
-	error. The value must land in the variable that is tested below;
-	earlier it was stored in a separate variable and an uninitialized
-	one was compared instead. */
-
-	sig = sigwaitinfo(&sigset, &info);
-
-	if (sig != SIGRTMIN + 1 + array_no) {
-
-		ut_error;
-
-		return(FALSE);
-	}
-
-	fputs("Handling POSIX aio\n", stderr);
-
-	array = os_aio_get_array_from_no(array_no);
-
-	os_mutex_enter(array->mutex);
-
-	/* The signal value carries the address of the completed slot */
-	slot = info.si_value.sival_ptr;
-
-	ut_a(slot->reserved);
-
-	*message1 = slot->message1;
-	*message2 = slot->message2;
-
-# ifdef UNIV_DO_FLUSH
-	if (slot->type == OS_FILE_WRITE
-	    && !os_do_not_call_flush_at_each_write) {
-		ut_a(TRUE == os_file_flush(slot->file));
-	}
-# endif /* UNIV_DO_FLUSH */
-
-	os_mutex_exit(array->mutex);
-
-	os_aio_array_free_slot(array, slot);
-
-	return(TRUE);
-}
-#endif
-
-/**************************************************************************
-Sanity check done right around a write. For every full page in the
-combined buffer the 4 bytes at FIL_PAGE_LSN + 4 are compared with the
-4 bytes at UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4; if they
-differ, a diagnostic and the page contents are printed to stderr.
-Reported page checksum errors from Linux seem to wipe over the page end. */
-static
-void
-os_file_check_page_trailers(
-/*========================*/
-	byte*	combined_buf,	/* in: combined write buffer */
-	ulint	total_len)	/* in: size of combined_buf, in bytes
-				(a multiple of UNIV_PAGE_SIZE) */
-{
-	ulint	offs;
-
-	for (offs = 0; offs + UNIV_PAGE_SIZE <= total_len;
-	     offs += UNIV_PAGE_SIZE) {
-		byte*	page		= combined_buf + offs;
-		byte*	header_field	= page + (FIL_PAGE_LSN + 4);
-		byte*	trailer_field	= page + (UNIV_PAGE_SIZE
-						  - FIL_PAGE_END_LSN_OLD_CHKSUM
-						  + 4);
-
-		if (!UNIV_UNLIKELY(memcmp(header_field, trailer_field, 4))) {
-			/* Header and trailer agree: nothing to report */
-			continue;
-		}
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: ERROR: The page to be written"
-			" seems corrupt!\n"
-			"InnoDB: Writing a block of %lu bytes,"
-			" currently at offset %lu\n",
-			(ulong)total_len, (ulong)offs);
-		buf_page_print(page);
-		fprintf(stderr,
-			"InnoDB: ERROR: The page to be written"
-			" seems corrupt!\n");
-	}
-}
-
-/**************************************************************************
-Does simulated aio. This function should be called by an i/o-handler
-thread.
-
-Outline of one call: (1) if a slot of this array is already in state
-OS_AIO_DONE, it is handed back immediately; (2) otherwise a not yet issued
-request is picked: the oldest one if some request has waited for at least
-2 seconds, else the one at the lowest offset; (3) requests that directly
-follow it in the same file (same type, same offset_high) are chained so
-that they can be served with a single os_file_read() or os_file_write()
-call; (4) the i/o is done with the array mutex released, all chained slots
-are marked OS_AIO_DONE, and the messages of the first slot only are
-returned. The remaining slots are handed back by later calls through
-step (1). If nothing is pending, the thread waits on the wait event of its
-segment and then starts over. */
-
-ibool
-os_aio_simulated_handle(
-/*====================*/
- /* out: TRUE if the aio operation succeeded */
- ulint global_segment, /* in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads */
- fil_node_t**message1, /* out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type) /* out: OS_FILE_WRITE or ..._READ */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- os_aio_slot_t* slot2;
- os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
- os_aio_slot_t* lowest_request;
- os_aio_slot_t* oldest_request;
- ulint n_consecutive;
- ulint total_len;
- ulint offs;
- ulint lowest_offset;
- ulint oldest_offset;
- ulint biggest_age;
- ulint age;
- byte* combined_buf;
- byte* combined_buf2;
- ibool ret;
- ulint n;
- ulint i;
-
- double start_usecs, stop_usecs, elapsed_usecs;
- time_t now;
- array = os_aio_get_array(global_segment);
-
-restart:
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
-
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (a)");
- ut_ad(os_aio_validate());
-
- n = array->n_slots;
-
- /* Look through n slots */
-
- if (array == os_aio_read_array
- && os_aio_recommend_sleep_for_read_threads) {
-
- /* Give other threads chance to add several i/os to the array
- at once. */
-
- goto recommended_sleep;
- }
-
- os_mutex_enter(array->mutex);
-
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (b)");
-
- /* Check if there is a slot for which the i/o has already been
- done */
-
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved && slot->status == OS_AIO_DONE) {
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o for slot %lu"
- " already done, returning\n",
- (ulong) i);
- }
-
- ret = TRUE; /* in this function slots are marked OS_AIO_DONE only after ut_a(ret) has passed */
-
- goto slot_io_done;
- }
- }
-
- biggest_age = 0;
- now = time(NULL);
- oldest_request = lowest_request = NULL;
- oldest_offset = lowest_offset = ULINT_MAX;
-
- /* Find the oldest request and the request with the smallest offset */
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved && slot->status == OS_AIO_NOT_ISSUED) {
- age = (ulint)difftime(now, slot->reservation_time);
-
- /* If there are at least 2 seconds old requests, then pick the oldest
- one to prevent starvation. If several requests have the same age,
- then pick the one at the lowest offset. */
- if ((age >= 2 && age > biggest_age)
- || (age >= 2 && age == biggest_age
- && slot->offset < oldest_offset)) {
-
- /* Found an i/o request */
- biggest_age = age;
- oldest_request = slot;
- oldest_offset = slot->offset;
- }
-
- /* Look for an i/o request at the lowest offset in the array
- * (we ignore the high 32 bits of the offset) */
- if (slot->offset < lowest_offset) {
- /* Found an i/o request */
- lowest_request = slot;
-
-
-
- lowest_offset = slot->offset;
- }
- }
- }
-
- if (!lowest_request && !oldest_request) {
-
- /* No i/o requested at the moment */
-
- goto wait_for_io;
- }
-
- if (oldest_request) {
- slot = oldest_request;
- } else {
- slot = lowest_request;
- }
- consecutive_ios[0] = slot;
- n_consecutive = 1;
-
- /* Check if there are several consecutive blocks to read or write.
- Each time a follower is found it becomes the new tail (slot) and
- the scan of the array is started again from the first slot. */
-
-consecutive_loop:
- for (i = 0; i < n; i++) {
- slot2 = os_aio_array_get_nth_slot(array, i);
-
- if (slot2->reserved && slot2 != slot
- && slot2->offset == slot->offset + slot->len
- /* check that sum does not wrap over */
- && slot->offset + slot->len > slot->offset
- && slot2->offset_high == slot->offset_high
- && slot2->type == slot->type
- && slot2->file == slot->file
- && slot2->status == OS_AIO_NOT_ISSUED) {
-
- /* Found a consecutive i/o request */
-
- consecutive_ios[n_consecutive] = slot2;
- n_consecutive++;
-
- slot = slot2;
-
- if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE &&
- n_consecutive < srv_max_merged_io) {
-
- goto consecutive_loop;
- } else {
- break;
- }
- }
- }
-
- srv_set_io_thread_op_info(global_segment, "consecutive i/o requests");
-
- /* We have now collected n_consecutive i/o requests in the array;
- allocate a single buffer which can hold all data, and perform the
- i/o */
-
- total_len = 0;
- slot = consecutive_ios[0];
-
- for (i = 0; i < n_consecutive; i++) {
- total_len += consecutive_ios[i]->len;
- ut_a(consecutive_ios[i]->status == OS_AIO_NOT_ISSUED);
- consecutive_ios[i]->status = OS_AIO_ISSUED;
- }
-
- if (n_consecutive == 1) {
- /* We can use the buffer of the i/o request */
- combined_buf = slot->buf;
- combined_buf2 = NULL;
- } else {
- if ((total_len + UNIV_PAGE_SIZE) > os_aio_thread_buffer_size[global_segment]) {
-
- if (os_aio_thread_buffer[global_segment])
- ut_free(os_aio_thread_buffer[global_segment]);
-
- os_aio_thread_buffer[global_segment] = ut_malloc(total_len + UNIV_PAGE_SIZE);
-
- os_aio_thread_buffer_size[global_segment] = total_len + UNIV_PAGE_SIZE;
- }
- combined_buf2 = os_aio_thread_buffer[global_segment]; /* per-segment buffer, kept and reused by later calls */
-
- ut_a(combined_buf2);
-
- combined_buf = ut_align(combined_buf2, UNIV_PAGE_SIZE); /* the extra UNIV_PAGE_SIZE bytes requested above leave room for this alignment */
- }
-
- /* We release the array mutex for the time of the i/o: NOTE that
- this assumes that there is just one i/o-handler thread serving
- a single segment of slots! */
-
- ut_a(slot->reserved);
- ut_a(slot->status == OS_AIO_ISSUED);
-
- os_mutex_exit(array->mutex);
-
- if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
- /* Copy the buffers to the combined buffer */
- offs = 0;
-
- for (i = 0; i < n_consecutive; i++) {
-
- ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
- consecutive_ios[i]->len);
- offs += consecutive_ios[i]->len;
- }
- }
-
- srv_set_io_thread_op_info(global_segment, "doing file i/o");
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: doing i/o of type %lu at offset %lu %lu,"
- " length %lu\n",
- (ulong) slot->type, (ulong) slot->offset_high,
- (ulong) slot->offset, (ulong) total_len);
- }
-
- /* Do the i/o with ordinary, synchronous i/o functions: the call is
- timed with time_usecs() for the per-segment statistics below */
- if (slot->type == OS_FILE_WRITE) {
- os_aio_thread_io_writes[global_segment] += n_consecutive;
- if (array == os_aio_write_array) {
- if ((total_len % UNIV_PAGE_SIZE != 0)
- || (slot->offset % UNIV_PAGE_SIZE != 0)) {
- fprintf(stderr,
- "InnoDB: Error: trying a displaced"
- " write to %s %lu %lu, len %lu\n",
- slot->name, (ulong) slot->offset_high,
- (ulong) slot->offset,
- (ulong) total_len);
- ut_error;
- }
-
- os_file_check_page_trailers(combined_buf, total_len);
- }
- start_usecs = time_usecs();
- ret = os_file_write(slot->name, slot->file, combined_buf,
- slot->offset, slot->offset_high,
- total_len);
- stop_usecs = time_usecs();
- elapsed_usecs = stop_usecs - start_usecs;
- if (elapsed_usecs < 0) elapsed_usecs = 0;
-
- if (array == os_aio_write_array) {
- os_file_check_page_trailers(combined_buf, total_len); /* checked again after the write */
- }
- } else {
- start_usecs = time_usecs();
- os_aio_thread_io_reads[global_segment] += n_consecutive;
- ret = os_file_read(slot->file, combined_buf,
- slot->offset, slot->offset_high, total_len);
- stop_usecs = time_usecs();
- elapsed_usecs = stop_usecs - start_usecs;
- if (elapsed_usecs < 0) elapsed_usecs = 0;
- }
- if (elapsed_usecs > os_aio_thread_max_io_wait[global_segment])
- os_aio_thread_max_io_wait[global_segment] = elapsed_usecs;
- os_aio_thread_io_wait[global_segment] += elapsed_usecs;
- os_aio_thread_io_requests[global_segment]++;
-
- ut_a(ret); /* a failed read or write is fatal here */
- srv_set_io_thread_op_info(global_segment, "file i/o done");
-
-#if 0
- fprintf(stderr,
- "aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
- n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE);
-#endif
-
- if (slot->type == OS_FILE_READ && n_consecutive > 1) {
- /* Copy the combined buffer to individual buffers */
- offs = 0;
-
- for (i = 0; i < n_consecutive; i++) {
-
- ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
- consecutive_ios[i]->len);
- offs += consecutive_ios[i]->len;
- }
- }
-
- os_mutex_enter(array->mutex);
-
- /* Mark the i/os done in slots */
-
- for (i = 0; i < n_consecutive; i++) {
- ut_a(consecutive_ios[i]->status == OS_AIO_ISSUED);
- consecutive_ios[i]->status = OS_AIO_DONE;
- }
-
- /* We return the messages for the first slot now, and if there were
- several slots, the messages will be returned with subsequent calls
- of this function */
-
-slot_io_done:
-
- ut_a(slot->reserved);
- ut_a(slot->status == OS_AIO_DONE);
- slot->status = OS_AIO_CLAIMED;
-
- *message1 = slot->message1;
- *message2 = slot->message2;
-
- *type = slot->type;
-
- os_mutex_exit(array->mutex);
-
- os_aio_array_free_slot(array, slot);
- srv_set_io_thread_op_info(global_segment, "exited handler");
-
- return(ret);
-
-wait_for_io:
- srv_set_io_thread_op_info(global_segment, "resetting wait event");
-
- /* We wait here until there again can be i/os in the segment
- of this thread. The event is reset while the array mutex is still
- held, and the mutex is released before the wait below. */
-
- os_event_reset(os_aio_segment_wait_events[global_segment]);
-
- os_mutex_exit(array->mutex);
-
-recommended_sleep:
- srv_set_io_thread_op_info(global_segment, "waiting for i/o request");
-
- os_event_wait(os_aio_segment_wait_events[global_segment]);
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o handler thread for i/o"
- " segment %lu wakes up\n",
- (ulong) global_segment);
- }
-
- goto restart;
-}
-
-/**************************************************************************
-Validates the consistency of an aio array: under the array mutex, asserts
-that the array has slots, that every reserved slot has a positive length,
-and that the number of reserved slots equals array->n_reserved. */
-static
-ibool
-os_aio_array_validate(
-/*==================*/
-				/* out: TRUE if ok */
-	os_aio_array_t*	array)	/* in: aio wait array */
-{
-	ulint	n_slots_in_use = 0;
-	ulint	pos = 0;
-
-	ut_a(array);
-
-	os_mutex_enter(array->mutex);
-
-	ut_a(array->n_slots > 0);
-
-	while (pos < array->n_slots) {
-		os_aio_slot_t*	cur = os_aio_array_get_nth_slot(array, pos);
-
-		pos++;
-
-		if (!cur->reserved) {
-			/* Free slots carry nothing to check */
-			continue;
-		}
-
-		n_slots_in_use++;
-		ut_a(cur->len > 0);
-	}
-
-	ut_a(array->n_reserved == n_slots_in_use);
-
-	os_mutex_exit(array->mutex);
-
-	return(TRUE);
-}
-
-/**************************************************************************
-Validates the consistency of the aio system by validating each of the
-five aio arrays in turn: read, write, ibuf, log and sync. */
-
-ibool
-os_aio_validate(void)
-/*=================*/
-				/* out: TRUE if ok */
-{
-	os_aio_array_t*	arrays[5];
-	ulint		k;
-
-	arrays[0] = os_aio_read_array;
-	arrays[1] = os_aio_write_array;
-	arrays[2] = os_aio_ibuf_array;
-	arrays[3] = os_aio_log_array;
-	arrays[4] = os_aio_sync_array;
-
-	for (k = 0; k < 5; k++) {
-		/* Inconsistencies are reported by assertion inside the
-		callee, so the return value is not inspected */
-		os_aio_array_validate(arrays[k]);
-	}
-
-	return(TRUE);
-}
-
-/**************************************************************************
-Prints info of the aio arrays.
-
-Output, in order: one line per i/o segment with the thread state and the
-per-segment read, write, request and wait counters; the number of reserved
-slots in the read, write, ibuf, log and sync arrays; a summary of slot
-statuses; pending flush counts and the totals of OS file reads, writes and
-fsyncs; pending preads and pwrites if any; and per-second rates computed
-against the values saved at the previous printout. The saved values and
-os_last_printout are refreshed at the end.
-
-NOTE(review): num_issued, num_done and num_claimed are reset each time
-control reaches the loop label, so the summary line appears to reflect
-only the last array visited (the sync array); confirm that this is
-intended.
-NOTE(review): a reserved slot whose status is neither OS_AIO_ISSUED nor
-OS_AIO_DONE is counted as claimed; a slot still in OS_AIO_NOT_ISSUED state
-would fail the ut_ad() in the else branch; confirm. */
-
-void
-os_aio_print(
-/*=========*/
- FILE* file) /* in: file where to print */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n_reserved;
- time_t current_time;
- double time_elapsed;
- double avg_bytes_read;
- ulint i;
- ulint num_issued, num_done, num_claimed;
-
- for (i = 0; i < os_aio_n_segments; i++) {
- fprintf(file,
- "I/O thread %lu state: %s (%s) reads %lu writes %lu "
- "requests %lu io secs %lf io msecs/request %lf max_io_wait %lf",
- i, srv_io_thread_op_info[i], srv_io_thread_function[i],
- os_aio_thread_io_reads[i], os_aio_thread_io_writes[i],
- os_aio_thread_io_requests[i],
- os_aio_thread_io_wait[i] / 1000000.0,
- os_aio_thread_io_requests[i] ?
- os_aio_thread_io_wait[i] / os_aio_thread_io_requests[i] / 1000.0 : 0.0,
- os_aio_thread_max_io_wait[i] / 1000.0);
-
-#ifndef __WIN__
- if (os_aio_segment_wait_events[i]->is_set) {
- fprintf(file, " ev set");
- }
-#endif
-
- fprintf(file, "\n");
- }
-
- fputs("Pending normal aio reads:", file);
-
- array = os_aio_read_array;
-loop:
- ut_a(array);
-
- os_mutex_enter(array->mutex);
-
- ut_a(array->n_slots > 0);
- n_reserved = 0;
- num_done = num_issued = num_claimed = 0;
-
- for (i = 0; i < array->n_slots; i++) {
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved) {
- if (slot->status == OS_AIO_ISSUED)
- num_issued++;
- else if (slot->status == OS_AIO_DONE)
- num_done++;
- else {
- ut_ad(slot->status == OS_AIO_CLAIMED);
- num_claimed++;
- }
- n_reserved++;
-#if 0
- fprintf(stderr, "Reserved slot, messages %p %p\n",
- (void*) slot->message1,
- (void*) slot->message2);
-#endif
- ut_a(slot->len > 0);
- }
- }
-
- ut_a(array->n_reserved == n_reserved);
-
- fprintf(file, " %lu", (ulong) n_reserved);
-
- os_mutex_exit(array->mutex);
-
- if (array == os_aio_read_array) { /* next array to visit: write */
- fputs(", aio writes:", file);
-
- array = os_aio_write_array;
-
- goto loop;
- }
-
- if (array == os_aio_write_array) { /* next: ibuf */
- fputs(",\n ibuf aio reads:", file);
- array = os_aio_ibuf_array;
-
- goto loop;
- }
-
- if (array == os_aio_ibuf_array) { /* next: log */
- fputs(", log i/o's:", file);
- array = os_aio_log_array;
-
- goto loop;
- }
-
- if (array == os_aio_log_array) { /* next: sync, the last one */
- fputs(", sync i/o's:", file);
- array = os_aio_sync_array;
-
- goto loop;
- }
-
- putc('\n', file);
- fprintf(file,
- "Summary of background IO slot status: %lu issued, "
- "%lu done, %lu claimed, sleep set %d\n",
- num_issued, num_done, num_claimed,
- (int)os_aio_recommend_sleep_for_read_threads);
-
- putc('\n', file);
- current_time = time(NULL);
- time_elapsed = 0.001 + difftime(current_time, os_last_printout); /* the added 0.001 keeps the divisions below away from zero */
-
- fprintf(file,
- "Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
- "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
- (ulong) fil_n_pending_log_flushes,
- (ulong) fil_n_pending_tablespace_flushes,
- (ulong) os_n_file_reads, (ulong) os_n_file_writes,
- (ulong) os_n_fsyncs);
-
- if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
- fprintf(file,
- "%lu pending preads, %lu pending pwrites\n",
- (ulong) os_file_n_pending_preads,
- (ulong) os_file_n_pending_pwrites);
- }
-
- if (os_n_file_reads == os_n_file_reads_old) {
- avg_bytes_read = 0.0; /* no reads since the last printout: avoid dividing by zero */
- } else {
- avg_bytes_read = (double) os_bytes_read_since_printout
- / (os_n_file_reads - os_n_file_reads_old);
- }
-
- fprintf(file,
- "%.2f reads/s, %lu avg bytes/read,"
- " %.2f writes/s, %.2f fsyncs/s\n",
- (os_n_file_reads - os_n_file_reads_old)
- / time_elapsed,
- (ulong)avg_bytes_read,
- (os_n_file_writes - os_n_file_writes_old)
- / time_elapsed,
- (os_n_fsyncs - os_n_fsyncs_old)
- / time_elapsed);
-
- os_n_file_reads_old = os_n_file_reads;
- os_n_file_writes_old = os_n_file_writes;
- os_n_fsyncs_old = os_n_fsyncs;
- os_bytes_read_since_printout = 0;
-
- os_last_printout = current_time;
-}
-
-/**************************************************************************
-Refreshes the statistics used to print per-second averages: restarts the
-printout clock, clears the count of bytes read since the last printout,
-and saves the current read, write and fsync totals as the new baseline. */
-
-void
-os_aio_refresh_stats(void)
-/*======================*/
-{
-	os_last_printout = time(NULL);
-
-	os_bytes_read_since_printout = 0;
-
-	os_n_fsyncs_old = os_n_fsyncs;
-	os_n_file_writes_old = os_n_file_writes;
-	os_n_file_reads_old = os_n_file_reads;
-}
-
-#ifdef UNIV_DEBUG
-/**************************************************************************
-Checks that all slots in the system have been freed, that is, there are
-no pending io operations. The n_reserved counters of the read, write,
-ibuf, log and sync arrays are summed, each one read under the mutex of
-its own array. */
-
-ibool
-os_aio_all_slots_free(void)
-/*=======================*/
-				/* out: TRUE if all free */
-{
-	os_aio_array_t*	arrays[5];
-	ulint		total_reserved = 0;
-	ulint		k;
-
-	arrays[0] = os_aio_read_array;
-	arrays[1] = os_aio_write_array;
-	arrays[2] = os_aio_ibuf_array;
-	arrays[3] = os_aio_log_array;
-	arrays[4] = os_aio_sync_array;
-
-	for (k = 0; k < 5; k++) {
-		os_mutex_enter(arrays[k]->mutex);
-
-		total_reserved += arrays[k]->n_reserved;
-
-		os_mutex_exit(arrays[k]->mutex);
-	}
-
-	return(total_reserved == 0 ? TRUE : FALSE);
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
deleted file mode 100644
index a99fe8b6a0e..00000000000
--- a/storage/innobase/os/os0proc.c
+++ /dev/null
@@ -1,674 +0,0 @@
-/******************************************************
-The interface to the operating system
-process control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0proc.h"
-#ifdef UNIV_NONINL
-#include "os0proc.ic"
-#endif
-
-#include "ut0mem.h"
-#include "ut0byte.h"
-
-
-/*
-How to get AWE to compile on Windows?
--------------------------------------
-
-In the Visual C++ project settings of the innobase project,
-__WIN2000__ has to be defined.
-
-The Visual C++ has to be relatively recent and _WIN32_WINNT has to be
-defined to a value >= 0x0500 when windows.h is included.
-
-#define _WIN32_WINNT 0x0500
-
-Where does AWE work?
--------------------
-
-See the error message in os_awe_allocate_physical_mem().
-
-How to assign privileges for mysqld to use AWE?
------------------------------------------------
-
-See the error message in os_awe_enable_lock_pages_in_mem().
-
-Use Windows AWE functions in this order
----------------------------------------
-
-(1) os_awe_enable_lock_pages_in_mem();
-(2) os_awe_allocate_physical_mem();
-(3) os_awe_allocate_virtual_mem_window();
-(4) os_awe_map_physical_mem_to_window().
-
-To test 'AWE' in a computer which does not have the AWE API,
-you can compile with UNIV_SIMULATE_AWE defined in this file.
-*/
-
-#ifdef UNIV_SIMULATE_AWE
-/* If we simulate AWE, we allocate the 'physical memory' here */
-byte* os_awe_simulate_mem; /* set by os_awe_allocate_physical_mem() */
-ulint os_awe_simulate_mem_size; /* size of the above in bytes */
-os_awe_t* os_awe_simulate_page_info; /* page info array handed out by os_awe_allocate_physical_mem() */
-byte* os_awe_simulate_window; /* set by os_awe_allocate_virtual_mem_window() */
-ulint os_awe_simulate_window_size; /* size of the above in bytes */
-/* In simulated AWE the following contains a NULL pointer or a pointer
-to a mapped 'physical page' for each 4 kB page in the AWE window */
-byte** os_awe_simulate_map;
-#endif
-
-#ifdef __WIN2000__
-os_awe_t* os_awe_page_info; /* page info array, set by os_awe_allocate_physical_mem() */
-ulint os_awe_n_pages; /* number of pages requested in os_awe_allocate_physical_mem() */
-byte* os_awe_window; /* start of the window, set by os_awe_allocate_virtual_mem_window() */
-ulint os_awe_window_size; /* size of the window in bytes */
-#endif
-
-ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-ulint os_large_page_size;
-
-/********************************************************************
-Windows AWE support. Tries to enable the "lock pages in memory" privilege for
-the current process so that the current process can allocate memory-locked
-virtual address space to act as the window where AWE maps physical memory.
-The process token opened for the adjustment is closed on every return path
-that follows a successful OpenProcessToken(), including the failures. */
-
-ibool
-os_awe_enable_lock_pages_in_mem(void)
-/*=================================*/
-				/* out: TRUE if success, FALSE if error;
-				prints error info to stderr if no success */
-{
-#ifdef UNIV_SIMULATE_AWE
-
-	return(TRUE);
-
-#elif defined(__WIN2000__)
-	struct {
-		DWORD			Count;
-		LUID_AND_ATTRIBUTES	Privilege[1];
-	} Info;
-	HANDLE	hProcess;
-	HANDLE	Token;
-	DWORD	err;
-
-	hProcess = GetCurrentProcess();
-
-	/* Open the token of the current process. The Win32 calls below
-	report success as any nonzero value, so they are tested for zero
-	rather than compared with TRUE. */
-
-	if (!OpenProcessToken(hProcess, TOKEN_ADJUST_PRIVILEGES, &Token)) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot open process token, error %lu\n",
-			(ulint)GetLastError());
-		return(FALSE);
-	}
-
-	Info.Count = 1;
-
-	Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED;
-
-	/* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY
-	privilege */
-
-	if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
-				  &(Info.Privilege[0].Luid))) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot get local privilege"
-			" value for %s, error %lu.\n",
-			SE_LOCK_MEMORY_NAME, (ulint)GetLastError());
-
-		CloseHandle(Token);
-
-		return(FALSE);
-	}
-
-	/* Try to adjust the privilege */
-
-	if (!AdjustTokenPrivileges(Token, FALSE,
-				   (PTOKEN_PRIVILEGES)&Info,
-				   0, NULL, NULL)) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot adjust process token privileges,"
-			" error %u.\n",
-			(unsigned int) GetLastError());
-
-		CloseHandle(Token);
-
-		return(FALSE);
-	}
-
-	/* The call can succeed without granting the privilege; the last
-	error code tells the difference, so it is read right away, before
-	any other call can overwrite it */
-
-	err = GetLastError();
-
-	if (err != ERROR_SUCCESS) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege,"
-			" error %lu.\n"
-			"InnoDB: In Windows XP Home you cannot use AWE."
-			" In Windows 2000 and XP\n"
-			"InnoDB: Professional you must go to the"
-			" Control Panel, to\n"
-			"InnoDB: Security Settings, to Local Policies,"
-			" and enable\n"
-			"InnoDB: the 'lock pages in memory' privilege"
-			" for the user who runs\n"
-			"InnoDB: the MySQL server.\n", (unsigned long) err);
-
-		CloseHandle(Token);
-
-		return(FALSE);
-	}
-
-	CloseHandle(Token);
-
-	return(TRUE);
-#else
-#ifdef __WIN__
-	fprintf(stderr,
-		"InnoDB: AWE: Error: to use AWE you must use"
-		" a ...-nt MySQL executable.\n");
-#endif
-	return(FALSE);
-#endif
-}
-
-/********************************************************************
-Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
-processor.
-
-There are three build variants. With UNIV_SIMULATE_AWE the 'physical
-memory' and the page info array are taken from ut_malloc(). With
-__WIN2000__ the page info array comes from the process heap and the pages
-from AllocateUserPhysicalPages(), after the lock-pages privilege has been
-enabled. In any other build the function does nothing and returns FALSE.
-
-NOTE(review): in the __WIN2000__ branch the failure returns that follow
-the HeapAlloc() call do not release the page info array, and
-ut_total_allocated_memory stays incremented; confirm that callers treat
-a FALSE return as fatal. */
-
-ibool
-os_awe_allocate_physical_mem(
-/*=========================*/
- /* out: TRUE if success */
- os_awe_t** page_info, /* out, own: array of opaque data containing
- the info for allocated physical memory pages;
- each allocated 4 kB physical memory page has
- one slot of type os_awe_t in the array */
- ulint n_megabytes) /* in: number of megabytes to allocate */
-{
-#ifdef UNIV_SIMULATE_AWE
- os_awe_simulate_page_info = ut_malloc
- (sizeof(os_awe_t) * n_megabytes
- * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE));
-
- os_awe_simulate_mem
- = ut_align(ut_malloc(4096 + 1024 * 1024 * n_megabytes), 4096); /* 4096 extra bytes leave room for the alignment */
- os_awe_simulate_mem_size = n_megabytes * 1024 * 1024;
-
- *page_info = os_awe_simulate_page_info;
-
- return(TRUE);
-
-#elif defined(__WIN2000__)
- BOOL bResult;
- os_awe_t NumberOfPages; /* Question: why does Windows
- use the name ULONG_PTR for
- a scalar integer type? Maybe
- because we may also refer to
- &NumberOfPages? */
- os_awe_t NumberOfPagesInitial;
- SYSTEM_INFO sSysInfo;
- int PFNArraySize;
-
- if (n_megabytes > 64 * 1024) {
-
- fprintf(stderr,
- "InnoDB: AWE: Error: tried to allocate %lu MB.\n"
- "InnoDB: AWE cannot allocate more than"
- " 64 GB in any computer.\n", n_megabytes);
-
- return(FALSE);
- }
-
- GetSystemInfo(&sSysInfo); /* fill the system information structure */
-
- if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) {
- fprintf(stderr,
- "InnoDB: AWE: Error: this computer has a page size"
- " of %lu.\n"
- "InnoDB: Should be 4096 bytes for"
- " InnoDB AWE support to work.\n",
- (ulint)sSysInfo.dwPageSize);
-
- return(FALSE);
- }
-
- /* Calculate the number of pages of memory to request */
-
- NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE);
-
- /* Calculate the size of page_info for allocated physical pages */
-
- PFNArraySize = NumberOfPages * sizeof(os_awe_t);
-
- *page_info = (os_awe_t*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize);
-
- if (*page_info == NULL) {
- fprintf(stderr,
- "InnoDB: AWE: Failed to allocate page info"
- " array from process heap, error %lu\n",
- (ulint)GetLastError());
-
- return(FALSE);
- }
-
- ut_total_allocated_memory += PFNArraySize;
-
- /* Enable this process' privilege to lock pages to physical memory */
-
- if (!os_awe_enable_lock_pages_in_mem()) {
-
- return(FALSE);
- }
-
- /* Allocate the physical memory. The requested page count is saved
- first because the call below takes the count by address and may
- store a different value in it. */
-
- NumberOfPagesInitial = NumberOfPages;
-
- os_awe_page_info = *page_info;
- os_awe_n_pages = (ulint)NumberOfPages;
-
- /* Compilation note: if the compiler complains the function is not
- defined, see the note at the start of this file */
-
- bResult = AllocateUserPhysicalPages(GetCurrentProcess(),
- &NumberOfPages, *page_info);
- if (bResult != TRUE) {
- fprintf(stderr,
- "InnoDB: AWE: Cannot allocate physical pages,"
- " error %lu.\n",
- (ulint)GetLastError());
-
- return(FALSE);
- }
-
- if (NumberOfPagesInitial != NumberOfPages) {
- fprintf(stderr,
- "InnoDB: AWE: Error: allocated only %lu pages"
- " of %lu requested.\n"
- "InnoDB: Check that you have enough free RAM.\n"
- "InnoDB: In Windows XP Professional and"
- " 2000 Professional\n"
- "InnoDB: Windows PAE size is max 4 GB."
- " In 2000 and .NET\n"
- "InnoDB: Advanced Servers and 2000 Datacenter Server"
- " it is 32 GB,\n"
- "InnoDB: and in .NET Datacenter Server it is 64 GB.\n"
- "InnoDB: A Microsoft web page said that"
- " the processor must be an Intel\n"
- "InnoDB: processor.\n",
- (ulint)NumberOfPages,
- (ulint)NumberOfPagesInitial);
-
- return(FALSE);
- }
-
- fprintf(stderr,
- "InnoDB: Using Address Windowing Extensions (AWE);"
- " allocated %lu MB\n",
- n_megabytes);
-
- return(TRUE);
-#else
- UT_NOT_USED(n_megabytes);
- UT_NOT_USED(page_info);
-
- return(FALSE);
-#endif
-}
-
-/********************************************************************
-Allocates a window in the virtual address space where the pages of
-physical memory can then be mapped. In the simulated build the window is
-ordinary memory from ut_malloc(), aligned to 4096 bytes, together with
-a map that holds one initially NULL entry per 4096-byte page of the
-window. In the __WIN2000__ build the window is reserved with
-VirtualAlloc(). In any other build NULL is returned. */
-
-byte*
-os_awe_allocate_virtual_mem_window(
-/*===============================*/
-			/* out, own: allocated memory, or NULL if did not
-			succeed */
-	ulint	size)	/* in: virtual memory allocation size in bytes, must
-			be < 2 GB */
-{
-#ifdef UNIV_SIMULATE_AWE
-	ulint	n_window_pages = size / 4096;
-	ulint	page_no;
-
-	os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096);
-	os_awe_simulate_window_size = size;
-
-	os_awe_simulate_map = ut_malloc(sizeof(byte*) * n_window_pages);
-
-	/* Nothing is mapped into the window yet */
-	for (page_no = 0; page_no < n_window_pages; page_no++) {
-		os_awe_simulate_map[page_no] = NULL;
-	}
-
-	return(os_awe_simulate_window);
-
-#elif defined(__WIN2000__)
-	byte*	window;
-
-	if (size > (ulint)0x7FFFFFFFUL) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot allocate %lu bytes"
-			" of virtual memory\n", size);
-
-		return(NULL);
-	}
-
-	window = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL,
-			      PAGE_READWRITE);
-	if (!window) {
-		fprintf(stderr,
-			"InnoDB: AWE: Cannot allocate %lu bytes"
-			" of virtual memory, error %lu\n",
-			size, (ulint)GetLastError());
-
-		return(NULL);
-	}
-
-	/* Publish the window and account for it in the memory total */
-	os_awe_window_size = size;
-	os_awe_window = window;
-
-	ut_total_allocated_memory += size;
-
-	return(window);
-#else
-	UT_NOT_USED(size);
-
-	return(NULL);
-#endif
-}
-
-/********************************************************************
-With this function you can map parts of physical memory allocated with
-the ..._allocate_physical_mem to the virtual address space allocated with
-the previous function. Intel implements this so that the process page
-tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
-showed that this takes < 1 microsecond, much better than the estimated 80 us
-for copying a 16 kB page memory to memory. But, the operation will at least
-partially invalidate the translation lookaside buffer (TLB) of all
-processors. Under a real-world load the performance hit may be bigger.
-
-In the simulated build the mapping is emulated by copying: pages that are
-currently 'mapped' at ptr are first copied back to their 'physical'
-location, then the requested 'physical pages' are copied to ptr and the
-map is updated.
-
-NOTE(review): the simulated branch only asserts that ptr and page_info
-start inside their ranges, not that n_mem_pages pages fit. The __WIN2000__
-branch checks the upper ends with fixed margins (UNIV_PAGE_SIZE bytes for
-the window, 4 entries for the page info array) that do not depend on
-n_mem_pages; confirm that callers always map exactly one UNIV_PAGE_SIZE
-page. */
-
-ibool
-os_awe_map_physical_mem_to_window(
-/*==============================*/
- /* out: TRUE if success; the function
- calls exit(1) in case of an error */
- byte* ptr, /* in: a page-aligned pointer to
- somewhere in the virtual address
- space window; we map the physical mem
- pages here */
- ulint n_mem_pages, /* in: number of 4 kB mem pages to
- map */
- os_awe_t* page_info) /* in: array of page infos for those
- pages; each page has one slot in the
- array */
-{
-#ifdef UNIV_SIMULATE_AWE
- ulint i;
- byte** map;
- byte* page;
- byte* phys_page;
-
- ut_a(ptr >= os_awe_simulate_window);
- ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size);
- ut_a(page_info >= os_awe_simulate_page_info);
- ut_a(page_info < os_awe_simulate_page_info
- + (os_awe_simulate_mem_size / 4096));
-
- /* First look if some other 'physical pages' are mapped at ptr,
- and copy them back to where they were if yes */
-
- map = os_awe_simulate_map
- + ((ulint)(ptr - os_awe_simulate_window)) / 4096;
- page = ptr;
-
- for (i = 0; i < n_mem_pages; i++) {
- if (*map != NULL) {
- ut_memcpy(*map, page, 4096);
- }
- map++;
- page += 4096;
- }
-
- /* Then copy to ptr the 'physical pages' determined by page_info; we
- assume page_info is a segment of the array we created at the start */
-
- phys_page = os_awe_simulate_mem
- + (ulint)(page_info - os_awe_simulate_page_info)
- * 4096;
-
- ut_memcpy(ptr, phys_page, n_mem_pages * 4096);
-
- /* Update the map */
-
- map = os_awe_simulate_map
- + ((ulint)(ptr - os_awe_simulate_window)) / 4096;
-
- for (i = 0; i < n_mem_pages; i++) {
- *map = phys_page;
-
- map++;
- phys_page += 4096;
- }
-
- return(TRUE);
-
-#elif defined(__WIN2000__)
- BOOL bResult;
- os_awe_t n_pages;
-
- n_pages = (os_awe_t)n_mem_pages;
-
- if (!(ptr >= os_awe_window)) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map to address %lx"
- " but AWE window start %lx\n",
- (ulint)ptr, (ulint)os_awe_window);
- ut_a(0);
- }
-
- if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map to address %lx"
- " but AWE window end %lx\n",
- (ulint)ptr, (ulint)os_awe_window + os_awe_window_size);
- ut_a(0);
- }
-
- if (!(page_info >= os_awe_page_info)) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map page info"
- " at %lx but array start %lx\n",
- (ulint)page_info, (ulint)os_awe_page_info);
- ut_a(0);
- }
-
- if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) {
- fprintf(stderr,
- "InnoDB: AWE: Error: trying to map page info"
- " at %lx but array end %lx\n",
- (ulint)page_info,
- (ulint)(os_awe_page_info + os_awe_n_pages));
- ut_a(0);
- }
-
- bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info);
-
- if (bResult != TRUE) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: AWE: Mapping of %lu physical pages"
- " to address %lx failed,\n"
- "InnoDB: error %lu.\n"
- "InnoDB: Cannot continue operation.\n",
- n_mem_pages, (ulint)ptr, (ulint)GetLastError());
- exit(1);
- }
-
- return(TRUE);
-#else
- UT_NOT_USED(ptr);
- UT_NOT_USED(n_mem_pages);
- UT_NOT_USED(page_info);
-
- return(FALSE);
-#endif
-}
-
-/********************************************************************
-Returns the id of the current process converted to a ulint. The number is
-not guaranteed to be unique. On Windows it is taken from
-GetCurrentProcessId(), elsewhere from getpid(). In Linux this is the
-'process number' of the current thread, the same number one sees in 'top';
-the Linux thread id is a different value. */
-
-ulint
-os_proc_get_number(void)
-/*====================*/
-				/* out: process id as a number */
-{
-	ulint	proc_no;
-
-#ifdef __WIN__
-	proc_no = (ulint) GetCurrentProcessId();
-#else
-	proc_no = (ulint) getpid();
-#endif
-
-	return(proc_no);
-}
-
-/********************************************************************
-Allocates non-cacheable memory. On Windows the memory is committed with
-VirtualAlloc() using PAGE_READWRITE | PAGE_NOCACHE and a failed allocation
-trips an assertion; on all other platforms this is a plain ut_malloc(). */
-
-void*
-os_mem_alloc_nocache(
-/*=================*/
-			/* out: allocated memory */
-	ulint	n)	/* in: number of bytes */
-{
-#ifndef __WIN__
-	return(ut_malloc(n));
-#else
-	void* const	mem = VirtualAlloc(NULL, n, MEM_COMMIT,
-					   PAGE_READWRITE | PAGE_NOCACHE);
-
-	/* A NULL result means the allocation failed: treat it as fatal */
-	ut_a(mem);
-
-	return(mem);
-#endif
-}
-
-/********************************************************************
-Allocates large pages memory. When HAVE_LARGE_PAGES is defined and both
-os_use_large_pages and os_large_page_size are nonzero, tries (on Linux) to
-obtain a SHM_HUGETLB shared memory segment whose size is n rounded up to a
-multiple of os_large_page_size. If that fails, or large pages are not in
-use, the memory is allocated with ut_malloc_low() instead. */
-
-void*
-os_mem_alloc_large(
-/*===============*/
-					/* out: allocated memory */
-	ulint	n,			/* in: number of bytes */
-	ibool	set_to_zero,		/* in: TRUE if allocated memory
-					should be set to zero if
-					UNIV_SET_MEM_TO_ZERO is defined */
-	ibool	assert_on_error)	/* in: if TRUE, we crash mysqld if
-					the memory cannot be allocated */
-{
-#ifdef HAVE_LARGE_PAGES
-	ulint		size	= 0;
-	int		shmid;
-	void*		ptr	= NULL;
-	struct shmid_ds	buf;
-
-	if (!os_use_large_pages || !os_large_page_size) {
-		goto skip;
-	}
-
-#ifdef UNIV_LINUX
-	/* Align block size to os_large_page_size */
-	size = ((n - 1) & ~(os_large_page_size - 1)) + os_large_page_size;
-
-	shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
-	if (shmid < 0) {
-		fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate"
-			" %lu bytes. errno %d\n", n, errno);
-	} else {
-		ptr = shmat(shmid, NULL, 0);
-		if (ptr == (void *)-1) {
-			fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to"
-				" attach shared memory segment, errno %d\n",
-				errno);
-
-			/* shmat() reports failure as (void*) -1, not as
-			NULL. Reset ptr so that the check below does not
-			mistake the error value for a valid mapping and
-			we fall back to the conventional allocator */
-			ptr = NULL;
-		}
-
-		/* Remove the shared memory segment so that it will be
-		automatically freed after memory is detached or
-		process exits */
-		shmctl(shmid, IPC_RMID, &buf);
-	}
-#endif
-
-	if (ptr) {
-		if (set_to_zero) {
-#ifdef UNIV_SET_MEM_TO_ZERO
-			memset(ptr, '\0', size);
-#endif
-		}
-
-		return(ptr);
-	}
-
-	fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional"
-		" memory pool\n");
-skip:
-#endif /* HAVE_LARGE_PAGES */
-
-	return(ut_malloc_low(n, set_to_zero, assert_on_error));
-}
-
-/********************************************************************
-Frees large pages memory. When HAVE_LARGE_PAGES is defined and large pages
-are in use, the memory is (on Linux) detached with shmdt(); if the detach
-fails, or large pages are not in use, the pointer is handed to ut_free()
-instead. */
-
-void
-os_mem_free_large(
-/*==============*/
-	void	*ptr)	/* in: pointer to the memory to free */
-{
-#ifdef HAVE_LARGE_PAGES
-	if (os_use_large_pages && os_large_page_size) {
-#ifdef UNIV_LINUX
-		/* shmdt() returns 0 when the segment was detached */
-		if (shmdt(ptr) == 0) {
-
-			return;
-		}
-#else
-		return;
-#endif
-	}
-#endif
-
-	ut_free(ptr);
-}
-
-/********************************************************************
-Sets the priority boost for threads released from waiting within the current
-process. Currently the function has no effect: on Windows it only prints a
-warning to stderr, on other platforms it does nothing. */
-
-void
-os_process_set_priority_boost(
-/*==========================*/
-	ibool	do_boost)	/* in: TRUE if priority boost should be done,
-				FALSE if not */
-{
-#ifndef __WIN__
-	UT_NOT_USED(do_boost);
-#else
-	/* The disabled call below takes the inverse of do_boost */
-	ibool	no_boost = do_boost ? FALSE : TRUE;
-
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
-
-	/* Does not do anything currently! The call that is disabled here is
-	SetProcessPriorityBoost(GetCurrentProcess(), no_boost);
-	*/
-	fputs("Warning: process priority boost setting"
-	      " currently not functional!\n",
-	      stderr);
-#endif
-}
diff --git a/storage/innobase/os/os0sync.c b/storage/innobase/os/os0sync.c
deleted file mode 100644
index 18fd38f3f9b..00000000000
--- a/storage/innobase/os/os0sync.c
+++ /dev/null
@@ -1,753 +0,0 @@
-/******************************************************
-The interface to the operating system
-synchronization primitives.
-
-(c) 1995 Innobase Oy
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0sync.h"
-#ifdef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#endif
-
-#include "ut0mem.h"
-#include "srv0start.h"
-
-/* Type definition for an operating system mutex struct. Instances are
-allocated and filled in by os_mutex_create() and released by
-os_mutex_free(). */
-struct os_mutex_struct{
- os_event_t event; /* Used by sync0arr.c for queueing threads */
- void* handle; /* OS handle to mutex: a Windows mutex HANDLE,
- or elsewhere a pointer to a heap-allocated
- os_fast_mutex_t */
- ulint count; /* we use this counter to check
- that the same thread does not
- recursively lock the mutex: we
- do not assume that the OS mutex
- supports recursive locking, though
- NT seems to do that */
- UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list;
- /* list of all 'slow' OS mutexes created */
-};
-
-/* Mutex protecting counts and the lists of OS mutexes and events */
-os_mutex_t os_sync_mutex;
-/* Set to TRUE by os_sync_init() once os_sync_mutex exists and back to FALSE
-by os_sync_free() just before os_sync_mutex itself is freed; while FALSE the
-functions in this file skip reserving os_sync_mutex */
-ibool os_sync_mutex_inited = FALSE;
-/* TRUE only while os_sync_free() is running; os_mutex_free() then does not
-free the event embedded in the mutex */
-ibool os_sync_free_called = FALSE;
-
-/* This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-ulint os_thread_count = 0;
-
-/* The list of all events created */
-UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
-
-/* The list of all OS 'slow' mutexes */
-UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
-
-/* Numbers of events, 'slow' mutexes and fast mutexes currently in existence:
-incremented by the create/init functions and decremented by the free
-functions of this file */
-ulint os_event_count = 0;
-ulint os_mutex_count = 0;
-ulint os_fast_mutex_count = 0;
-
-/* Because a mutex is embedded inside an event and there is an
-event embedded inside a mutex, on free, this generates a recursive call.
-This version of the free event function doesn't acquire the global lock */
-static void os_event_free_internal(os_event_t event);
-
-/*************************************************************
-Initializes global event and OS 'slow' mutex lists, and creates
-os_sync_mutex, the mutex that protects them. */
-
-void
-os_sync_init(void)
-/*==============*/
-{
- UT_LIST_INIT(os_event_list);
- UT_LIST_INIT(os_mutex_list);
-
- /* os_sync_mutex_inited is still FALSE here, so os_mutex_create() does
- not try to reserve the mutex that it is in the middle of creating */
- os_sync_mutex = os_mutex_create(NULL);
-
- /* From now on the create and free functions reserve os_sync_mutex */
- os_sync_mutex_inited = TRUE;
-}
-
-/*************************************************************
-Frees created events and OS 'slow' mutexes: first every event left in
-os_event_list, then every mutex left in os_mutex_list. While this function
-runs os_sync_free_called is TRUE. */
-
-void
-os_sync_free(void)
-/*==============*/
-{
-	os_event_t	ev;
-	os_mutex_t	mtx;
-
-	os_sync_free_called = TRUE;
-
-	/* Each free removes the head of the list, so keep taking the first
-	element until the list is empty */
-	for (ev = UT_LIST_GET_FIRST(os_event_list);
-	     ev != NULL;
-	     ev = UT_LIST_GET_FIRST(os_event_list)) {
-
-		os_event_free(ev);
-	}
-
-	for (mtx = UT_LIST_GET_FIRST(os_mutex_list);
-	     mtx != NULL;
-	     mtx = UT_LIST_GET_FIRST(os_mutex_list)) {
-
-		if (mtx == os_sync_mutex) {
-			/* Set the flag to FALSE so that we do not try to
-			reserve os_sync_mutex any more in remaining freeing
-			operations in shutdown */
-			os_sync_mutex_inited = FALSE;
-		}
-
-		os_mutex_free(mtx);
-	}
-
-	os_sync_free_called = FALSE;
-}
-
-/*************************************************************
-Creates an event semaphore, i.e., a semaphore which may just have two
-states: signaled and nonsignaled. The created event is manual reset: it
-must be reset explicitly by calling os_event_reset(). The new event is
-added to os_event_list and os_event_count is incremented. */
-
-os_event_t
-os_event_create(
-/*============*/
- /* out: the event handle */
- const char* name) /* in: the name of the event, if NULL
- the event is created without a name;
- not used outside Windows */
-{
-#ifdef __WIN__
- os_event_t event;
-
- event = ut_malloc(sizeof(struct os_event_struct));
-
- event->handle = CreateEvent(NULL, /* No security attributes */
- TRUE, /* Manual reset */
- FALSE, /* Initial state nonsignaled */
- (LPCTSTR) name);
- if (!event->handle) {
- /* NOTE(review): the failure is only reported here; the event
- is still put to the list and returned with a NULL handle */
- fprintf(stderr,
- "InnoDB: Could not create a Windows event semaphore;"
- " Windows error %lu\n",
- (ulong) GetLastError());
- }
-#else /* Unix */
- os_event_t event;
-
- UT_NOT_USED(name);
-
- event = ut_malloc(sizeof(struct os_event_struct));
-
- /* The event is built from a fast mutex, a condition variable and
- the is_set / signal_count fields that the mutex protects */
- os_fast_mutex_init(&(event->os_mutex));
-
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
- ut_a(0 == pthread_cond_init(&(event->cond_var),
- pthread_condattr_default));
-#else
- ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
-#endif
- event->is_set = FALSE;
-
- /* We return this value in os_event_reset(), which can then
- be used to pass to the os_event_wait_low(). The value of zero
- is reserved in os_event_wait_low() for the case when the
- caller does not want to pass any signal_count value. To
- distinguish between the two cases we initialize signal_count
- to 1 here. */
- event->signal_count = 1;
-#endif /* __WIN__ */
-
- /* The os_sync_mutex can be NULL because during startup an event
- can be created [ because it's embedded in the mutex/rwlock ] before
- this module has been initialized */
- if (os_sync_mutex != NULL) {
- os_mutex_enter(os_sync_mutex);
- }
-
- /* Put to the list of events */
- UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
-
- os_event_count++;
-
- if (os_sync_mutex != NULL) {
- os_mutex_exit(os_sync_mutex);
- }
-
- return(event);
-}
-
-#ifdef __WIN__
-/*************************************************************
-Creates an auto-reset event semaphore, i.e., an event which is automatically
-reset when a single thread is released. Works only in Windows. The new event
-is added to os_event_list and os_event_count is incremented. */
-
-os_event_t
-os_event_create_auto(
-/*=================*/
- /* out: the event handle */
- const char* name) /* in: the name of the event, if NULL
- the event is created without a name */
-{
- os_event_t event;
-
- event = ut_malloc(sizeof(struct os_event_struct));
-
- event->handle = CreateEvent(NULL, /* No security attributes */
- FALSE, /* Auto-reset */
- FALSE, /* Initial state nonsignaled */
- (LPCTSTR) name);
-
- if (!event->handle) {
- /* NOTE(review): the failure is only reported here; the event
- is still put to the list and returned with a NULL handle */
- fprintf(stderr,
- "InnoDB: Could not create a Windows auto"
- " event semaphore; Windows error %lu\n",
- (ulong) GetLastError());
- }
-
- /* Put to the list of events. NOTE(review): unlike os_event_create(),
- os_sync_mutex is reserved here without a NULL check, so presumably
- this must not be called before os_sync_init() - confirm */
- os_mutex_enter(os_sync_mutex);
-
- UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
-
- os_event_count++;
-
- os_mutex_exit(os_sync_mutex);
-
- return(event);
-}
-#endif
-
-/**************************************************************
-Puts an event semaphore into the signaled state, which lets the threads
-waiting for it proceed. Outside Windows, setting an event that is already
-set changes nothing; otherwise signal_count is incremented and all waiters
-of the condition variable are woken up. */
-
-void
-os_event_set(
-/*=========*/
-	os_event_t	event)	/* in: event to set */
-{
-	ut_a(event);
-
-#ifdef __WIN__
-	ut_a(SetEvent(event->handle));
-#else
-	os_fast_mutex_lock(&(event->os_mutex));
-
-	if (!event->is_set) {
-		event->is_set = TRUE;
-		event->signal_count += 1;
-		ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
-	}
-
-	os_fast_mutex_unlock(&(event->os_mutex));
-#endif
-}
-
-/**************************************************************
-Puts an event semaphore back into the nonsignaled state, so that threads
-which wait for it from now on will block.
-The return value should be passed to os_event_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low().
-On Windows the return value is always 0. */
-
-ib_longlong
-os_event_reset(
-/*===========*/
-				/* out: current signal_count. */
-	os_event_t	event)	/* in: event to reset */
-{
-	ib_longlong	sig_count = 0;
-
-	ut_a(event);
-
-#ifdef __WIN__
-	ut_a(ResetEvent(event->handle));
-#else
-	os_fast_mutex_lock(&(event->os_mutex));
-
-	if (event->is_set) {
-		event->is_set = FALSE;
-	}
-
-	/* Read the counter under the same mutex that os_event_set() holds
-	when it increments it */
-	sig_count = event->signal_count;
-
-	os_fast_mutex_unlock(&(event->os_mutex));
-#endif
-
-	return(sig_count);
-}
-
-/**************************************************************
-Frees an event object without reserving os_sync_mutex while the event is
-removed from os_event_list. Used by os_mutex_free() for the event embedded
-in a mutex. */
-static
-void
-os_event_free_internal(
-/*===================*/
-	os_event_t	event)	/* in: event to free */
-{
-	ut_a(event);
-
-	/* Release the OS level resources of the event */
-#ifdef __WIN__
-	ut_a(CloseHandle(event->handle));
-#else
-	os_fast_mutex_free(&(event->os_mutex));
-
-	ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
-#endif
-
-	/* Unlink from the list of events; no global lock is taken here */
-	UT_LIST_REMOVE(os_event_list, os_event_list, event);
-	os_event_count--;
-
-	ut_free(event);
-}
-
-/**************************************************************
-Frees an event object: releases its OS level resources, removes it from
-os_event_list under os_sync_mutex and frees the memory of the struct. */
-
-void
-os_event_free(
-/*==========*/
-	os_event_t	event)	/* in: event to free */
-
-{
-	ut_a(event);
-
-#ifdef __WIN__
-	ut_a(CloseHandle(event->handle));
-#else
-	os_fast_mutex_free(&(event->os_mutex));
-	ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
-#endif
-
-	/* The list of events and its counter are protected by os_sync_mutex */
-	os_mutex_enter(os_sync_mutex);
-
-	UT_LIST_REMOVE(os_event_list, os_event_list, event);
-	os_event_count--;
-
-	os_mutex_exit(os_sync_mutex);
-
-	ut_free(event);
-}
-
-/**************************************************************
-Waits for an event object until it is in the signaled state. If
-srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
-waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state).
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. On Windows reset_sig_count is not used. */
-
-void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /* in: event to wait */
- ib_longlong reset_sig_count)/* in: zero or the value
- returned by previous call of
- os_event_reset(). */
-{
-#ifdef __WIN__
- DWORD err;
-
- ut_a(event);
-
- UT_NOT_USED(reset_sig_count);
-
- /* Specify an infinite time limit for waiting */
- err = WaitForSingleObject(event->handle, INFINITE);
-
- ut_a(err == WAIT_OBJECT_0);
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_thread_exit(NULL);
- }
-#else
- ib_longlong old_signal_count;
-
- /* NOTE(review): unlike the Windows branch, event is not checked
- with ut_a() before it is dereferenced here */
- os_fast_mutex_lock(&(event->os_mutex));
-
- /* Zero means that the caller passed no count: compare against the
- count as it is now, i.e., wait for the next os_event_set() */
- if (reset_sig_count) {
- old_signal_count = reset_sig_count;
- } else {
- old_signal_count = event->signal_count;
- }
-
- for (;;) {
- /* A changed signal_count means that os_event_set() was called
- after the count was sampled, even if the event has been reset
- again since then */
- if (event->is_set == TRUE
- || event->signal_count != old_signal_count) {
-
- os_fast_mutex_unlock(&(event->os_mutex));
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
-
- os_thread_exit(NULL);
- }
- /* Ok, we may return */
-
- return;
- }
-
- /* Releases os_mutex while blocked and holds it again on
- return */
- pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
-
- /* Solaris manual said that spurious wakeups may occur: we
- have to check if the event really has been signaled after
- we came here to wait */
- }
-#endif
-}
-
-/**************************************************************
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded. In Unix the timeout is always infinite: the
-time argument is ignored and the call is an ordinary os_event_wait(). */
-
-ulint
-os_event_wait_time(
-/*===============*/
-				/* out: 0 if success, OS_SYNC_TIME_EXCEEDED if
-				timeout was exceeded */
-	os_event_t	event,	/* in: event to wait */
-	ulint		time)	/* in: timeout in microseconds, or
-				OS_SYNC_INFINITE_TIME */
-{
-#ifdef __WIN__
-	DWORD	err;
-
-	ut_a(event);
-
-	if (time != OS_SYNC_INFINITE_TIME) {
-		/* WaitForSingleObject() takes milliseconds. Divide first
-		and cast afterwards: a cast binds tighter than division, so
-		"(DWORD) time / 1000" would truncate a ulint that is wider
-		than DWORD before it is converted to milliseconds */
-		err = WaitForSingleObject(event->handle,
-					  (DWORD) (time / 1000));
-	} else {
-		err = WaitForSingleObject(event->handle, INFINITE);
-	}
-
-	if (err == WAIT_OBJECT_0) {
-
-		return(0);
-	} else if (err == WAIT_TIMEOUT) {
-
-		return(OS_SYNC_TIME_EXCEEDED);
-	} else {
-		ut_error;
-		return(1000000); /* dummy value to eliminate compiler warn. */
-	}
-#else
-	UT_NOT_USED(time);
-
-	/* In Posix this is just an ordinary, infinite wait */
-
-	os_event_wait(event);
-
-	return(0);
-#endif
-}
-
-#ifdef __WIN__
-/**************************************************************
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled. If srv_shutdown_state ==
-SRV_SHUTDOWN_EXIT_THREADS when the wait ends, the calling thread exits
-instead of returning. Windows only. */
-
-ulint
-os_event_wait_multiple(
-/*===================*/
- /* out: index of the event
- which was signaled */
- ulint n, /* in: number of events in the
- array */
- os_native_event_t* native_event_array)
- /* in: pointer to an array of event
- handles */
-{
- DWORD index;
-
- ut_a(native_event_array);
- ut_a(n > 0);
-
- index = WaitForMultipleObjects((DWORD) n, native_event_array,
- FALSE, /* Wait for any 1 event */
- INFINITE); /* Infinite wait time
- limit */
- ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
- ut_a(index < WAIT_OBJECT_0 + n);
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_thread_exit(NULL);
- }
-
- /* The return value of the wait is WAIT_OBJECT_0 plus the array
- index of the signaled event */
- return(index - WAIT_OBJECT_0);
-}
-#endif
-
-/*************************************************************
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
-The new mutex gets an embedded event, is added to os_mutex_list and
-os_mutex_count is incremented. */
-
-os_mutex_t
-os_mutex_create(
-/*============*/
- /* out: the mutex handle */
- const char* name) /* in: the name of the mutex, if NULL
- the mutex is created without a name;
- not used outside Windows */
-{
-#ifdef __WIN__
- HANDLE mutex;
- os_mutex_t mutex_str;
-
- mutex = CreateMutex(NULL, /* No security attributes */
- FALSE, /* Initial state: no owner */
- (LPCTSTR) name);
- ut_a(mutex);
-#else
- os_fast_mutex_t* mutex;
- os_mutex_t mutex_str;
-
- UT_NOT_USED(name);
-
- /* Outside Windows the 'slow' mutex is a heap-allocated fast mutex;
- os_mutex_free() frees this allocation too */
- mutex = ut_malloc(sizeof(os_fast_mutex_t));
-
- os_fast_mutex_init(mutex);
-#endif
- mutex_str = ut_malloc(sizeof(os_mutex_str_t));
-
- mutex_str->handle = mutex;
- mutex_str->count = 0;
- mutex_str->event = os_event_create(NULL);
-
- if (os_sync_mutex_inited) {
- /* When creating os_sync_mutex itself we cannot reserve it */
- os_mutex_enter(os_sync_mutex);
- }
-
- UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str);
-
- os_mutex_count++;
-
- if (os_sync_mutex_inited) {
- os_mutex_exit(os_sync_mutex);
- }
-
- return(mutex_str);
-}
-
-/**************************************************************
-Acquires ownership of a mutex semaphore, waiting without a time limit.
-Asserts that the mutex was not already held, i.e., that it is not locked
-recursively. */
-
-void
-os_mutex_enter(
-/*===========*/
-	os_mutex_t	mutex)	/* in: mutex to acquire */
-{
-#ifdef __WIN__
-	DWORD	wait_res;
-
-	ut_a(mutex);
-
-	/* Specify infinite time limit for waiting */
-	wait_res = WaitForSingleObject(mutex->handle, INFINITE);
-
-	ut_a(wait_res == WAIT_OBJECT_0);
-#else
-	os_fast_mutex_lock(mutex->handle);
-#endif
-
-	/* We now own the mutex: the counter must go from 0 to 1 */
-	mutex->count++;
-
-	ut_a(mutex->count == 1);
-}
-
-/**************************************************************
-Releases ownership of a mutex. Asserts that the mutex is currently held
-exactly once. */
-
-void
-os_mutex_exit(
-/*==========*/
- os_mutex_t mutex) /* in: mutex to release */
-{
- ut_a(mutex);
-
- ut_a(mutex->count == 1);
-
- /* The counter is decremented while the OS mutex is still held, so
- that the next owner finds it at 0 in os_mutex_enter() */
- (mutex->count)--;
-#ifdef __WIN__
- ut_a(ReleaseMutex(mutex->handle));
-#else
- os_fast_mutex_unlock(mutex->handle);
-#endif
-}
-
-/**************************************************************
-Frees a mutex object: its embedded event (unless os_sync_free() is running),
-its entry in os_mutex_list, the OS level mutex and the memory of the
-struct. */
-
-void
-os_mutex_free(
-/*==========*/
-	os_mutex_t	mutex)	/* in: mutex to free */
-{
-	ut_a(mutex);
-
-	/* The embedded event is freed here only when os_sync_free() is not
-	in progress */
-	if (!os_sync_free_called) {
-		os_event_free_internal(mutex->event);
-	}
-
-	if (os_sync_mutex_inited) {
-		os_mutex_enter(os_sync_mutex);
-	}
-
-	UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex);
-	os_mutex_count--;
-
-	if (os_sync_mutex_inited) {
-		os_mutex_exit(os_sync_mutex);
-	}
-
-#ifdef __WIN__
-	ut_a(CloseHandle(mutex->handle));
-#else
-	/* The handle is a fast mutex that os_mutex_create() allocated */
-	os_fast_mutex_free(mutex->handle);
-	ut_free(mutex->handle);
-#endif
-
-	ut_free(mutex);
-}
-
-/*************************************************************
-Initializes an operating system fast mutex semaphore: a critical section
-on Windows, a pthread mutex elsewhere. Also increments
-os_fast_mutex_count. */
-
-void
-os_fast_mutex_init(
-/*===============*/
-	os_fast_mutex_t*	fast_mutex)	/* in: fast mutex */
-{
-#ifdef __WIN__
-	ut_a(fast_mutex);
-
-	InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#elif defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
-	ut_a(0 == pthread_mutex_init(fast_mutex, pthread_mutexattr_default));
-#else
-	ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
-#endif
-
-	/* When creating os_sync_mutex itself (in Unix) we cannot
-	reserve it */
-	if (os_sync_mutex_inited) {
-		os_mutex_enter(os_sync_mutex);
-	}
-
-	os_fast_mutex_count++;
-
-	if (os_sync_mutex_inited) {
-		os_mutex_exit(os_sync_mutex);
-	}
-}
-
-/**************************************************************
-Acquires ownership of a fast mutex: enters the critical section on Windows,
-locks the pthread mutex elsewhere. The return value of pthread_mutex_lock()
-is not checked. */
-
-void
-os_fast_mutex_lock(
-/*===============*/
- os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */
-{
-#ifdef __WIN__
- EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- pthread_mutex_lock(fast_mutex);
-#endif
-}
-
-/**************************************************************
-Releases ownership of a fast mutex: leaves the critical section on Windows,
-unlocks the pthread mutex elsewhere. The return value of
-pthread_mutex_unlock() is not checked. */
-
-void
-os_fast_mutex_unlock(
-/*=================*/
- os_fast_mutex_t* fast_mutex) /* in: mutex to release */
-{
-#ifdef __WIN__
- /* NOTE(review): passed without the LPCRITICAL_SECTION cast that the
- other fast mutex functions use */
- LeaveCriticalSection(fast_mutex);
-#else
- pthread_mutex_unlock(fast_mutex);
-#endif
-}
-
-/**************************************************************
-Frees a fast mutex object: deletes the critical section on Windows,
-destroys the pthread mutex elsewhere. Also decrements os_fast_mutex_count.
-The memory of the mutex itself is not freed here. */
-
-void
-os_fast_mutex_free(
-/*===============*/
- os_fast_mutex_t* fast_mutex) /* in: mutex to free */
-{
-#ifdef __WIN__
- ut_a(fast_mutex);
-
- DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- int ret;
-
- ret = pthread_mutex_destroy(fast_mutex);
-
- if (ret != 0) {
- /* A failed destroy is only reported, together with a dump of
- the mutex bytes; execution continues */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: error: return value %lu when calling\n"
- "InnoDB: pthread_mutex_destroy().\n", (ulint)ret);
- fprintf(stderr,
- "InnoDB: Byte contents of the pthread mutex at %p:\n",
- (void*) fast_mutex);
- ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t));
- fprintf(stderr, "\n");
- }
-#endif
- if (os_sync_mutex_inited) {
- /* When freeing the last mutexes, we have
- already freed os_sync_mutex */
-
- os_mutex_enter(os_sync_mutex);
- }
-
- os_fast_mutex_count--;
-
- if (os_sync_mutex_inited) {
- os_mutex_exit(os_sync_mutex);
- }
-}
diff --git a/storage/innobase/os/os0thread.c b/storage/innobase/os/os0thread.c
deleted file mode 100644
index a0b1e51d359..00000000000
--- a/storage/innobase/os/os0thread.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/******************************************************
-The interface to the operating system thread control primitives
-
-(c) 1995 Innobase Oy
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0thread.h"
-#ifdef UNIV_NONINL
-#include "os0thread.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#endif
-
-#include "srv0srv.h"
-#include "os0sync.h"
-
-/*******************************************************************
-Tells whether two thread ids denote the same thread. On Windows the ids
-are compared with ==, elsewhere with pthread_equal(). */
-
-ibool
-os_thread_eq(
-/*=========*/
-				/* out: TRUE if equal */
-	os_thread_id_t	a,	/* in: OS thread or thread id */
-	os_thread_id_t	b)	/* in: OS thread or thread id */
-{
-#ifdef __WIN__
-	return(a == b ? TRUE : FALSE);
-#else
-	return(pthread_equal(a, b) ? TRUE : FALSE);
-#endif
-}
-
-/********************************************************************
-Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though! */
-
-ulint
-os_thread_pf(
-/*=========*/
- /* out: the thread id cast to a ulint */
- os_thread_id_t a) /* in: OS thread id */
-{
-#ifdef UNIV_HPUX10
- /* In HP-UX-10.20 a pthread_t is a struct of 3 fields: field1, field2,
- field3. We do not know if field1 determines the thread uniquely. */
-
- return((ulint)(a.field1));
-#else
- return((ulint)a);
-#endif
-}
-
-/*********************************************************************
-Returns the thread identifier of current thread. Currently the thread
-identifier in Unix is the thread handle itself. Note that in HP-UX
-pthread_t is a struct of 3 fields. */
-
-os_thread_id_t
-os_thread_get_curr_id(void)
-/*=======================*/
- /* out: GetCurrentThreadId() on Windows,
- pthread_self() elsewhere */
-{
-#ifdef __WIN__
- return(GetCurrentThreadId());
-#else
- return(pthread_self());
-#endif
-}
-
-/********************************************************************
-Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and returns an ulint. os_thread_count is incremented under os_sync_mutex
-before the thread is created. Outside Windows a failure to create the
-thread terminates the process with exit(1). */
-
-os_thread_t
-os_thread_create(
-/*=============*/
- /* out: handle to the thread */
-#ifndef __WIN__
- os_posix_f_t start_f,
-#else
- ulint (*start_f)(void*), /* in: pointer to function
- from which to start */
-#endif
- void* arg, /* in: argument to start
- function */
- os_thread_id_t* thread_id) /* out: id of the created
- thread, or NULL */
-{
-#ifdef __WIN__
- os_thread_t thread;
- DWORD win_thread_id;
-
- os_mutex_enter(os_sync_mutex);
- os_thread_count++;
- os_mutex_exit(os_sync_mutex);
-
- /* NOTE(review): the returned handle is not checked for NULL before
- it is used below */
- thread = CreateThread(NULL, /* no security attributes */
- 0, /* default size stack */
- (LPTHREAD_START_ROUTINE)start_f,
- arg,
- 0, /* thread runs immediately */
- &win_thread_id);
-
- if (srv_set_thread_priorities) {
-
- /* Set created thread priority the same as a normal query
- in MYSQL: we try to prevent starvation of threads by
- assigning same priority QUERY_PRIOR to all */
-
- ut_a(SetThreadPriority(thread, srv_query_thread_priority));
- }
-
- if (thread_id) {
- *thread_id = win_thread_id;
- }
-
- return(thread);
-#else
- int ret;
- os_thread_t pthread;
- pthread_attr_t attr;
-
-#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10))
- /* In the excluded configuration pthread_attr_default is passed to
- pthread_create() below and attr is not used for that call */
- pthread_attr_init(&attr);
-#endif
-
-#ifdef UNIV_AIX
- /* We must make sure a thread stack is at least 32 kB, otherwise
- InnoDB might crash; we do not know if the default stack size on
- AIX is always big enough. An empirical test on AIX-4.3 suggested
- the size was 96 kB, though. */
-
- ret = pthread_attr_setstacksize(&attr,
- (size_t)(PTHREAD_STACK_MIN
- + 32 * 1024));
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_attr_setstacksize"
- " returned %d\n", ret);
- exit(1);
- }
-#endif
-#ifdef __NETWARE__
- ret = pthread_attr_setstacksize(&attr,
- (size_t) NW_THD_STACKSIZE);
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_attr_setstacksize"
- " returned %d\n", ret);
- exit(1);
- }
-#endif
- /* Count the thread before it starts running */
- os_mutex_enter(os_sync_mutex);
- os_thread_count++;
- os_mutex_exit(os_sync_mutex);
-
-#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)
- ret = pthread_create(&pthread, pthread_attr_default, start_f, arg);
-#else
- ret = pthread_create(&pthread, &attr, start_f, arg);
-#endif
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_create returned %d\n", ret);
- exit(1);
- }
-
-#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10))
- pthread_attr_destroy(&attr);
-#endif
- if (srv_set_thread_priorities) {
-
- my_pthread_setprio(pthread, srv_query_thread_priority);
- }
-
- if (thread_id) {
- *thread_id = pthread;
- }
-
- return(pthread);
-#endif
-}
-
-/*********************************************************************
-Exits the current thread. Decrements os_thread_count under os_sync_mutex
-before the thread is terminated. */
-
-void
-os_thread_exit(
-/*===========*/
- void* exit_value) /* in: exit value; in Windows this void*
- is cast as a DWORD */
-{
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Thread exits, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
- /* Undo the increment that os_thread_create() made for this thread */
- os_mutex_enter(os_sync_mutex);
- os_thread_count--;
- os_mutex_exit(os_sync_mutex);
-
-#ifdef __WIN__
- ExitThread((DWORD)exit_value);
-#else
- pthread_exit(exit_value);
-#endif
-}
-
-#ifdef HAVE_PTHREAD_JOIN
-/*********************************************************************
-Waits for a thread to terminate by calling pthread_join(); the exit value
-of the thread is discarded. Only compiled when HAVE_PTHREAD_JOIN is
-defined. */
-int
-os_thread_join(
-/*===========*/
- /* out: return value of
- pthread_join() */
- os_thread_id_t thread_id) /* in: id of the thread to join */
-{
- return(pthread_join(thread_id, NULL));
-}
-#endif
-/*********************************************************************
-Returns handle to the current thread. */
-
-os_thread_t
-os_thread_get_curr(void)
-/*====================*/
- /* out: GetCurrentThread() on Windows,
- pthread_self() elsewhere */
-{
-#ifdef __WIN__
- return(GetCurrentThread());
-#else
- return(pthread_self());
-#endif
-}
-
-/*********************************************************************
-Advises the os to give up remainder of the thread's time slice. Uses the
-first mechanism that the build configuration provides: Sleep(0) on Windows,
-sched_yield(), pthread_yield() with zero or one argument, and as the last
-resort os_thread_sleep(0). */
-
-void
-os_thread_yield(void)
-/*=================*/
-{
-#if defined(__WIN__)
- Sleep(0);
-#elif (defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H))
- sched_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG)
- pthread_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ONE_ARG)
- pthread_yield(0);
-#else
- os_thread_sleep(0);
-#endif
-}
-
-/*********************************************************************
-The thread sleeps at least the time given in microseconds. On Windows and
-NetWare the time is converted to whole milliseconds, rounding down;
-elsewhere the sleep is done with select() and a timeout. */
-
-void
-os_thread_sleep(
-/*============*/
-	ulint	tm)	/* in: time in microseconds */
-{
-#ifdef __WIN__
-	/* Sleep() takes milliseconds. Divide first and cast afterwards: a
-	cast binds tighter than division, so "(DWORD) tm / 1000" would
-	truncate a ulint that is wider than DWORD before it is converted to
-	milliseconds */
-	Sleep((DWORD) (tm / 1000));
-#elif defined(__NETWARE__)
-	delay(tm / 1000);
-#else
-	struct timeval	t;
-
-	t.tv_sec = tm / 1000000;
-	t.tv_usec = tm % 1000000;
-
-	/* select() with no descriptor sets just waits for the timeout */
-	select(0, NULL, NULL, NULL, &t);
-#endif
-}
-
-/**********************************************************************
-Sets a thread priority. Only implemented on Windows, where the InnoDB
-priority constant is mapped to a Windows thread priority; on other
-platforms the call does nothing. */
-
-void
-os_thread_set_priority(
-/*===================*/
- os_thread_t handle, /* in: OS handle to the thread */
- ulint pri) /* in: priority */
-{
-#ifdef __WIN__
- int os_pri;
-
- if (pri == OS_THREAD_PRIORITY_BACKGROUND) {
- os_pri = THREAD_PRIORITY_BELOW_NORMAL;
- } else if (pri == OS_THREAD_PRIORITY_NORMAL) {
- os_pri = THREAD_PRIORITY_NORMAL;
- } else if (pri == OS_THREAD_PRIORITY_ABOVE_NORMAL) {
- os_pri = THREAD_PRIORITY_HIGHEST;
- } else {
- /* Any other value of pri is treated as a fatal error */
- ut_error;
- }
-
- ut_a(SetThreadPriority(handle, os_pri));
-#else
- UT_NOT_USED(handle);
- UT_NOT_USED(pri);
-#endif
-}
-
-/**********************************************************************
-Gets a thread priority. Only implemented on Windows, where the Windows
-thread priority is mapped back to an InnoDB priority constant; on other
-platforms the function always returns 0. */
-
-ulint
-os_thread_get_priority(
-/*===================*/
- /* out: priority */
- os_thread_t handle __attribute__((unused)))
- /* in: OS handle to the thread */
-{
-#ifdef __WIN__
- int os_pri;
- ulint pri;
-
- os_pri = GetThreadPriority(handle);
-
- if (os_pri == THREAD_PRIORITY_BELOW_NORMAL) {
- pri = OS_THREAD_PRIORITY_BACKGROUND;
- } else if (os_pri == THREAD_PRIORITY_NORMAL) {
- pri = OS_THREAD_PRIORITY_NORMAL;
- } else if (os_pri == THREAD_PRIORITY_HIGHEST) {
- pri = OS_THREAD_PRIORITY_ABOVE_NORMAL;
- } else {
- /* A Windows priority that os_thread_set_priority() never sets
- is treated as a fatal error */
- ut_error;
- }
-
- return(pri);
-#else
- return(0);
-#endif
-}
-
-/**********************************************************************
-Gets the last operating system error code for the calling thread. Only
-implemented on Windows; on other platforms the function always returns 0. */
-
-ulint
-os_thread_get_last_error(void)
-/*==========================*/
- /* out: GetLastError() on Windows,
- 0 elsewhere */
-{
-#ifdef __WIN__
- return(GetLastError());
-#else
- return(0);
-#endif
-}