summaryrefslogtreecommitdiff
path: root/innobase/os
diff options
context:
space:
mode:
Diffstat (limited to 'innobase/os')
-rw-r--r--innobase/os/makefilewin17
-rw-r--r--innobase/os/os0file.c1369
-rw-r--r--innobase/os/os0proc.c462
-rw-r--r--innobase/os/os0sync.c44
-rw-r--r--innobase/os/os0thread.c13
5 files changed, 1716 insertions, 189 deletions
diff --git a/innobase/os/makefilewin b/innobase/os/makefilewin
deleted file mode 100644
index 8bc8d08611b..00000000000
--- a/innobase/os/makefilewin
+++ /dev/null
@@ -1,17 +0,0 @@
-include ..\include\makefile.i
-
-os.lib: os0sync.obj os0thread.obj os0proc.obj os0file.obj
- lib -out:..\libs\os.lib os0sync.obj os0thread.obj os0proc.obj os0file.obj
-
-os0sync.obj: os0sync.c
- $(CCOM) $(CFLW) -c os0sync.c
-
-os0thread.obj: os0thread.c
- $(CCOM) $(CFLW) -c os0thread.c
-
-os0proc.obj: os0proc.c
- $(CCOM) $(CFLW) -c os0proc.c
-
-os0file.obj: os0file.c
- $(CCOM) $(CFLW) -c os0file.c
-
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index cadf1c0385f..49f88c0d62a 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -11,6 +11,7 @@ Created 10/21/1995 Heikki Tuuri
#include "os0thread.h"
#include "ut0mem.h"
#include "srv0srv.h"
+#include "srv0start.h"
#include "fil0fil.h"
#include "buf0buf.h"
@@ -32,9 +33,13 @@ ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
ulint os_innodb_umask = 0;
#endif
+#ifdef UNIV_DO_FLUSH
/* If the following is set to TRUE, we do not call os_file_flush in every
-os_file_write. We can set this TRUE if the doublewrite buffer is used. */
+os_file_write. We can set this TRUE when the doublewrite buffer is used. */
ibool os_do_not_call_flush_at_each_write = FALSE;
+#else
+/* We do not call os_file_flush in every os_file_write. */
+#endif /* UNIV_DO_FLUSH */
/* We use these mutexes to protect lseek + file i/o operation, if the
OS does not provide an atomic pread or pwrite, or similar */
@@ -69,7 +74,7 @@ struct os_aio_slot_struct{
bytes */
ulint offset_high; /* 32 high bits of file offset */
os_file_t file; /* file where to read or write */
- char* name; /* file name or path */
+ const char* name; /* file name or path */
ibool io_already_done;/* used only in simulated aio:
TRUE if the physical i/o already
made and only the slot message
@@ -154,7 +159,6 @@ os_mutex_t os_file_count_mutex;
ulint os_file_n_pending_preads = 0;
ulint os_file_n_pending_pwrites = 0;
-
/***************************************************************************
Gets the operating system version. Currently works only on Windows. */
@@ -198,9 +202,12 @@ overwrite the error number). If the number is not known to this program,
the OS error number + 100 is returned. */
ulint
-os_file_get_last_error(void)
-/*========================*/
- /* out: error number, or OS error number + 100 */
+os_file_get_last_error(
+/*===================*/
+ /* out: error number, or OS error
+ number + 100 */
+ ibool report_all_errors) /* in: TRUE if we want an error message
+ printed of all errors */
{
ulint err;
@@ -208,26 +215,29 @@ os_file_get_last_error(void)
err = (ulint) GetLastError();
- if (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS) {
+ if (report_all_errors
+ || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
+
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Operating system error number %lu in a file operation.\n"
- "InnoDB: See http://dev.mysql.com/doc/mysql/en/InnoDB.html\n"
- "InnoDB: for installation help.\n",
- err);
+ " InnoDB: Operating system error number %lu in a file operation.\n", (ulong) err);
if (err == ERROR_PATH_NOT_FOUND) {
- fprintf(stderr,
- "InnoDB: The error means the system cannot find the path specified.\n"
- "InnoDB: In installation you must create directories yourself, InnoDB\n"
- "InnoDB: does not create them.\n");
+ fprintf(stderr,
+ "InnoDB: The error means the system cannot find the path specified.\n");
+
+ if (srv_is_being_started) {
+ fprintf(stderr,
+ "InnoDB: If you are installing InnoDB, remember that you must create\n"
+ "InnoDB: directories yourself, InnoDB does not create them.\n");
+ }
} else if (err == ERROR_ACCESS_DENIED) {
- fprintf(stderr,
+ fprintf(stderr,
"InnoDB: The error means mysqld does not have the access rights to\n"
"InnoDB: the directory. It may also be you have created a subdirectory\n"
"InnoDB: of the same name as a data file.\n");
} else {
- fprintf(stderr,
+ fprintf(stderr,
"InnoDB: Some operating system error numbers are described at\n"
"InnoDB: "
"http://dev.mysql.com/doc/mysql/en/Operating_System_error_codes.html\n");
@@ -248,31 +258,33 @@ os_file_get_last_error(void)
#else
err = (ulint) errno;
- if (err != ENOSPC && err != EEXIST) {
- ut_print_timestamp(stderr);
+ if (report_all_errors
+ || (err != ENOSPC && err != EEXIST)) {
+ ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Operating system error number %lu in a file operation.\n"
- "InnoDB: See http://dev.mysql.com/doc/mysql/en/InnoDB.html\n"
- "InnoDB: for installation help.\n",
- err);
+ " InnoDB: Operating system error number %lu in a file operation.\n", (ulong) err);
if (err == ENOENT) {
- fprintf(stderr,
- "InnoDB: The error means the system cannot find the path specified.\n"
- "InnoDB: In installation you must create directories yourself, InnoDB\n"
- "InnoDB: does not create them.\n");
+ fprintf(stderr,
+ "InnoDB: The error means the system cannot find the path specified.\n");
+
+ if (srv_is_being_started) {
+ fprintf(stderr,
+ "InnoDB: If you are installing InnoDB, remember that you must create\n"
+ "InnoDB: directories yourself, InnoDB does not create them.\n");
+ }
} else if (err == EACCES) {
- fprintf(stderr,
+ fprintf(stderr,
"InnoDB: The error means mysqld does not have the access rights to\n"
"InnoDB: the directory.\n");
} else {
- if (strerror((int)err) != NULL) {
+ if (strerror((int)err) != NULL) {
fprintf(stderr,
"InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err));
- }
+ }
- fprintf(stderr,
+ fprintf(stderr,
"InnoDB: Some operating system error numbers are described at\n"
"InnoDB: "
"http://dev.mysql.com/doc/mysql/en/Operating_System_error_codes.html\n");
@@ -310,7 +322,7 @@ os_file_handle_error(
{
ulint err;
- err = os_file_get_last_error();
+ err = os_file_get_last_error(FALSE);
if (err == OS_FILE_DISK_FULL) {
/* We only print a warning about disk full once */
@@ -337,6 +349,7 @@ os_file_handle_error(
return(FALSE);
} else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
+
return(TRUE);
} else if (err == OS_FILE_ALREADY_EXISTS) {
@@ -359,6 +372,106 @@ os_file_handle_error(
return(FALSE);
}
+#undef USE_FILE_LOCK
+#define USE_FILE_LOCK
+#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__FreeBSD__) || defined(__NETWARE__)
+/* InnoDB Hot Backup does not lock the data files.
+ * On Windows, mandatory locking is used.
+ * On FreeBSD with LinuxThreads, advisory locking does not work properly.
+ */
+# undef USE_FILE_LOCK
+#endif
+#ifdef USE_FILE_LOCK
+/********************************************************************
+Obtain an exclusive (write) lock on a file with fcntl(F_SETLK). The call
+does not wait for the lock: if it cannot be taken, a diagnostic is printed
+to stderr and the function fails. */
+static
+int
+os_file_lock(
+/*=========*/
+				/* out: 0 on success, -1 on failure */
+	int		fd,	/* in: file descriptor */
+	const char*	name)	/* in: file name, used only in messages */
+{
+	struct flock	lk;
+	int		lock_errno;
+
+	lk.l_type = F_WRLCK;
+	lk.l_whence = SEEK_SET;
+	lk.l_start = lk.l_len = 0;
+
+	if (fcntl(fd, F_SETLK, &lk) == -1) {
+		/* Save errno at once: the fprintf() below is allowed to
+		overwrite it, and we still need the original value for the
+		EAGAIN/EACCES test that follows. */
+		lock_errno = errno;
+
+		fprintf(stderr,
+			"InnoDB: Unable to lock %s, error: %d\n", name,
+			lock_errno);
+
+		if (lock_errno == EAGAIN || lock_errno == EACCES) {
+			fprintf(stderr,
+"InnoDB: Check that you do not already have another mysqld process\n"
+"InnoDB: using the same InnoDB data or log files.\n");
+		}
+
+		return(-1);
+	}
+
+	return(0);
+}
+#endif /* USE_FILE_LOCK */
+
+/********************************************************************
+Reports a failed file operation on stderr and tells the caller whether the
+operation is worth retrying. The disk-full warning is printed only once;
+later disk-full errors are reported silently through the return value. */
+static
+ibool
+os_file_handle_error_no_exit(
+/*=========================*/
+					/* out: TRUE if we should retry the
+					operation */
+	const char*	name,		/* in: name of a file or NULL */
+	const char*	operation)	/* in: operation */
+{
+	ulint	last_error = os_file_get_last_error(FALSE);
+
+	if (last_error == OS_FILE_DISK_FULL) {
+
+		if (!os_has_said_disk_full) {
+			/* First time we see a full disk: say so, and
+			remember that we did */
+
+			if (name != NULL) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+	" InnoDB: Encountered a problem with file %s\n", name);
+			}
+
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+	" InnoDB: Disk is full. Try to clean the disk to free space.\n");
+
+			os_has_said_disk_full = TRUE;
+
+			fflush(stderr);
+		}
+
+		return(FALSE);
+	}
+
+	if (last_error == OS_FILE_AIO_RESOURCES_RESERVED) {
+		/* The only error for which a retry is requested */
+
+		return(TRUE);
+	}
+
+	if (last_error != OS_FILE_ALREADY_EXISTS) {
+		/* Any other error: identify the file (if known) and the
+		failed call */
+
+		if (name != NULL) {
+			fprintf(stderr, "InnoDB: File name %s\n", name);
+		}
+
+		fprintf(stderr, "InnoDB: File operation call: '%s'.\n",
+			operation);
+	}
+
+	return(FALSE);
+}
+
/********************************************************************
Creates the seek mutexes used in positioned reads and writes. */
@@ -450,21 +563,313 @@ os_file_create_tmpfile(void)
return(file);
}
+/***************************************************************************
+Opens a directory stream for the directory called dirname. On Unix this is
+a plain opendir(). On Windows the stream is a FindFirstFile() handle for the
+pattern dirname\*; the entry that FindFirstFile() itself returns is thrown
+away, see the note in the function body. */
+
+os_file_dir_t
+os_file_opendir(
+/*============*/
+					/* out: directory stream, NULL if
+					error */
+	const char*	dirname,	/* in: directory name; it must not
+					contain a trailing '\' or '/' */
+	ibool		error_is_fatal)	/* in: TRUE if we should treat an
+					error as a fatal error; if we try to
+					open symlinks then we do not wish a
+					fatal error if it happens not to be
+					a directory */
+{
+	os_file_dir_t		dir;
+#ifdef __WIN__
+	LPWIN32_FIND_DATA	first_entry;
+	char			pattern[OS_FILE_MAX_PATH + 3];
+
+	ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
+
+	/* Build the search pattern "<dirname>\*" */
+	strcpy(pattern, dirname);
+	strcat(pattern, "\\*");
+
+	/* FindFirstFile() both opens the search handle and fetches the first
+	directory entry. That entry is discarded here: the buffer is freed
+	without being looked at. The code relies on the first entry being
+	'.', which the readdir function skips anyway. */
+
+	first_entry = ut_malloc(sizeof(WIN32_FIND_DATA));
+
+	dir = FindFirstFile(pattern, first_entry);
+
+	ut_free(first_entry);
+
+	if (dir != INVALID_HANDLE_VALUE) {
+
+		return(dir);
+	}
+
+	if (error_is_fatal) {
+		os_file_handle_error(dirname, "opendir");
+	}
+
+	return(NULL);
+#else
+	dir = opendir(dirname);
+
+	if (dir == NULL) {
+		if (error_is_fatal) {
+			os_file_handle_error(dirname, "opendir");
+		}
+	}
+
+	return(dir);
+#endif
+}
+
+/***************************************************************************
+Closes a directory stream opened with os_file_opendir(). A failure is
+reported through os_file_handle_error_no_exit(). */
+
+int
+os_file_closedir(
+/*=============*/
+				/* out: 0 if success, -1 if failure */
+	os_file_dir_t	dir)	/* in: directory stream */
+{
+#ifdef __WIN__
+	if (FindClose(dir)) {
+
+		return(0);
+	}
+
+	os_file_handle_error_no_exit(NULL, "closedir");
+
+	return(-1);
+#else
+	int	status = closedir(dir);
+
+	if (status != 0) {
+		os_file_handle_error_no_exit(NULL, "closedir");
+	}
+
+	/* The return value of closedir() is passed on unchanged */
+	return(status);
+#endif
+}
+
+/***************************************************************************
+This function returns information of the next file in the directory. We jump
+over the '.' and '..' entries in the directory. On Unix we also jump over an
+entry that no longer exists when we try to stat() it, because it was removed
+after the directory was read; note that a dangling symlink looks the same to
+stat() and is therefore skipped, too. */
+
+int
+os_file_readdir_next_file(
+/*======================*/
+				/* out: 0 if ok, -1 if error, 1 if at the end
+				of the directory */
+	const char*	dirname,/* in: directory name or path */
+	os_file_dir_t	dir,	/* in: directory stream */
+	os_file_stat_t*	info)	/* in/out: buffer where the info is returned;
+				the contents are meaningful only when 0 is
+				returned */
+{
+#ifdef __WIN__
+	LPWIN32_FIND_DATA	lpFindFileData;
+	BOOL			ret;
+
+	lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
+next_file:
+	ret = FindNextFile(dir, lpFindFileData);
+
+	if (ret) {
+		ut_a(strlen(lpFindFileData->cFileName) < OS_FILE_MAX_PATH);
+
+		if (strcmp(lpFindFileData->cFileName, ".") == 0
+		    || strcmp(lpFindFileData->cFileName, "..") == 0) {
+
+			goto next_file;
+		}
+
+		strcpy(info->name, lpFindFileData->cFileName);
+
+		/* The size is delivered as two 32-bit halves */
+		info->size = (ib_longlong)(lpFindFileData->nFileSizeLow)
+		     + (((ib_longlong)(lpFindFileData->nFileSizeHigh)) << 32);
+
+		if (lpFindFileData->dwFileAttributes
+		    & FILE_ATTRIBUTE_REPARSE_POINT) {
+/* TODO: test Windows symlinks */
+/* TODO: MySQL has apparently its own symlink implementation in Windows,
+dbname.sym can redirect a database directory:
+http://www.mysql.com/doc/en/Windows_symbolic_links.html */
+			info->type = OS_FILE_TYPE_LINK;
+		} else if (lpFindFileData->dwFileAttributes
+			   & FILE_ATTRIBUTE_DIRECTORY) {
+			info->type = OS_FILE_TYPE_DIR;
+		} else {
+			/* It is probably safest to assume that all other
+			file types are normal. Better to check them rather
+			than blindly skip them. */
+
+			info->type = OS_FILE_TYPE_FILE;
+		}
+	}
+
+	ut_free(lpFindFileData);
+
+	if (ret) {
+		return(0);
+	} else if (GetLastError() == ERROR_NO_MORE_FILES) {
+
+		return(1);
+	} else {
+		os_file_handle_error_no_exit(dirname,
+					     "readdir_next_file");
+		return(-1);
+	}
+#else
+	struct dirent*	ent;
+	char*		full_path;
+	int		ret;
+	struct stat	statinfo;
+#ifdef HAVE_READDIR_R
+	char		dirent_buf[sizeof(struct dirent) + _POSIX_PATH_MAX +
+									100];
+		/* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
+		the max file name len; but in most standards, the
+		length is NAME_MAX; we add 100 to be even safer */
+#endif
+
+next_file:
+
+#ifdef HAVE_READDIR_R
+	ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
+
+	if (ret != 0) {
+		fprintf(stderr,
+"InnoDB: cannot read directory %s, error %lu\n", dirname, (ulong)ret);
+
+		return(-1);
+	}
+
+	if (ent == NULL) {
+		/* End of directory */
+
+		return(1);
+	}
+
+	ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
+#else
+	ent = readdir(dir);
+
+	if (ent == NULL) {
+
+		return(1);
+	}
+#endif
+	ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
+
+	if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
+
+		goto next_file;
+	}
+
+	strcpy(info->name, ent->d_name);
+
+	/* 10 extra bytes cover the '/' separator and the terminating NUL */
+	full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10);
+
+	sprintf(full_path, "%s/%s", dirname, ent->d_name);
+
+	ret = stat(full_path, &statinfo);
+
+	if (ret) {
+		if (errno == ENOENT) {
+			/* The entry was listed by readdir() but is gone by
+			now: it must have been removed in the meantime. Act
+			as if it had been removed before we read the
+			directory, that is, ignore it and go on with the next
+			entry instead of failing the whole directory scan.
+			If this was the last entry, info->name still holds
+			the stale name when we return 1; info is documented
+			as meaningful only when 0 is returned. */
+
+			ut_free(full_path);
+
+			goto next_file;
+		}
+
+		os_file_handle_error_no_exit(full_path, "stat");
+
+		ut_free(full_path);
+
+		return(-1);
+	}
+
+	info->size = (ib_longlong)statinfo.st_size;
+
+	if (S_ISDIR(statinfo.st_mode)) {
+		info->type = OS_FILE_TYPE_DIR;
+	} else if (S_ISLNK(statinfo.st_mode)) {
+		info->type = OS_FILE_TYPE_LINK;
+	} else if (S_ISREG(statinfo.st_mode)) {
+		info->type = OS_FILE_TYPE_FILE;
+	} else {
+		info->type = OS_FILE_TYPE_UNKNOWN;
+	}
+
+	ut_free(full_path);
+
+	return(0);
+#endif
+}
+
+/*********************************************************************
+Attempts to create a directory named pathname with default permissions; on
+Unix the mode passed to mkdir() is 0770, so the resulting permissions are
+(0770 & ~umask). If the directory already exists, nothing is done and the
+call succeeds, unless the fail_if_exists argument is true. */
+
+ibool
+os_file_create_directory(
+/*=====================*/
+					/* out: TRUE if call succeeds,
+					FALSE on error */
+	const char*	pathname,	/* in: directory name as
+					null-terminated string */
+	ibool		fail_if_exists)	/* in: if TRUE, pre-existing directory
+					is treated as an error. */
+{
+#ifdef __WIN__
+	BOOL	created;
+
+	created = CreateDirectory(pathname, NULL);
+
+	/* The call failed, and the failure is not a tolerated 'already
+	exists' */
+	if (created == 0
+	    && (GetLastError() != ERROR_ALREADY_EXISTS || fail_if_exists)) {
+
+		os_file_handle_error(pathname, "CreateDirectory");
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+#else
+	int	status;
+
+	status = mkdir(pathname, 0770);
+
+	/* The call failed, and the failure is not a tolerated 'already
+	exists' */
+	if (status != 0 && (errno != EEXIST || fail_if_exists)) {
+
+		os_file_handle_error(pathname, "mkdir");
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+#endif
+}
+
/********************************************************************
A simple function to open or create a file. */
os_file_t
os_file_create_simple(
/*==================*/
- /* out, own: handle to the file, not defined if error,
- error number can be retrieved with os_get_last_error */
- char* name, /* in: name of the file or path as a null-terminated
- string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened
- (if does not exist, error), or OS_FILE_CREATE if a new
- file is created (if exists, error) */
- ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
+ /* out, own: handle to the file, not defined
+ if error, error number can be retrieved with
+ os_file_get_last_error */
+ const char* name, /* in: name of the file or path as a
+ null-terminated string */
+ ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
+ opened (if does not exist, error), or
+ OS_FILE_CREATE if a new file is created
+ (if exists, error), or
+ OS_FILE_CREATE_PATH if new file
+ (if exists, error) and subdirectories along
+ its path are created (if needed)*/
+ ulint access_type,/* in: OS_FILE_READ_ONLY or
+ OS_FILE_READ_WRITE */
+ ibool* success)/* out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
os_file_t file;
@@ -480,6 +885,14 @@ try_again:
create_flag = OPEN_EXISTING;
} else if (create_mode == OS_FILE_CREATE) {
create_flag = CREATE_NEW;
+ } else if (create_mode == OS_FILE_CREATE_PATH) {
+ /* create subdirs along the path if needed */
+ *success = os_file_create_subdirs_if_needed(name);
+ if (!*success) {
+ ut_error;
+ }
+ create_flag = CREATE_NEW;
+ create_mode = OS_FILE_CREATE;
} else {
create_flag = 0;
ut_error;
@@ -496,8 +909,9 @@ try_again:
file = CreateFile(name,
access,
- FILE_SHARE_READ,/* file can be read also by other
- processes */
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
+ /* file can be read and written also
+ by other processes */
NULL, /* default security attributes */
create_flag,
attributes,
@@ -533,6 +947,14 @@ try_again:
}
} else if (create_mode == OS_FILE_CREATE) {
create_flag = O_RDWR | O_CREAT | O_EXCL;
+ } else if (create_mode == OS_FILE_CREATE_PATH) {
+ /* create subdirs along the path if needed */
+ *success = os_file_create_subdirs_if_needed(name);
+ if (!*success) {
+ return (-1);
+ }
+ create_flag = O_RDWR | O_CREAT | O_EXCL;
+ create_mode = OS_FILE_CREATE;
} else {
create_flag = 0;
ut_error;
@@ -554,6 +976,13 @@ try_again:
if (retry) {
goto try_again;
}
+#ifdef USE_FILE_LOCK
+ } else if (access_type == OS_FILE_READ_WRITE
+ && os_file_lock(file, name)) {
+ *success = FALSE;
+ close(file);
+ file = -1;
+#endif
} else {
*success = TRUE;
}
@@ -568,21 +997,27 @@ A simple function to open or create a file. */
os_file_t
os_file_create_simple_no_error_handling(
/*====================================*/
- /* out, own: handle to the file, not defined if error,
- error number can be retrieved with os_get_last_error */
- char* name, /* in: name of the file or path as a null-terminated
- string */
- ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened
- (if does not exist, error), or OS_FILE_CREATE if a new
- file is created (if exists, error) */
- ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
+ /* out, own: handle to the file, not defined
+ if error, error number can be retrieved with
+ os_file_get_last_error */
+ const char* name, /* in: name of the file or path as a
+ null-terminated string */
+ ulint create_mode,/* in: OS_FILE_OPEN if an existing file
+ is opened (if does not exist, error), or
+ OS_FILE_CREATE if a new file is created
+ (if exists, error) */
+ ulint access_type,/* in: OS_FILE_READ_ONLY,
+ OS_FILE_READ_WRITE, or
+ OS_FILE_READ_ALLOW_DELETE; the last option is
+ used by a backup program reading the file */
+ ibool* success)/* out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
os_file_t file;
DWORD create_flag;
DWORD access;
DWORD attributes = 0;
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
ut_a(name);
@@ -599,6 +1034,13 @@ os_file_create_simple_no_error_handling(
access = GENERIC_READ;
} else if (access_type == OS_FILE_READ_WRITE) {
access = GENERIC_READ | GENERIC_WRITE;
+ } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
+ access = GENERIC_READ;
+ share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
+ | FILE_SHARE_WRITE; /* A backup program has to give
+ mysqld the maximum freedom to
+ do what it likes with the
+ file */
} else {
access = 0;
ut_error;
@@ -606,8 +1048,7 @@ os_file_create_simple_no_error_handling(
file = CreateFile(name,
access,
- FILE_SHARE_READ,/* file can be read also by other
- processes */
+ share_mode,
NULL, /* default security attributes */
create_flag,
attributes,
@@ -648,6 +1089,13 @@ os_file_create_simple_no_error_handling(
if (file == -1) {
*success = FALSE;
+#ifdef USE_FILE_LOCK
+ } else if (access_type == OS_FILE_READ_WRITE
+ && os_file_lock(file, name)) {
+ *success = FALSE;
+ close(file);
+ file = -1;
+#endif
} else {
*success = TRUE;
}
@@ -662,33 +1110,43 @@ Opens an existing file or creates a new. */
os_file_t
os_file_create(
/*===========*/
- /* out, own: handle to the file, not defined if error,
- error number can be retrieved with os_get_last_error */
- char* name, /* in: name of the file or path as a null-terminated
- string */
- ulint create_mode, /* in: OS_FILE_OPEN if an existing file is opened
- (if does not exist, error), or OS_FILE_CREATE if a new
- file is created (if exists, error), OS_FILE_OVERWRITE
- if a new is created or an old overwritten */
- ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
- is desired, OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_.. and srv_..
- variables whether we really use async i/o or
- unbuffered i/o: look in the function source code for
- the exact rules */
- ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success)/* out: TRUE if succeed, FALSE if error */
+ /* out, own: handle to the file, not defined
+ if error, error number can be retrieved with
+ os_file_get_last_error */
+ const char* name, /* in: name of the file or path as a
+ null-terminated string */
+ ulint create_mode,/* in: OS_FILE_OPEN if an existing file
+ is opened (if does not exist, error), or
+ OS_FILE_CREATE if a new file is created
+ (if exists, error),
+ OS_FILE_OVERWRITE if a new file is created
+ or an old overwritten;
+ OS_FILE_OPEN_RAW, if a raw device or disk
+ partition should be opened */
+ ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
+ non-buffered i/o is desired,
+ OS_FILE_NORMAL, if any normal file;
+ NOTE that it also depends on type, os_aio_..
+ and srv_.. variables whether we really use
+ async i/o or unbuffered i/o: look in the
+ function source code for the exact rules */
+ ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
+ ibool* success)/* out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
os_file_t file;
+ DWORD share_mode = FILE_SHARE_READ;
DWORD create_flag;
DWORD attributes;
ibool retry;
-
try_again:
ut_a(name);
- if (create_mode == OS_FILE_OPEN) {
+ if (create_mode == OS_FILE_OPEN_RAW) {
+ create_flag = OPEN_EXISTING;
+ share_mode = FILE_SHARE_WRITE;
+ } else if (create_mode == OS_FILE_OPEN
+ || create_mode == OS_FILE_OPEN_RETRY) {
create_flag = OPEN_EXISTING;
} else if (create_mode == OS_FILE_CREATE) {
create_flag = CREATE_NEW;
@@ -738,14 +1196,17 @@ try_again:
file = CreateFile(name,
GENERIC_READ | GENERIC_WRITE, /* read and write
access */
- FILE_SHARE_READ,/* File can be read also by other
+ share_mode, /* File can be read also by other
processes; we must give the read
permission because of ibbackup. We do
not give the write permission to
others because if one would succeed to
start 2 instances of mysqld on the
SAME files, that could cause severe
- database corruption! */
+ database corruption! When opening
+ raw disk partitions, Microsoft manuals
+ say that we must give also the write
+ permission. */
NULL, /* default security attributes */
create_flag,
attributes,
@@ -755,8 +1216,8 @@ try_again:
*success = FALSE;
retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
+ create_mode == OS_FILE_CREATE ?
+ "create" : "open");
if (retry) {
goto try_again;
}
@@ -776,17 +1237,15 @@ try_again:
try_again:
ut_a(name);
- if (create_mode == OS_FILE_OPEN) {
+ if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
+ || create_mode == OS_FILE_OPEN_RETRY) {
mode_str = "OPEN";
-
create_flag = O_RDWR;
} else if (create_mode == OS_FILE_CREATE) {
mode_str = "CREATE";
-
create_flag = O_RDWR | O_CREAT | O_EXCL;
} else if (create_mode == OS_FILE_OVERWRITE) {
mode_str = "OVERWRITE";
-
create_flag = O_RDWR | O_CREAT | O_TRUNC;
} else {
create_flag = 0;
@@ -843,11 +1302,34 @@ try_again:
*success = FALSE;
retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
+ create_mode == OS_FILE_CREATE ?
+ "create" : "open");
if (retry) {
goto try_again;
}
+#ifdef USE_FILE_LOCK
+ } else if (create_mode != OS_FILE_OPEN_RAW
+ && os_file_lock(file, name)) {
+ *success = FALSE;
+ if (create_mode == OS_FILE_OPEN_RETRY) {
+ int i;
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Retrying to lock the first data file\n",
+ stderr);
+ for (i = 0; i < 100; i++) {
+ os_thread_sleep(1000000);
+ if (!os_file_lock(file, name)) {
+ *success = TRUE;
+ return(file);
+ }
+ }
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Unable to open the first data file\n",
+ stderr);
+ }
+ close(file);
+ file = -1;
+#endif
} else {
*success = TRUE;
}
@@ -857,6 +1339,168 @@ try_again:
}
/***************************************************************************
+Deletes a file if it exists. The file has to be closed before calling this. */
+
+ibool
+os_file_delete_if_exists(
+/*=====================*/
+				/* out: TRUE if success */
+	const char*	name)	/* in: file path as a null-terminated string */
+{
+#ifdef __WIN__
+	ulint	n_failures;
+
+	/* In Windows, deleting an .ibd file may fail if ibbackup is copying
+	it. We therefore try again once a second, and give up after more
+	than 2000 failed attempts. */
+
+	for (n_failures = 1;; n_failures++) {
+
+		if (DeleteFile((LPCTSTR)name)) {
+
+			return(TRUE);
+		}
+
+		if (GetLastError() == ERROR_FILE_NOT_FOUND) {
+			/* the file does not exist, this is not an error */
+
+			return(TRUE);
+		}
+
+		/* After the first 100 failures, warn on every 10th one */
+		if (n_failures > 100 && n_failures % 10 == 0) {
+			fprintf(stderr,
+"InnoDB: Warning: cannot delete file %s\n"
+"InnoDB: Are you running ibbackup to back up the file?\n", name);
+
+			os_file_get_last_error(TRUE); /* print error information */
+		}
+
+		os_thread_sleep(1000000);	/* sleep for a second */
+
+		if (n_failures > 2000) {
+
+			return(FALSE);
+		}
+	}
+#else
+	if (unlink((const char*)name) == 0 || errno == ENOENT) {
+		/* Either the file was removed or it was not there to begin
+		with; both count as success */
+
+		return(TRUE);
+	}
+
+	os_file_handle_error_no_exit(name, "delete");
+
+	return(FALSE);
+#endif
+}
+
+/***************************************************************************
+Deletes a file. The file has to be closed before calling this. Unlike
+os_file_delete_if_exists(), a missing file makes this function return
+FALSE. */
+
+ibool
+os_file_delete(
+/*===========*/
+				/* out: TRUE if success */
+	const char*	name)	/* in: file path as a null-terminated string */
+{
+#ifdef __WIN__
+	ulint	n_failures;
+
+	/* In Windows, deleting an .ibd file may fail if ibbackup is copying
+	it. We therefore try again once a second, and give up after more
+	than 2000 failed attempts. */
+
+	for (n_failures = 1;; n_failures++) {
+
+		if (DeleteFile((LPCTSTR)name)) {
+
+			return(TRUE);
+		}
+
+		if (GetLastError() == ERROR_FILE_NOT_FOUND) {
+			/* If the file does not exist, we classify this as
+			a 'mild' error and return without retrying or
+			printing anything */
+
+			return(FALSE);
+		}
+
+		/* After the first 100 failures, warn on every 10th one */
+		if (n_failures > 100 && n_failures % 10 == 0) {
+			fprintf(stderr,
+"InnoDB: Warning: cannot delete file %s\n"
+"InnoDB: Are you running ibbackup to back up the file?\n", name);
+
+			os_file_get_last_error(TRUE); /* print error information */
+		}
+
+		os_thread_sleep(1000000);	/* sleep for a second */
+
+		if (n_failures > 2000) {
+
+			return(FALSE);
+		}
+	}
+#else
+	if (unlink((const char*)name) == 0) {
+
+		return(TRUE);
+	}
+
+	os_file_handle_error_no_exit(name, "delete");
+
+	return(FALSE);
+#endif
+}
+
+/***************************************************************************
+Renames a file; the new path may be in another directory, which moves the
+file. It is safest that the file is closed before calling this function. A
+failure is passed to os_file_handle_error() with the old path. */
+
+ibool
+os_file_rename(
+/*===========*/
+				/* out: TRUE if success */
+	const char*	oldpath,/* in: old file path as a null-terminated
+				string */
+	const char*	newpath)/* in: new file path */
+{
+#ifdef __WIN__
+	if (!MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath)) {
+		os_file_handle_error(oldpath, "rename");
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+#else
+	if (rename((const char*)oldpath, (const char*)newpath) == 0) {
+
+		return(TRUE);
+	}
+
+	os_file_handle_error(oldpath, "rename");
+
+	return(FALSE);
+#endif
+}
+
+/***************************************************************************
Closes a file handle. In case of error, error number can be retrieved with
os_file_get_last_error. */
@@ -878,6 +1522,7 @@ os_file_close(
}
os_file_handle_error(NULL, "close");
+
return(FALSE);
#else
int ret;
@@ -886,6 +1531,7 @@ os_file_close(
if (ret == -1) {
os_file_handle_error(NULL, "close");
+
return(FALSE);
}
@@ -965,7 +1611,7 @@ os_file_get_size(
}
if (sizeof(off_t) > 4) {
- *size = (ulint)(offs & 0xFFFFFFFF);
+ *size = (ulint)(offs & 0xFFFFFFFFUL);
*size_high = (ulint)(offs >> 32);
} else {
*size = (ulint) offs;
@@ -977,60 +1623,82 @@ os_file_get_size(
}
/***************************************************************************
-Sets a file size. This function can be used to extend or truncate a file. */
+Gets file size as a 64-bit integer ib_longlong. */
+
+ib_longlong
+os_file_get_size_as_iblonglong(
+/*===========================*/
+				/* out: size in bytes, -1 if error */
+	os_file_t	file)	/* in: handle to a file */
+{
+	ulint	low_bits;
+	ulint	high_bits;
+
+	if (!os_file_get_size(file, &low_bits, &high_bits)) {
+
+		return(-1);
+	}
+
+	/* os_file_get_size() delivers the size as two halves; glue them
+	together, the high half shifted up by 32 bits */
+
+	return((ib_longlong)low_bits + (((ib_longlong)high_bits) << 32));
+}
+
+/***************************************************************************
+Write the specified number of zeros to a newly created file. */
ibool
os_file_set_size(
/*=============*/
/* out: TRUE if success */
- char* name, /* in: name of the file or path as a
+ const char* name, /* in: name of the file or path as a
null-terminated string */
os_file_t file, /* in: handle to a file */
ulint size, /* in: least significant 32 bits of file
size */
ulint size_high)/* in: most significant 32 bits of size */
{
- ib_longlong offset;
- ib_longlong low;
- ulint n_bytes;
+ ib_longlong current_size;
+ ib_longlong desired_size;
ibool ret;
byte* buf;
byte* buf2;
- ulint i;
+ ulint buf_size;
ut_a(size == (size & 0xFFFFFFFF));
- /* We use a very big 8 MB buffer in writing because Linux may be
- extremely slow in fsync on 1 MB writes */
+ current_size = 0;
+ desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32);
- buf2 = ut_malloc(UNIV_PAGE_SIZE * 513);
+ /* Write up to 1 megabyte at a time. */
+ buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
+ * UNIV_PAGE_SIZE;
+ buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
/* Align the buffer for possible raw i/o */
buf = ut_align(buf2, UNIV_PAGE_SIZE);
/* Write buffer full of zeros */
- for (i = 0; i < UNIV_PAGE_SIZE * 512; i++) {
- buf[i] = '\0';
- }
+ memset(buf, 0, buf_size);
- offset = 0;
- low = (ib_longlong)size + (((ib_longlong)size_high) << 32);
-
- if (low >= (ib_longlong)(100 * 1024 * 1024)) {
+ if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
fprintf(stderr, "InnoDB: Progress in MB:");
}
- while (offset < low) {
- if (low - offset < UNIV_PAGE_SIZE * 512) {
- n_bytes = (ulint)(low - offset);
- } else {
- n_bytes = UNIV_PAGE_SIZE * 512;
- }
-
+ while (current_size < desired_size) {
+ ulint n_bytes;
+
+ if (desired_size - current_size < (ib_longlong) buf_size) {
+ n_bytes = (ulint) (desired_size - current_size);
+ } else {
+ n_bytes = buf_size;
+ }
+
ret = os_file_write(name, file, buf,
- (ulint)(offset & 0xFFFFFFFF),
- (ulint)(offset >> 32),
+ (ulint)(current_size & 0xFFFFFFFF),
+ (ulint)(current_size >> 32),
n_bytes);
if (!ret) {
ut_free(buf2);
@@ -1038,18 +1706,18 @@ os_file_set_size(
}
/* Print about progress for each 100 MB written */
- if ((offset + n_bytes) / (ib_longlong)(100 * 1024 * 1024)
- != offset / (ib_longlong)(100 * 1024 * 1024)) {
+ if ((current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024)
+ != current_size / (ib_longlong)(100 * 1024 * 1024)) {
fprintf(stderr, " %lu00",
- (ulint)((offset + n_bytes)
+ (ulong) ((current_size + n_bytes)
/ (ib_longlong)(100 * 1024 * 1024)));
}
- offset += n_bytes;
+ current_size += n_bytes;
}
- if (low >= (ib_longlong)(100 * 1024 * 1024)) {
+ if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) {
fprintf(stderr, "\n");
}
@@ -1105,6 +1773,15 @@ os_file_flush(
return(TRUE);
}
+ /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
+ actually a raw device, we choose to ignore that error if we are using
+ raw disks */
+
+ if (srv_start_raw_disk_in_use && GetLastError()
+ == ERROR_INVALID_FUNCTION) {
+ return(TRUE);
+ }
+
os_file_handle_error(NULL, "flush");
/* It is a fatal error if a file flush does not succeed, because then
@@ -1115,7 +1792,33 @@ os_file_flush(
#else
int ret;
-#ifdef HAVE_FDATASYNC
+#if defined(HAVE_DARWIN_THREADS)
+# ifndef F_FULLFSYNC
+ /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */
+# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */
+# elif F_FULLFSYNC != 51
+# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
+# endif
+ /* Apple has disabled fsync() for internal disk drives in OS X. That
+ caused corruption for a user when he tested a power outage. Let us in
+ OS X use a nonstandard flush method recommended by an Apple
+ engineer. */
+
+ if (!srv_have_fullfsync) {
+ /* If we are not on an operating system that supports this,
+ then fall back to a plain fsync. */
+
+ ret = fsync(file);
+ } else {
+ ret = fcntl(file, F_FULLFSYNC, NULL);
+
+ if (ret) {
+ /* If we are not on a file system that supports this,
+ then fall back to a plain fsync. */
+ ret = fsync(file);
+ }
+ }
+#elif HAVE_FDATASYNC
ret = fdatasync(file);
#else
/* fprintf(stderr, "Flushing to file %p\n", file); */
@@ -1128,9 +1831,10 @@ os_file_flush(
}
/* Since Linux returns EINVAL if the 'file' is actually a raw device,
- we choose to ignore that error */
+ we choose to ignore that error if we are using raw disks */
+
+ if (srv_start_raw_disk_in_use && errno == EINVAL) {
- if (errno == EINVAL) {
return(TRUE);
}
@@ -1168,7 +1872,7 @@ os_file_pread(
off_t offs;
ssize_t n_bytes;
- ut_a((offset & 0xFFFFFFFF) == offset);
+ ut_a((offset & 0xFFFFFFFFUL) == offset);
/* If off_t is > 4 bytes in size, then we assume we can pass a
64-bit address */
@@ -1235,7 +1939,7 @@ os_file_pwrite(
/*===========*/
/* out: number of bytes written, -1 if error */
os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer from where to write */
+ const void* buf, /* in: buffer from where to write */
ulint n, /* in: number of bytes to write */
ulint offset, /* in: least significant 32 bits of file
offset where to write */
@@ -1245,7 +1949,7 @@ os_file_pwrite(
ssize_t ret;
off_t offs;
- ut_a((offset & 0xFFFFFFFF) == offset);
+ ut_a((offset & 0xFFFFFFFFUL) == offset);
/* If off_t is > 4 bytes in size, then we assume we can pass a
64-bit address */
@@ -1274,6 +1978,7 @@ os_file_pwrite(
os_file_n_pending_pwrites--;
os_mutex_exit(os_file_count_mutex);
+# ifdef UNIV_DO_FLUSH
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
&& !os_do_not_call_flush_at_each_write) {
@@ -1284,6 +1989,7 @@ os_file_pwrite(
ut_a(TRUE == os_file_flush(file));
}
+# endif /* UNIV_DO_FLUSH */
return(ret);
#else
@@ -1306,6 +2012,7 @@ os_file_pwrite(
ret = write(file, buf, (ssize_t)n);
+# ifdef UNIV_DO_FLUSH
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
&& !os_do_not_call_flush_at_each_write) {
@@ -1316,6 +2023,7 @@ os_file_pwrite(
ut_a(TRUE == os_file_flush(file));
}
+# endif /* UNIV_DO_FLUSH */
os_mutex_exit(os_file_seek_mutexes[i]);
@@ -1350,7 +2058,7 @@ os_file_read(
ibool retry;
ulint i;
- ut_a((offset & 0xFFFFFFFF) == offset);
+ ut_a((offset & 0xFFFFFFFFUL) == offset);
os_n_file_reads++;
os_bytes_read_since_printout += n;
@@ -1360,8 +2068,8 @@ try_again:
ut_ad(buf);
ut_ad(n > 0);
- low = offset;
- high = offset_high;
+ low = (DWORD) offset;
+ high = (DWORD) offset_high;
/* Protect the seek / read operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
@@ -1377,7 +2085,7 @@ try_again:
goto error_handling;
}
- ret = ReadFile(file, buf, n, &len, NULL);
+ ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
os_mutex_exit(os_file_seek_mutexes[i]);
@@ -1397,6 +2105,11 @@ try_again:
return(TRUE);
}
+
+ fprintf(stderr,
+"InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n"
+"InnoDB: Was only able to read %ld.\n", (ulong)n, (ulong)offset_high,
+ (ulong)offset, (long)ret);
#endif
#ifdef __WIN__
error_handling:
@@ -1410,9 +2123,9 @@ error_handling:
fprintf(stderr,
"InnoDB: Fatal error: cannot read from file. OS error number %lu.\n",
#ifdef __WIN__
- (ulint)GetLastError()
+ (ulong) GetLastError()
#else
- (ulint)errno
+ (ulong) errno
#endif
);
fflush(stderr);
@@ -1423,6 +2136,92 @@ error_handling:
}
/***********************************************************************
+Requests a synchronous positioned read operation. This function does not do
+any error handling. In case of error it returns FALSE.
+
+A failed or short read is handed to os_file_handle_error_no_exit(); if that
+call returns TRUE the read is attempted again from the try_again label,
+otherwise FALSE is returned to the caller. */
+
+ibool
+os_file_read_no_error_handling(
+/*===========================*/
+ /* out: TRUE if request was
+ successful, FALSE if fail */
+ os_file_t file, /* in: handle to a file */
+ void* buf, /* in: buffer where to read */
+ ulint offset, /* in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high, /* in: most significant 32 bits of
+ offset */
+ ulint n) /* in: number of bytes to read */
+{
+#ifdef __WIN__
+ BOOL ret;
+ DWORD len;
+ DWORD ret2;
+ DWORD low;
+ DWORD high;
+ ibool retry;
+ ulint i;
+
+ ut_a((offset & 0xFFFFFFFFUL) == offset); /* offset must fit in 32 bits */
+
+ os_n_file_reads++; /* counted once: this is above try_again, so a
+ retry does not bump the statistics again */
+ os_bytes_read_since_printout += n;
+
+try_again:
+ ut_ad(file);
+ ut_ad(buf);
+ ut_ad(n > 0);
+
+ low = (DWORD) offset;
+ high = (DWORD) offset_high;
+
+ /* Protect the seek / read operation with a mutex; the mutex is picked
+ from a fixed pool by the file handle value */
+ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+ os_mutex_enter(os_file_seek_mutexes[i]);
+
+ ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); /* 0xFFFFFFFF can
+ also be a valid low part of the new position, which is why the
+ last error code is checked together with the return value below */
+
+ if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ goto error_handling;
+ }
+
+ ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
+
+ os_mutex_exit(os_file_seek_mutexes[i]);
+
+ if (ret && len == n) { /* only a complete read counts as success */
+ return(TRUE);
+ }
+#else
+ ibool retry;
+ ssize_t ret;
+
+ os_bytes_read_since_printout += n; /* NOTE(review): os_n_file_reads is
+ not incremented in this branch; presumably os_file_pread() does
+ that -- confirm */
+
+try_again:
+ ret = os_file_pread(file, buf, n, offset, offset_high);
+
+ if ((ulint)ret == n) { /* only a complete read counts as success */
+
+ return(TRUE);
+ }
+#endif
+#ifdef __WIN__
+error_handling:
+#endif
+ retry = os_file_handle_error_no_exit(NULL, "read"); /* no file name is
+ passed to the error handler here */
+
+ if (retry) {
+ goto try_again;
+ }
+
+ return(FALSE);
+}
+
+/***********************************************************************
Requests a synchronous write operation. */
ibool
@@ -1430,10 +2229,10 @@ os_file_write(
/*==========*/
/* out: TRUE if request was
successful, FALSE if fail */
- char* name, /* in: name of the file or path as a
+ const char* name, /* in: name of the file or path as a
null-terminated string */
os_file_t file, /* in: handle to a file */
- void* buf, /* in: buffer from which to write */
+ const void* buf, /* in: buffer from which to write */
ulint offset, /* in: least significant 32 bits of file
offset where to write */
ulint offset_high, /* in: most significant 32 bits of
@@ -1458,8 +2257,8 @@ os_file_write(
ut_ad(buf);
ut_ad(n > 0);
retry:
- low = offset;
- high = offset_high;
+ low = (DWORD) offset;
+ high = (DWORD) offset_high;
/* Protect the seek / write operation with a mutex */
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
@@ -1480,20 +2279,22 @@ retry:
"InnoDB: Some operating system error numbers are described at\n"
"InnoDB: "
"http://dev.mysql.com/doc/mysql/en/Operating_System_error_codes.html\n",
- name, offset_high, offset,
- (ulint)GetLastError());
+ name, (ulong) offset_high, (ulong) offset,
+ (ulong) GetLastError());
return(FALSE);
}
- ret = WriteFile(file, buf, n, &len, NULL);
+ ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
/* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */
+# ifdef UNIV_DO_FLUSH
if (!os_do_not_call_flush_at_each_write) {
ut_a(TRUE == os_file_flush(file));
}
+# endif /* UNIV_DO_FLUSH */
os_mutex_exit(os_file_seek_mutexes[i]);
@@ -1527,12 +2328,12 @@ retry:
"InnoDB: Operating system error number %lu.\n"
"InnoDB: Check that your OS and file system support files of this size.\n"
"InnoDB: Check also that the disk is not full or a disk quota exceeded.\n",
- name, offset_high, offset, n, (ulint)len,
- err);
+ name, (ulong) offset_high, (ulong) offset,
+ (ulong) n, (ulong) len, (ulong) err);
if (strerror((int)err) != NULL) {
fprintf(stderr,
-"InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err));
+"InnoDB: Error number %lu means '%s'.\n", (ulong) err, strerror((int)err));
}
fprintf(stderr,
@@ -1583,6 +2384,259 @@ retry:
#endif
}
+/***********************************************************************
+Check the existence and type of the given file.
+
+The call also counts as succeeded (TRUE) when the file does not exist; in
+that case only *exists is written (FALSE) and *type is left untouched. When
+the stat call fails with an error other than ENOENT / ENOTDIR, the error is
+passed to os_file_handle_error_no_exit(), FALSE is returned and neither
+output parameter is written. */
+
+ibool
+os_file_status(
+/*===========*/
+ /* out: TRUE if call succeeded */
+ const char* path, /* in: pathname of the file */
+ ibool* exists, /* out: TRUE if file exists */
+ os_file_type_t* type) /* out: type of the file (if it exists) */
+{
+#ifdef __WIN__
+ int ret;
+ struct _stat statinfo;
+
+ ret = _stat(path, &statinfo);
+ if (ret && (errno == ENOENT || errno == ENOTDIR)) {
+ /* file does not exist; *type is not set on this path */
+ *exists = FALSE;
+ return(TRUE);
+ } else if (ret) {
+ /* stat call failed for some other reason; *exists and *type
+ are not set on this path */
+
+ os_file_handle_error_no_exit(path, "stat");
+
+ return(FALSE);
+ }
+
+ if (_S_IFDIR & statinfo.st_mode) {
+ *type = OS_FILE_TYPE_DIR;
+ } else if (_S_IFREG & statinfo.st_mode) {
+ *type = OS_FILE_TYPE_FILE;
+ } else {
+ *type = OS_FILE_TYPE_UNKNOWN;
+ }
+
+ *exists = TRUE;
+
+ return(TRUE);
+#else
+ int ret;
+ struct stat statinfo;
+
+ ret = stat(path, &statinfo);
+ if (ret && (errno == ENOENT || errno == ENOTDIR)) {
+ /* file does not exist; *type is not set on this path */
+ *exists = FALSE;
+ return(TRUE);
+ } else if (ret) {
+ /* stat call failed for some other reason; *exists and *type
+ are not set on this path */
+
+ os_file_handle_error_no_exit(path, "stat");
+
+ return(FALSE);
+ }
+
+ if (S_ISDIR(statinfo.st_mode)) {
+ *type = OS_FILE_TYPE_DIR;
+ } else if (S_ISLNK(statinfo.st_mode)) { /* NOTE(review): stat() follows
+ symbolic links, so this branch is presumably never taken;
+ confirm whether lstat() was intended */
+ *type = OS_FILE_TYPE_LINK;
+ } else if (S_ISREG(statinfo.st_mode)) {
+ *type = OS_FILE_TYPE_FILE;
+ } else {
+ *type = OS_FILE_TYPE_UNKNOWN;
+ }
+
+ *exists = TRUE;
+
+ return(TRUE);
+#endif
+}
+
+/***********************************************************************
+This function returns information about the specified file: on success the
+type, ctime, atime, mtime and size fields of *stat_info are filled in from
+the result of the stat call.
+
+FALSE is returned both when the file does not exist (silently) and when the
+stat call fails for another reason (after reporting the error through
+os_file_handle_error_no_exit()); *stat_info is not written in either case. */
+
+ibool
+os_file_get_status(
+/*===========*/
+ /* out: TRUE if stat information found */
+ const char* path, /* in: pathname of the file */
+ os_file_stat_t* stat_info) /* information of a file in a directory */
+{
+#ifdef __WIN__
+ int ret;
+ struct _stat statinfo;
+
+ ret = _stat(path, &statinfo);
+ if (ret && (errno == ENOENT || errno == ENOTDIR)) {
+ /* file does not exist */
+
+ return(FALSE);
+ } else if (ret) {
+ /* stat call failed for some other reason */
+
+ os_file_handle_error_no_exit(path, "stat");
+
+ return(FALSE);
+ }
+ if (_S_IFDIR & statinfo.st_mode) {
+ stat_info->type = OS_FILE_TYPE_DIR;
+ } else if (_S_IFREG & statinfo.st_mode) {
+ stat_info->type = OS_FILE_TYPE_FILE;
+ } else {
+ stat_info->type = OS_FILE_TYPE_UNKNOWN;
+ }
+
+ stat_info->ctime = statinfo.st_ctime;
+ stat_info->atime = statinfo.st_atime;
+ stat_info->mtime = statinfo.st_mtime;
+ stat_info->size = statinfo.st_size;
+
+ return(TRUE);
+#else
+ int ret;
+ struct stat statinfo;
+
+ ret = stat(path, &statinfo);
+
+ if (ret && (errno == ENOENT || errno == ENOTDIR)) {
+ /* file does not exist */
+
+ return(FALSE);
+ } else if (ret) {
+ /* stat call failed for some other reason */
+
+ os_file_handle_error_no_exit(path, "stat");
+
+ return(FALSE);
+ }
+
+ if (S_ISDIR(statinfo.st_mode)) {
+ stat_info->type = OS_FILE_TYPE_DIR;
+ } else if (S_ISLNK(statinfo.st_mode)) { /* NOTE(review): stat() follows
+ symbolic links, so this branch is presumably never taken;
+ confirm whether lstat() was intended */
+ stat_info->type = OS_FILE_TYPE_LINK;
+ } else if (S_ISREG(statinfo.st_mode)) {
+ stat_info->type = OS_FILE_TYPE_FILE;
+ } else {
+ stat_info->type = OS_FILE_TYPE_UNKNOWN;
+ }
+
+ stat_info->ctime = statinfo.st_ctime;
+ stat_info->atime = statinfo.st_atime;
+ stat_info->mtime = statinfo.st_mtime;
+ stat_info->size = statinfo.st_size;
+
+ return(TRUE);
+#endif
+}
+
+/* path name separator character */
+#ifdef __WIN__
+# define OS_FILE_PATH_SEPARATOR '\\'
+#else
+# define OS_FILE_PATH_SEPARATOR '/'
+#endif
+
+/********************************************************************
+The function os_file_dirname returns the directory component of a
+null-terminated pathname string: the string up to, but not including,
+the final path separator (OS_FILE_PATH_SEPARATOR).
+
+If path does not contain a separator, the string "." is returned.
+If the only separator is the first character of path, that single
+separator (the root directory) is returned.
+
+NOTE: unlike the SUSv2 dirname(), this implementation does not strip
+trailing separators before looking for the last one, so a path ending
+in a separator yields the path without that final separator.
+
+The return value is a copy of the directory component of the pathname.
+The copy is allocated from heap. It is the caller's responsibility
+to free it after it is no longer needed.
+
+Examples, with '/' as the separator:
+
+	path		result
+	"/usr/lib"	"/usr"
+	"/usr/"		"/usr"
+	"usr"		"."
+	"/"		"/"
+	"."		"."
+	".."		"."
+*/
+
+char*
+os_file_dirname(
+/*============*/
+				/* out, own: directory component of the
+				pathname */
+	const char*	path)	/* in: pathname */
+{
+	/* Find the last separator */
+	const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
+
+	if (!last_slash) {
+		/* No separator in the path, return "." */
+
+		return(mem_strdup("."));
+	}
+
+	if (last_slash == path) {
+		/* The only separator is the first character: the directory
+		is the root. Copy the separator from the path itself instead
+		of a hard-coded "/", so that the result uses the platform
+		separator and can be recognized by comparing against
+		OS_FILE_PATH_SEPARATOR. */
+
+		return(mem_strdupl(path, 1));
+	}
+
+	/* Non-trivial directory component */
+
+	return(mem_strdupl(path, last_slash - path));
+}
+
+/********************************************************************
+Makes sure that every directory leading up to the last component of the
+given path exists; missing ancestors are created before their children. */
+
+ibool
+os_file_create_subdirs_if_needed(
+/*=============================*/
+					/* out: TRUE if call succeeded
+					FALSE otherwise */
+	const char*	path)	/* in: path name */
+{
+	os_file_type_t	parent_type;
+	ibool		parent_exists;
+	ibool		ok = TRUE;
+	char*		parent = os_file_dirname(path);
+
+	/* A one-character parent that is the separator or '.' stands for
+	the root or the current directory: there is nothing to create */
+	if (!(parent[0] != '\0' && parent[1] == '\0'
+	      && (parent[0] == OS_FILE_PATH_SEPARATOR
+		  || parent[0] == '.'))) {
+
+		ok = os_file_status(parent, &parent_exists, &parent_type);
+
+		if (ok && !parent_exists) {
+			/* First the ancestors, then the parent itself */
+			ok = os_file_create_subdirs_if_needed(parent);
+
+			if (ok) {
+				ok = os_file_create_directory(parent, FALSE);
+			}
+		}
+	}
+
+	mem_free(parent);
+
+	return(ok);
+}
+
/********************************************************************
Returns a pointer to the nth slot in the aio array. */
static
@@ -1950,7 +3004,7 @@ os_aio_array_reserve_slot(
void* message2,/* in: message to be passed along with
the aio operation */
os_file_t file, /* in: file handle */
- char* name, /* in: name of the file or path as a
+ const char* name, /* in: name of the file or path as a
null-terminated string */
void* buf, /* in: buffer where to read or from which
to write */
@@ -2197,7 +3251,7 @@ os_aio(
because i/os are not actually handled until
all have been posted: use with great
caution! */
- char* name, /* in: name of the file or path as a
+ const char* name, /* in: name of the file or path as a
null-terminated string */
os_file_t file, /* in: handle to a file */
void* buf, /* in: buffer where to read or from which
@@ -2218,7 +3272,7 @@ os_aio(
#ifdef WIN_ASYNC_IO
ibool retval;
BOOL ret = TRUE;
- DWORD len = n;
+ DWORD len = (DWORD) n;
void* dummy_mess1;
void* dummy_mess2;
ulint dummy_type;
@@ -2454,10 +3508,12 @@ os_aio_windows_handle(
if (ret && len == slot->len) {
ret_val = TRUE;
+# ifdef UNIV_DO_FLUSH
if (slot->type == OS_FILE_WRITE
&& !os_do_not_call_flush_at_each_write) {
ut_a(TRUE == os_file_flush(slot->file));
}
+# endif /* UNIV_DO_FLUSH */
} else {
os_file_handle_error(slot->name, "Windows aio");
@@ -2538,10 +3594,12 @@ os_aio_posix_handle(
*message1 = slot->message1;
*message2 = slot->message2;
+# ifdef UNIV_DO_FLUSH
if (slot->type == OS_FILE_WRITE
&& !os_do_not_call_flush_at_each_write) {
ut_a(TRUE == os_file_flush(slot->file));
}
+# endif /* UNIV_DO_FLUSH */
os_mutex_exit(array->mutex);
@@ -2584,7 +3642,7 @@ os_aio_simulated_handle(
ulint biggest_age;
ulint age;
byte* combined_buf;
- byte* combined_buf2= 0; /* Remove warning */
+ byte* combined_buf2;
ibool ret;
ulint n;
ulint i;
@@ -2629,7 +3687,7 @@ restart:
if (os_aio_print_debug) {
fprintf(stderr,
-"InnoDB: i/o for slot %lu already done, returning\n", i);
+"InnoDB: i/o for slot %lu already done, returning\n", (ulong) i);
}
ret = TRUE;
@@ -2747,6 +3805,7 @@ consecutive_loop:
if (n_consecutive == 1) {
/* We can use the buffer of the i/o request */
combined_buf = slot->buf;
+ combined_buf2 = NULL;
} else {
combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
@@ -2778,8 +3837,8 @@ consecutive_loop:
if (os_aio_print_debug) {
fprintf(stderr,
"InnoDB: doing i/o of type %lu at offset %lu %lu, length %lu\n",
- slot->type, slot->offset_high, slot->offset,
- total_len);
+ (ulong) slot->type, (ulong) slot->offset_high,
+ (ulong) slot->offset, (ulong) total_len);
}
/* Do the i/o with ordinary, synchronous i/o functions: */
@@ -2789,8 +3848,9 @@ consecutive_loop:
|| (slot->offset % UNIV_PAGE_SIZE != 0)) {
fprintf(stderr,
"InnoDB: Error: trying a displaced write to %s %lu %lu, len %lu\n",
- slot->name, slot->offset_high,
- slot->offset, total_len);
+ slot->name, (ulong) slot->offset_high,
+ (ulong) slot->offset,
+ (ulong) total_len);
ut_error;
}
@@ -2844,7 +3904,7 @@ consecutive_loop:
}
}
- if (n_consecutive > 1) {
+ if (combined_buf2) {
ut_free(combined_buf2);
}
@@ -2893,7 +3953,7 @@ recommended_sleep:
if (os_aio_print_debug) {
fprintf(stderr,
"InnoDB: i/o handler thread for i/o segment %lu wakes up\n",
- global_segment);
+ (ulong) global_segment);
}
goto restart;
@@ -2969,7 +4029,7 @@ os_aio_print(
ulint i;
for (i = 0; i < srv_n_file_io_threads; i++) {
- fprintf(file, "I/O thread %lu state: %s (%s)", i,
+ fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
srv_io_thread_op_info[i],
srv_io_thread_function[i]);
@@ -3008,7 +4068,7 @@ loop:
ut_a(array->n_reserved == n_reserved);
- fprintf(file, " %lu", n_reserved);
+ fprintf(file, " %lu", (ulong) n_reserved);
os_mutex_exit(array->mutex);
@@ -3048,19 +4108,22 @@ loop:
fprintf(file,
"Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
"%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
- fil_n_pending_log_flushes, fil_n_pending_tablespace_flushes,
- os_n_file_reads, os_n_file_writes, os_n_fsyncs);
+ (ulong) fil_n_pending_log_flushes,
+ (ulong) fil_n_pending_tablespace_flushes,
+ (ulong) os_n_file_reads, (ulong) os_n_file_writes,
+ (ulong) os_n_fsyncs);
if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
fprintf(file,
"%lu pending preads, %lu pending pwrites\n",
- os_file_n_pending_preads, os_file_n_pending_pwrites);
+ (ulong) os_file_n_pending_preads,
+ (ulong) os_file_n_pending_pwrites);
}
if (os_n_file_reads == os_n_file_reads_old) {
avg_bytes_read = 0.0;
} else {
- avg_bytes_read = os_bytes_read_since_printout /
+ avg_bytes_read = (double) os_bytes_read_since_printout /
(os_n_file_reads - os_n_file_reads_old);
}
@@ -3068,7 +4131,7 @@ loop:
"%.2f reads/s, %lu avg bytes/read, %.2f writes/s, %.2f fsyncs/s\n",
(os_n_file_reads - os_n_file_reads_old)
/ time_elapsed,
- (ulint)avg_bytes_read,
+ (ulong)avg_bytes_read,
(os_n_file_writes - os_n_file_writes_old)
/ time_elapsed,
(os_n_fsyncs - os_n_fsyncs_old)
diff --git a/innobase/os/os0proc.c b/innobase/os/os0proc.c
index 87a0bfb9e92..2f155788420 100644
--- a/innobase/os/os0proc.c
+++ b/innobase/os/os0proc.c
@@ -12,11 +12,469 @@ Created 9/30/1995 Heikki Tuuri
#include "os0proc.ic"
#endif
+#include "ut0mem.h"
+#include "ut0byte.h"
+
+
+/*
+How to get AWE to compile on Windows?
+-------------------------------------
+
+In the project settings of the innobase project the Visual C++ source,
+__WIN2000__ has to be defined.
+
+The Visual C++ has to be relatively recent and _WIN32_WINNT has to be
+defined to a value >= 0x0500 when windows.h is included.
+
+#define _WIN32_WINNT 0x0500
+
+Where does AWE work?
+-------------------
+
+See the error message in os_awe_allocate_physical_mem().
+
+How to assign privileges for mysqld to use AWE?
+-----------------------------------------------
+
+See the error message in os_awe_enable_lock_pages_in_mem().
+
+Use Windows AWE functions in this order
+---------------------------------------
+
+(1) os_awe_enable_lock_pages_in_mem();
+(2) os_awe_allocate_physical_mem();
+(3) os_awe_allocate_virtual_mem_window();
+(4) os_awe_map_physical_mem_to_window().
+
+To test 'AWE' in a computer which does not have the AWE API,
+you can compile with UNIV_SIMULATE_AWE defined in this file.
+*/
+
+#ifdef UNIV_SIMULATE_AWE
+/* If we simulate AWE, we allocate the 'physical memory' here */
+byte* os_awe_simulate_mem;
+ulint os_awe_simulate_mem_size;
+os_awe_t* os_awe_simulate_page_info;
+byte* os_awe_simulate_window;
+ulint os_awe_simulate_window_size;
+/* In simulated AWE the following contains a NULL pointer or a pointer
+to a mapped 'physical page' for each 4 kB page in the AWE window */
+byte** os_awe_simulate_map;
+#endif
+
+#ifdef __WIN2000__
+os_awe_t* os_awe_page_info;
+ulint os_awe_n_pages;
+byte* os_awe_window;
+ulint os_awe_window_size;
+#endif
+
+/********************************************************************
+Windows AWE support. Tries to enable the "lock pages in memory" privilege for
+the current process so that the current process can allocate memory-locked
+virtual address space to act as the window where AWE maps physical memory.
+
+With UNIV_SIMULATE_AWE defined the function does nothing and returns TRUE.
+Without __WIN2000__ it always returns FALSE (after printing a hint in
+Windows builds). The process token opened for the adjustment is closed on
+every return path. */
+
+ibool
+os_awe_enable_lock_pages_in_mem(void)
+/*=================================*/
+				/* out: TRUE if success, FALSE if error;
+				prints error info to stderr if no success */
+{
+#ifdef UNIV_SIMULATE_AWE
+
+	return(TRUE);
+
+#elif defined(__WIN2000__)
+	struct {
+		DWORD			Count;
+		LUID_AND_ATTRIBUTES	Privilege[1];
+	} Info;
+	HANDLE	hProcess;
+	HANDLE	Token;
+	BOOL	Result;
+	DWORD	err;
+
+	hProcess = GetCurrentProcess();
+
+	/* Open the token of the current process */
+
+	Result = OpenProcessToken(hProcess,
+				TOKEN_ADJUST_PRIVILEGES,
+				&Token);
+	if (!Result) {
+		fprintf(stderr,
+			"InnoDB: AWE: Cannot open process token, error %lu\n",
+			(unsigned long) GetLastError());
+		return(FALSE);
+	}
+
+	Info.Count = 1;
+
+	Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+	/* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY
+	privilege */
+
+	Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
+					&(Info.Privilege[0].Luid));
+	if (!Result) {
+		/* Save the error code before any other call can change it,
+		and do not leak the token handle */
+		err = GetLastError();
+		CloseHandle(Token);
+
+		fprintf(stderr,
+	"InnoDB: AWE: Cannot get local privilege value for %s, error %lu.\n",
+			SE_LOCK_MEMORY_NAME, (unsigned long) err);
+
+		return(FALSE);
+	}
+
+	/* Try to adjust the privilege */
+
+	Result = AdjustTokenPrivileges(Token, FALSE,
+					(PTOKEN_PRIVILEGES)&Info,
+					0, NULL, NULL);
+
+	/* The call can succeed without having assigned the privilege; that
+	is only visible in the last error code, so read it right away. The
+	token is not needed after this point. */
+
+	err = GetLastError();
+	CloseHandle(Token);
+
+	/* Check the result */
+
+	if (!Result) {
+		fprintf(stderr,
+	"InnoDB: AWE: Cannot adjust process token privileges, error %lu.\n",
+			(unsigned long) err);
+		return(FALSE);
+	} else if (err != ERROR_SUCCESS) {
+		fprintf(stderr,
+"InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege, error %lu.\n"
+"InnoDB: In Windows XP Home you cannot use AWE. In Windows 2000 and XP\n"
+"InnoDB: Professional you must go to the Control Panel, to\n"
+"InnoDB: Security Settings, to Local Policies, and enable\n"
+"InnoDB: the 'lock pages in memory' privilege for the user who runs\n"
+"InnoDB: the MySQL server.\n", (unsigned long) err);
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+#else
#ifdef __WIN__
-#include <windows.h>
+	fprintf(stderr,
+"InnoDB: AWE: Error: to use AWE you must use a ...-nt MySQL executable.\n");
+#endif
+	return(FALSE);
#endif
+}
-#include "ut0mem.h"
+/********************************************************************
+Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
+processor.
+
+With UNIV_SIMULATE_AWE the 'physical memory' and the page info array are
+ordinary ut_malloc() allocations kept in the os_awe_simulate_... globals.
+With __WIN2000__ the page info array comes from the process heap, the
+"lock pages in memory" privilege is enabled and the pages are requested with
+AllocateUserPhysicalPages(); a partial allocation is treated as a failure.
+Without either macro the function returns FALSE. */
+
+ibool
+os_awe_allocate_physical_mem(
+/*=========================*/
+ /* out: TRUE if success */
+ os_awe_t** page_info, /* out, own: array of opaque data containing
+ the info for allocated physical memory pages;
+ each allocated 4 kB physical memory page has
+ one slot of type os_awe_t in the array */
+ ulint n_megabytes) /* in: number of megabytes to allocate */
+{
+#ifdef UNIV_SIMULATE_AWE
+ os_awe_simulate_page_info = ut_malloc(sizeof(os_awe_t) *
+ n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE));
+
+ os_awe_simulate_mem = ut_align(ut_malloc(
+ 4096 + 1024 * 1024 * n_megabytes),
+ 4096); /* one extra 4096 bytes is requested so that the
+ aligned pointer still has the full size available;
+ the unaligned pointer is not kept */
+ os_awe_simulate_mem_size = n_megabytes * 1024 * 1024;
+
+ *page_info = os_awe_simulate_page_info;
+
+ return(TRUE);
+
+#elif defined(__WIN2000__)
+ BOOL bResult;
+ os_awe_t NumberOfPages; /* Question: why does Windows
+ use the name ULONG_PTR for
+ a scalar integer type? Maybe
+ because we may also refer to
+ &NumberOfPages? */
+ os_awe_t NumberOfPagesInitial;
+ SYSTEM_INFO sSysInfo;
+ int PFNArraySize;
+
+ if (n_megabytes > 64 * 1024) {
+
+ fprintf(stderr,
+"InnoDB: AWE: Error: tried to allocate %lu MB.\n"
+"InnoDB: AWE cannot allocate more than 64 GB in any computer.\n", n_megabytes);
+
+ return(FALSE);
+ }
+
+ GetSystemInfo(&sSysInfo); /* fill the system information structure */
+
+ if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) {
+ fprintf(stderr,
+"InnoDB: AWE: Error: this computer has a page size of %lu.\n"
+"InnoDB: Should be 4096 bytes for InnoDB AWE support to work.\n",
+ (ulint)sSysInfo.dwPageSize);
+
+ return(FALSE);
+ }
+
+ /* Calculate the number of pages of memory to request */
+
+ NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE);
+
+ /* Calculate the size of page_info for allocated physical pages */
+
+ PFNArraySize = NumberOfPages * sizeof(os_awe_t);
+
+ *page_info = (os_awe_t*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize);
+
+ if (*page_info == NULL) {
+ fprintf(stderr,
+"InnoDB: AWE: Failed to allocate page info array from process heap, error %lu\n",
+ (ulint)GetLastError());
+
+ return(FALSE);
+ }
+
+ ut_total_allocated_memory += PFNArraySize;
+
+ /* Enable this process' privilege to lock pages to physical memory */
+
+ if (!os_awe_enable_lock_pages_in_mem()) { /* NOTE(review): the page
+ info array allocated above is not released on this or on the
+ later failure returns -- confirm that this is acceptable */
+
+ return(FALSE);
+ }
+
+ /* Allocate the physical memory */
+
+ NumberOfPagesInitial = NumberOfPages;
+
+ os_awe_page_info = *page_info;
+ os_awe_n_pages = (ulint)NumberOfPages;
+
+ /* Compilation note: if the compiler complains the function is not
+ defined, see the note at the start of this file */
+
+ bResult = AllocateUserPhysicalPages(GetCurrentProcess(),
+ &NumberOfPages,
+ *page_info);
+ if (bResult != TRUE) {
+ fprintf(stderr,
+"InnoDB: AWE: Cannot allocate physical pages, error %lu.\n",
+ (ulint)GetLastError());
+
+ return(FALSE);
+ }
+
+ if (NumberOfPagesInitial != NumberOfPages) { /* NumberOfPages was
+ passed by address above and is compared here with the
+ requested count; a smaller grant is reported as an error */
+ fprintf(stderr,
+"InnoDB: AWE: Error: allocated only %lu pages of %lu requested.\n"
+"InnoDB: Check that you have enough free RAM.\n"
+"InnoDB: In Windows XP Professional and 2000 Professional\n"
+"InnoDB: Windows PAE size is max 4 GB. In 2000 and .NET\n"
+"InnoDB: Advanced Servers and 2000 Datacenter Server it is 32 GB,\n"
+"InnoDB: and in .NET Datacenter Server it is 64 GB.\n"
+"InnoDB: A Microsoft web page said that the processor must be an Intel\n"
+"InnoDB: processor.\n",
+ (ulint)NumberOfPages,
+ (ulint)NumberOfPagesInitial);
+
+ return(FALSE);
+ }
+
+ fprintf(stderr,
+"InnoDB: Using Address Windowing Extensions (AWE); allocated %lu MB\n",
+ n_megabytes);
+
+ return(TRUE);
+#else
+ return(FALSE);
+#endif
+}
+
+/********************************************************************
+Allocates a window in the virtual address space where we can then map
+pages of physical memory.
+
+With UNIV_SIMULATE_AWE the window is an ordinary ut_malloc() allocation
+aligned to 4096 bytes, and a map with one NULL-initialized slot per 4096
+bytes of the window is created. With __WIN2000__ the window is reserved with
+VirtualAlloc(MEM_RESERVE | MEM_PHYSICAL) and remembered in os_awe_window /
+os_awe_window_size. Without either macro the function returns NULL. */
+
+byte*
+os_awe_allocate_virtual_mem_window(
+/*===============================*/
+ /* out, own: allocated memory, or NULL if did not
+ succeed */
+ ulint size) /* in: virtual memory allocation size in bytes, must
+ be < 2 GB */
+{
+#ifdef UNIV_SIMULATE_AWE
+ ulint i;
+
+ os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096);
+ os_awe_simulate_window_size = size;
+
+ os_awe_simulate_map = ut_malloc(sizeof(byte*) * (size / 4096));
+
+ for (i = 0; i < (size / 4096); i++) { /* nothing is mapped yet */
+ *(os_awe_simulate_map + i) = NULL;
+ }
+
+ return(os_awe_simulate_window);
+
+#elif defined(__WIN2000__)
+ byte* ptr;
+
+ if (size > (ulint)0x7FFFFFFFUL) { /* enforce the < 2 GB limit */
+ fprintf(stderr,
+"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory\n", size);
+
+ return(NULL);
+ }
+
+ ptr = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL,
+ PAGE_READWRITE);
+ if (ptr == NULL) {
+ fprintf(stderr,
+"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory, error %lu\n",
+ size, (ulint)GetLastError());
+
+ return(NULL);
+ }
+
+ os_awe_window = ptr;
+ os_awe_window_size = size;
+
+ ut_total_allocated_memory += size;
+
+ return(ptr);
+#else
+ return(NULL);
+#endif
+}
+
+/********************************************************************
+With this function you can map parts of physical memory allocated with
+the ..._allocate_physical_mem to the virtual address space allocated with
+the previous function. Intel implements this so that the process page
+tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
+showed that this takes < 1 microsecond, much better than the estimated 80 us
+for copying a 16 kB page memory to memory. But, the operation will at least
+partially invalidate the translation lookaside buffer (TLB) of all
+processors. Under a real-world load the performance hit may be bigger.
+
+In the __WIN2000__ build, a ptr or page_info outside the ranges recorded at
+allocation time prints a message and fails the ut_a(0) assertion; a failure
+of MapUserPhysicalPages() prints a message and calls exit(1). In the
+UNIV_SIMULATE_AWE build the mapping is emulated by copying 4096-byte pages
+between the window and the simulated physical memory. Without either macro
+the function returns FALSE. */
+
+ibool
+os_awe_map_physical_mem_to_window(
+/*==============================*/
+ /* out: TRUE if success; the function
+ calls exit(1) in case of an error */
+ byte* ptr, /* in: a page-aligned pointer to
+ somewhere in the virtual address
+ space window; we map the physical mem
+ pages here */
+ ulint n_mem_pages, /* in: number of 4 kB mem pages to
+ map */
+ os_awe_t* page_info) /* in: array of page infos for those
+ pages; each page has one slot in the
+ array */
+{
+#ifdef UNIV_SIMULATE_AWE
+ ulint i;
+ byte** map;
+ byte* page;
+ byte* phys_page;
+
+ ut_a(ptr >= os_awe_simulate_window);
+ ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size);
+ ut_a(page_info >= os_awe_simulate_page_info);
+ ut_a(page_info < os_awe_simulate_page_info +
+ (os_awe_simulate_mem_size / 4096));
+
+ /* First look if some other 'physical pages' are mapped at ptr,
+ and copy them back to where they were if yes */
+
+ map = os_awe_simulate_map
+ + ((ulint)(ptr - os_awe_simulate_window)) / 4096;
+ page = ptr;
+
+ for (i = 0; i < n_mem_pages; i++) {
+ if (*map != NULL) {
+ ut_memcpy(*map, page, 4096);
+ }
+ map++;
+ page += 4096;
+ }
+
+ /* Then copy to ptr the 'physical pages' determined by page_info; we
+ assume page_info is a segment of the array we created at the start */
+
+ phys_page = os_awe_simulate_mem
+ + (ulint)(page_info - os_awe_simulate_page_info)
+ * 4096;
+
+ ut_memcpy(ptr, phys_page, n_mem_pages * 4096);
+
+ /* Update the map: remember for each window page which 'physical
+ page' it now holds, so that it can be copied back later */
+
+ map = os_awe_simulate_map
+ + ((ulint)(ptr - os_awe_simulate_window)) / 4096;
+
+ for (i = 0; i < n_mem_pages; i++) {
+ *map = phys_page;
+
+ map++;
+ phys_page += 4096;
+ }
+
+ return(TRUE);
+
+#elif defined(__WIN2000__)
+ BOOL bResult;
+ os_awe_t n_pages;
+
+ n_pages = (os_awe_t)n_mem_pages;
+
+ if (!(ptr >= os_awe_window)) {
+ fprintf(stderr,
+"InnoDB: AWE: Error: trying to map to address %lx but AWE window start %lx\n",
+ (ulint)ptr, (ulint)os_awe_window);
+ ut_a(0);
+ }
+
+ if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) {
+ fprintf(stderr,
+"InnoDB: AWE: Error: trying to map to address %lx but AWE window end %lx\n",
+ (ulint)ptr, (ulint)os_awe_window + os_awe_window_size);
+ ut_a(0);
+ }
+
+ if (!(page_info >= os_awe_page_info)) {
+ fprintf(stderr,
+"InnoDB: AWE: Error: trying to map page info at %lx but array start %lx\n",
+ (ulint)page_info, (ulint)os_awe_page_info);
+ ut_a(0);
+ }
+
+ if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) { /*
+ NOTE(review): the constant 4 is presumably UNIV_PAGE_SIZE
+ divided by the 4 kB memory page size -- confirm */
+ fprintf(stderr,
+"InnoDB: AWE: Error: trying to map page info at %lx but array end %lx\n",
+ (ulint)page_info, (ulint)(os_awe_page_info + os_awe_n_pages));
+ ut_a(0);
+ }
+
+ bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info);
+
+ if (bResult != TRUE) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: AWE: Mapping of %lu physical pages to address %lx failed,\n"
+"InnoDB: error %lu.\n"
+"InnoDB: Cannot continue operation.\n",
+ n_mem_pages, (ulint)ptr, (ulint)GetLastError());
+ exit(1);
+ }
+
+ return(TRUE);
+#else
+ return(FALSE);
+#endif
+}
/********************************************************************
Converts the current process id to a number. It is not guaranteed that the
diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c
index 827d68501db..4ad9473fe66 100644
--- a/innobase/os/os0sync.c
+++ b/innobase/os/os0sync.c
@@ -109,9 +109,9 @@ must be reset explicitly by calling sync_os_reset_event. */
os_event_t
os_event_create(
/*============*/
- /* out: the event handle */
- char* name) /* in: the name of the event, if NULL
- the event is created without a name */
+ /* out: the event handle */
+ const char* name) /* in: the name of the event, if NULL
+ the event is created without a name */
{
#ifdef __WIN__
os_event_t event;
@@ -125,7 +125,7 @@ os_event_create(
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event semaphore; Windows error %lu\n",
- (ulint)GetLastError());
+ (ulong) GetLastError());
}
#else /* Unix */
os_event_t event;
@@ -166,9 +166,9 @@ reset when a single thread is released. Works only in Windows. */
os_event_t
os_event_create_auto(
/*=================*/
- /* out: the event handle */
- char* name) /* in: the name of the event, if NULL
- the event is created without a name */
+ /* out: the event handle */
+ const char* name) /* in: the name of the event, if NULL
+ the event is created without a name */
{
os_event_t event;
@@ -182,7 +182,7 @@ os_event_create_auto(
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows auto event semaphore; Windows error %lu\n",
- (ulint)GetLastError());
+ (ulong) GetLastError());
}
/* Put to the list of events */
@@ -361,7 +361,7 @@ os_event_wait_time(
ut_a(event);
if (time != OS_SYNC_INFINITE_TIME) {
- err = WaitForSingleObject(event->handle, time / 1000);
+ err = WaitForSingleObject(event->handle, (DWORD) time / 1000);
} else {
err = WaitForSingleObject(event->handle, INFINITE);
}
@@ -408,11 +408,11 @@ os_event_wait_multiple(
ut_a(native_event_array);
ut_a(n > 0);
- index = WaitForMultipleObjects(n, native_event_array,
+ index = WaitForMultipleObjects((DWORD) n, native_event_array,
FALSE, /* Wait for any 1 event */
INFINITE); /* Infinite wait time
limit */
- ut_a(index >= WAIT_OBJECT_0);
+ ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparison */
ut_a(index < WAIT_OBJECT_0 + n);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
@@ -430,9 +430,9 @@ mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */
os_mutex_t
os_mutex_create(
/*============*/
- /* out: the mutex handle */
- char* name) /* in: the name of the mutex, if NULL
- the mutex is created without a name */
+ /* out: the mutex handle */
+ const char* name) /* in: the name of the mutex, if NULL
+ the mutex is created without a name */
{
#ifdef __WIN__
HANDLE mutex;
@@ -631,7 +631,21 @@ os_fast_mutex_free(
DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
#else
- ut_a(0 == pthread_mutex_destroy(fast_mutex));
+ int ret;
+
+ ret = pthread_mutex_destroy(fast_mutex);
+
+ if (ret != 0) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+" InnoDB: error: return value %lu when calling\n"
+"InnoDB: pthread_mutex_destroy().\n", (ulint)ret);
+ fprintf(stderr,
+"InnoDB: Byte contents of the pthread mutex at %p:\n", fast_mutex);
+ ut_print_buf(stderr, (const byte*)fast_mutex,
+ sizeof(os_fast_mutex_t));
+ fprintf(stderr, "\n");
+ }
#endif
if (os_sync_mutex_inited) {
/* When freeing the last mutexes, we have
diff --git a/innobase/os/os0thread.c b/innobase/os/os0thread.c
index cb72310f23d..e1a1119cfd4 100644
--- a/innobase/os/os0thread.c
+++ b/innobase/os/os0thread.c
@@ -100,7 +100,7 @@ os_thread_create(
{
#ifdef __WIN__
os_thread_t thread;
- ulint win_thread_id;
+ DWORD win_thread_id;
os_mutex_enter(os_sync_mutex);
os_thread_count++;
@@ -210,6 +210,15 @@ os_thread_exit(
#endif
}
+#ifdef HAVE_PTHREAD_JOIN
+int /* out: the value returned by pthread_join() */
+os_thread_join(
+/*=============*/ /* Calls pthread_join() on the given thread. */
+ os_thread_id_t thread_id) /* in: id of the thread to join */
+{
+ return pthread_join(thread_id, NULL); /* NULL: the thread's exit value is not requested */
+}
+#endif /* HAVE_PTHREAD_JOIN */
/*********************************************************************
Returns handle to the current thread. */
@@ -253,7 +262,7 @@ os_thread_sleep(
ulint tm) /* in: time in microseconds */
{
#ifdef __WIN__
- Sleep(tm / 1000);
+ Sleep((DWORD) tm / 1000);
#elif defined(__NETWARE__)
delay(tm / 1000);
#else