diff options
Diffstat (limited to 'innobase/os')
-rw-r--r-- | innobase/os/makefilewin | 17 | ||||
-rw-r--r-- | innobase/os/os0file.c | 1369 | ||||
-rw-r--r-- | innobase/os/os0proc.c | 462 | ||||
-rw-r--r-- | innobase/os/os0sync.c | 44 | ||||
-rw-r--r-- | innobase/os/os0thread.c | 13 |
5 files changed, 1716 insertions, 189 deletions
diff --git a/innobase/os/makefilewin b/innobase/os/makefilewin deleted file mode 100644 index 8bc8d08611b..00000000000 --- a/innobase/os/makefilewin +++ /dev/null @@ -1,17 +0,0 @@ -include ..\include\makefile.i - -os.lib: os0sync.obj os0thread.obj os0proc.obj os0file.obj - lib -out:..\libs\os.lib os0sync.obj os0thread.obj os0proc.obj os0file.obj - -os0sync.obj: os0sync.c - $(CCOM) $(CFLW) -c os0sync.c - -os0thread.obj: os0thread.c - $(CCOM) $(CFLW) -c os0thread.c - -os0proc.obj: os0proc.c - $(CCOM) $(CFLW) -c os0proc.c - -os0file.obj: os0file.c - $(CCOM) $(CFLW) -c os0file.c - diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index cadf1c0385f..49f88c0d62a 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -11,6 +11,7 @@ Created 10/21/1995 Heikki Tuuri #include "os0thread.h" #include "ut0mem.h" #include "srv0srv.h" +#include "srv0start.h" #include "fil0fil.h" #include "buf0buf.h" @@ -32,9 +33,13 @@ ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; ulint os_innodb_umask = 0; #endif +#ifdef UNIV_DO_FLUSH /* If the following is set to TRUE, we do not call os_file_flush in every -os_file_write. We can set this TRUE if the doublewrite buffer is used. */ +os_file_write. We can set this TRUE when the doublewrite buffer is used. */ ibool os_do_not_call_flush_at_each_write = FALSE; +#else +/* We do not call os_file_flush in every os_file_write. */ +#endif /* UNIV_DO_FLUSH */ /* We use these mutexes to protect lseek + file i/o operation, if the OS does not provide an atomic pread or pwrite, or similar */ @@ -69,7 +74,7 @@ struct os_aio_slot_struct{ bytes */ ulint offset_high; /* 32 high bits of file offset */ os_file_t file; /* file where to read or write */ - char* name; /* file name or path */ + const char* name; /* file name or path */ ibool io_already_done;/* used only in simulated aio: TRUE if the physical i/o already made and only the slot message @@ -154,7 +159,6 @@ os_mutex_t os_file_count_mutex; ulint os_file_n_pending_preads = 0; ulint os_file_n_pending_pwrites = 0; - /*************************************************************************** Gets the operating system version. Currently works only on Windows. */ @@ -198,9 +202,12 @@ overwrite the error number). If the number is not known to this program, the OS error number + 100 is returned. */ ulint -os_file_get_last_error(void) -/*========================*/ - /* out: error number, or OS error number + 100 */ +os_file_get_last_error( +/*===================*/ + /* out: error number, or OS error + number + 100 */ + ibool report_all_errors) /* in: TRUE if we want an error message + printed of all errors */ { ulint err; @@ -208,26 +215,29 @@ os_file_get_last_error(void) err = (ulint) GetLastError(); - if (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS) { + if (report_all_errors + || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) { + ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: Operating system error number %lu in a file operation.\n" - "InnoDB: See http://dev.mysql.com/doc/mysql/en/InnoDB.html\n" - "InnoDB: for installation help.\n", - err); + " InnoDB: Operating system error number %lu in a file operation.\n", (ulong) err); if (err == ERROR_PATH_NOT_FOUND) { - fprintf(stderr, - "InnoDB: The error means the system cannot find the path specified.\n" - "InnoDB: In installation you must create directories yourself, InnoDB\n" - "InnoDB: does not create them.\n"); + fprintf(stderr, + "InnoDB: The error means the system cannot find the path specified.\n"); + + if (srv_is_being_started) { + fprintf(stderr, + "InnoDB: If you are installing InnoDB, remember that you must create\n" + "InnoDB: directories yourself, InnoDB does not create them.\n"); + } } else if (err == ERROR_ACCESS_DENIED) { - fprintf(stderr, + fprintf(stderr, "InnoDB: The error means mysqld does not have the access rights to\n" "InnoDB: the directory. It may also be you have created a subdirectory\n" "InnoDB: of the same name as a data file.\n"); } else { - fprintf(stderr, + fprintf(stderr, "InnoDB: Some operating system error numbers are described at\n" "InnoDB: " "http://dev.mysql.com/doc/mysql/en/Operating_System_error_codes.html\n"); @@ -248,31 +258,33 @@ os_file_get_last_error(void) #else err = (ulint) errno; - if (err != ENOSPC && err != EEXIST) { - ut_print_timestamp(stderr); + if (report_all_errors + || (err != ENOSPC && err != EEXIST)) { + ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: Operating system error number %lu in a file operation.\n" - "InnoDB: See http://dev.mysql.com/doc/mysql/en/InnoDB.html\n" - "InnoDB: for installation help.\n", - err); + " InnoDB: Operating system error number %lu in a file operation.\n", (ulong) err); if (err == ENOENT) { - fprintf(stderr, - "InnoDB: The error means the system cannot find the path specified.\n" - "InnoDB: In installation you must create directories yourself, InnoDB\n" - "InnoDB: does not create them.\n"); + fprintf(stderr, + "InnoDB: The error means the system cannot find the path specified.\n"); + + if (srv_is_being_started) { + fprintf(stderr, + "InnoDB: If you are installing InnoDB, remember that you must create\n" + "InnoDB: directories yourself, InnoDB does not create them.\n"); + } } else if (err == EACCES) { - fprintf(stderr, + fprintf(stderr, "InnoDB: The error means mysqld does not have the access rights to\n" "InnoDB: the directory.\n"); } else { - if (strerror((int)err) != NULL) { + if (strerror((int)err) != NULL) { fprintf(stderr, "InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err)); - } + } - fprintf(stderr, + fprintf(stderr, "InnoDB: Some operating system error numbers are described at\n" "InnoDB: " "http://dev.mysql.com/doc/mysql/en/Operating_System_error_codes.html\n"); @@ -310,7 +322,7 @@ os_file_handle_error( { ulint err; - err = os_file_get_last_error(); + err = os_file_get_last_error(FALSE); if (err == OS_FILE_DISK_FULL) { /* We only print a warning about disk full once */ @@ -337,6 +349,7 @@ os_file_handle_error( return(FALSE); } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) { + return(TRUE); } else if (err == OS_FILE_ALREADY_EXISTS) { @@ -359,6 +372,106 @@ os_file_handle_error( return(FALSE); } +#undef USE_FILE_LOCK +#define USE_FILE_LOCK +#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__FreeBSD__) || defined(__NETWARE__) +/* InnoDB Hot Backup does not lock the data files. + * On Windows, mandatory locking is used. + * On FreeBSD with LinuxThreads, advisory locking does not work properly. + */ +# undef USE_FILE_LOCK +#endif +#ifdef USE_FILE_LOCK +/******************************************************************** +Obtain an exclusive lock on a file. */ +static +int +os_file_lock( +/*=========*/ + /* out: 0 on success */ + int fd, /* in: file descriptor */ + const char* name) /* in: file name */ +{ + struct flock lk; + lk.l_type = F_WRLCK; + lk.l_whence = SEEK_SET; + lk.l_start = lk.l_len = 0; + if (fcntl(fd, F_SETLK, &lk) == -1) { + fprintf(stderr, + "InnoDB: Unable to lock %s, error: %d\n", name, errno); + + if (errno == EAGAIN || errno == EACCES) { + fprintf(stderr, +"InnoDB: Check that you do not already have another mysqld process\n" +"InnoDB: using the same InnoDB data or log files.\n"); + } + + return(-1); + } + + return(0); +} +#endif /* USE_FILE_LOCK */ + +/******************************************************************** +Does error handling when a file operation fails. */ +static +ibool +os_file_handle_error_no_exit( +/*=========================*/ + /* out: TRUE if we should retry the + operation */ + const char* name, /* in: name of a file or NULL */ + const char* operation)/* in: operation */ +{ + ulint err; + + err = os_file_get_last_error(FALSE); + + if (err == OS_FILE_DISK_FULL) { + /* We only print a warning about disk full once */ + + if (os_has_said_disk_full) { + + return(FALSE); + } + + if (name) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Encountered a problem with file %s\n", name); + } + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Disk is full. Try to clean the disk to free space.\n"); + + os_has_said_disk_full = TRUE; + + fflush(stderr); + + return(FALSE); + + } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) { + + return(TRUE); + + } else if (err == OS_FILE_ALREADY_EXISTS) { + + return(FALSE); + } else { + if (name) { + fprintf(stderr, "InnoDB: File name %s\n", name); + } + + fprintf(stderr, "InnoDB: File operation call: '%s'.\n", + operation); + return (FALSE); + } + + return(FALSE); /* not reached */ +} + /******************************************************************** Creates the seek mutexes used in positioned reads and writes. */ @@ -450,21 +563,313 @@ os_file_create_tmpfile(void) return(file); } +/*************************************************************************** +The os_file_opendir() function opens a directory stream corresponding to the +directory named by the dirname argument. The directory stream is positioned +at the first entry. In both Unix and Windows we automatically skip the '.' +and '..' items at the start of the directory listing. */ + +os_file_dir_t +os_file_opendir( +/*============*/ + /* out: directory stream, NULL if + error */ + const char* dirname, /* in: directory name; it must not + contain a trailing '\' or '/' */ + ibool error_is_fatal) /* in: TRUE if we should treat an + error as a fatal error; if we try to + open symlinks then we do not wish a + fatal error if it happens not to be + a directory */ +{ + os_file_dir_t dir; +#ifdef __WIN__ + LPWIN32_FIND_DATA lpFindFileData; + char path[OS_FILE_MAX_PATH + 3]; + + ut_a(strlen(dirname) < OS_FILE_MAX_PATH); + + strcpy(path, dirname); + strcpy(path + strlen(path), "\\*"); + + /* Note that in Windows opening the 'directory stream' also retrieves + the first entry in the directory. Since it is '.', that is no problem, + as we will skip over the '.' and '..' entries anyway. */ + + lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA)); + + dir = FindFirstFile(path, lpFindFileData); + + ut_free(lpFindFileData); + + if (dir == INVALID_HANDLE_VALUE) { + + if (error_is_fatal) { + os_file_handle_error(dirname, "opendir"); + } + + return(NULL); + } + + return(dir); +#else + dir = opendir(dirname); + + if (dir == NULL && error_is_fatal) { + os_file_handle_error(dirname, "opendir"); + } + + return(dir); +#endif +} + +/*************************************************************************** +Closes a directory stream. */ + +int +os_file_closedir( +/*=============*/ + /* out: 0 if success, -1 if failure */ + os_file_dir_t dir) /* in: directory stream */ +{ +#ifdef __WIN__ + BOOL ret; + + ret = FindClose(dir); + + if (!ret) { + os_file_handle_error_no_exit(NULL, "closedir"); + + return(-1); + } + + return(0); +#else + int ret; + + ret = closedir(dir); + + if (ret) { + os_file_handle_error_no_exit(NULL, "closedir"); + } + + return(ret); +#endif +} + +/*************************************************************************** +This function returns information of the next file in the directory. We jump +over the '.' and '..' entries in the directory. */ + +int +os_file_readdir_next_file( +/*======================*/ + /* out: 0 if ok, -1 if error, 1 if at the end + of the directory */ + const char* dirname,/* in: directory name or path */ + os_file_dir_t dir, /* in: directory stream */ + os_file_stat_t* info) /* in/out: buffer where the info is returned */ +{ +#ifdef __WIN__ + LPWIN32_FIND_DATA lpFindFileData; + BOOL ret; + + lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA)); +next_file: + ret = FindNextFile(dir, lpFindFileData); + + if (ret) { + ut_a(strlen(lpFindFileData->cFileName) < OS_FILE_MAX_PATH); + + if (strcmp(lpFindFileData->cFileName, ".") == 0 + || strcmp(lpFindFileData->cFileName, "..") == 0) { + + goto next_file; + } + + strcpy(info->name, lpFindFileData->cFileName); + + info->size = (ib_longlong)(lpFindFileData->nFileSizeLow) + + (((ib_longlong)(lpFindFileData->nFileSizeHigh)) << 32); + + if (lpFindFileData->dwFileAttributes + & FILE_ATTRIBUTE_REPARSE_POINT) { +/* TODO: test Windows symlinks */ +/* TODO: MySQL has apparently its own symlink implementation in Windows, +dbname.sym can redirect a database directory: +http://www.mysql.com/doc/en/Windows_symbolic_links.html */ + info->type = OS_FILE_TYPE_LINK; + } else if (lpFindFileData->dwFileAttributes + & FILE_ATTRIBUTE_DIRECTORY) { + info->type = OS_FILE_TYPE_DIR; + } else { + /* It is probably safest to assume that all other + file types are normal. Better to check them rather + than blindly skip them. */ + + info->type = OS_FILE_TYPE_FILE; + } + } + + ut_free(lpFindFileData); + + if (ret) { + return(0); + } else if (GetLastError() == ERROR_NO_MORE_FILES) { + + return(1); + } else { + os_file_handle_error_no_exit(dirname, + "readdir_next_file"); + return(-1); + } +#else + struct dirent* ent; + char* full_path; + int ret; + struct stat statinfo; +#ifdef HAVE_READDIR_R + char dirent_buf[sizeof(struct dirent) + _POSIX_PATH_MAX + + 100]; + /* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as + the max file name len; but in most standards, the + length is NAME_MAX; we add 100 to be even safer */ +#endif + +next_file: + +#ifdef HAVE_READDIR_R + ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent); + + if (ret != 0) { + fprintf(stderr, +"InnoDB: cannot read directory %s, error %lu\n", dirname, (ulong)ret); + + return(-1); + } + + if (ent == NULL) { + /* End of directory */ + + return(1); + } + + ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1); +#else + ent = readdir(dir); + + if (ent == NULL) { + + return(1); + } +#endif + ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH); + + if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) { + + goto next_file; + } + + strcpy(info->name, ent->d_name); + + full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10); + + sprintf(full_path, "%s/%s", dirname, ent->d_name); + + ret = stat(full_path, &statinfo); + + if (ret) { + os_file_handle_error_no_exit(full_path, "stat"); + + ut_free(full_path); + + return(-1); + } + + info->size = (ib_longlong)statinfo.st_size; + + if (S_ISDIR(statinfo.st_mode)) { + info->type = OS_FILE_TYPE_DIR; + } else if (S_ISLNK(statinfo.st_mode)) { + info->type = OS_FILE_TYPE_LINK; + } else if (S_ISREG(statinfo.st_mode)) { + info->type = OS_FILE_TYPE_FILE; + } else { + info->type = OS_FILE_TYPE_UNKNOWN; + } + + ut_free(full_path); + + return(0); +#endif +} + +/********************************************************************* +This function attempts to create a directory named pathname. The new directory +gets default permissions. On Unix the permissions are (0770 & ~umask). If the +directory exists already, nothing is done and the call succeeds, unless the +fail_if_exists arguments is true. */ + +ibool +os_file_create_directory( +/*=====================*/ + /* out: TRUE if call succeeds, + FALSE on error */ + const char* pathname, /* in: directory name as + null-terminated string */ + ibool fail_if_exists) /* in: if TRUE, pre-existing directory + is treated as an error. */ +{ +#ifdef __WIN__ + BOOL rcode; + + rcode = CreateDirectory(pathname, NULL); + if (!(rcode != 0 || + (GetLastError() == ERROR_ALREADY_EXISTS && !fail_if_exists))) { + /* failure */ + os_file_handle_error(pathname, "CreateDirectory"); + + return(FALSE); + } + + return (TRUE); +#else + int rcode; + + rcode = mkdir(pathname, 0770); + + if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) { + /* failure */ + os_file_handle_error(pathname, "mkdir"); + + return(FALSE); + } + + return (TRUE); +#endif +} + /******************************************************************** A simple function to open or create a file. */ os_file_t os_file_create_simple( /*==================*/ - /* out, own: handle to the file, not defined if error, - error number can be retrieved with os_get_last_error */ - char* name, /* in: name of the file or path as a null-terminated - string */ - ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened - (if does not exist, error), or OS_FILE_CREATE if a new - file is created (if exists, error) */ - ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ - ibool* success)/* out: TRUE if succeed, FALSE if error */ + /* out, own: handle to the file, not defined + if error, error number can be retrieved with + os_file_get_last_error */ + const char* name, /* in: name of the file or path as a + null-terminated string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file is + opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error), or + OS_FILE_CREATE_PATH if new file + (if exists, error) and subdirectories along + its path are created (if needed)*/ + ulint access_type,/* in: OS_FILE_READ_ONLY or + OS_FILE_READ_WRITE */ + ibool* success)/* out: TRUE if succeed, FALSE if error */ { #ifdef __WIN__ os_file_t file; @@ -480,6 +885,14 @@ try_again: create_flag = OPEN_EXISTING; } else if (create_mode == OS_FILE_CREATE) { create_flag = CREATE_NEW; + } else if (create_mode == OS_FILE_CREATE_PATH) { + /* create subdirs along the path if needed */ + *success = os_file_create_subdirs_if_needed(name); + if (!*success) { + ut_error; + } + create_flag = CREATE_NEW; + create_mode = OS_FILE_CREATE; } else { create_flag = 0; ut_error; @@ -496,8 +909,9 @@ try_again: file = CreateFile(name, access, - FILE_SHARE_READ,/* file can be read also by other - processes */ + FILE_SHARE_READ | FILE_SHARE_WRITE, + /* file can be read ansd written also + by other processes */ NULL, /* default security attributes */ create_flag, attributes, @@ -533,6 +947,14 @@ try_again: } } else if (create_mode == OS_FILE_CREATE) { create_flag = O_RDWR | O_CREAT | O_EXCL; + } else if (create_mode == OS_FILE_CREATE_PATH) { + /* create subdirs along the path if needed */ + *success = os_file_create_subdirs_if_needed(name); + if (!*success) { + return (-1); + } + create_flag = O_RDWR | O_CREAT | O_EXCL; + create_mode = OS_FILE_CREATE; } else { create_flag = 0; ut_error; @@ -554,6 +976,13 @@ try_again: if (retry) { goto try_again; } +#ifdef USE_FILE_LOCK + } else if (access_type == OS_FILE_READ_WRITE + && os_file_lock(file, name)) { + *success = FALSE; + close(file); + file = -1; +#endif } else { *success = TRUE; } @@ -568,21 +997,27 @@ A simple function to open or create a file. */ os_file_t os_file_create_simple_no_error_handling( /*====================================*/ - /* out, own: handle to the file, not defined if error, - error number can be retrieved with os_get_last_error */ - char* name, /* in: name of the file or path as a null-terminated - string */ - ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened - (if does not exist, error), or OS_FILE_CREATE if a new - file is created (if exists, error) */ - ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ - ibool* success)/* out: TRUE if succeed, FALSE if error */ + /* out, own: handle to the file, not defined + if error, error number can be retrieved with + os_file_get_last_error */ + const char* name, /* in: name of the file or path as a + null-terminated string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file + is opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error) */ + ulint access_type,/* in: OS_FILE_READ_ONLY, + OS_FILE_READ_WRITE, or + OS_FILE_READ_ALLOW_DELETE; the last option is + used by a backup program reading the file */ + ibool* success)/* out: TRUE if succeed, FALSE if error */ { #ifdef __WIN__ os_file_t file; DWORD create_flag; DWORD access; DWORD attributes = 0; + DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE; ut_a(name); @@ -599,6 +1034,13 @@ os_file_create_simple_no_error_handling( access = GENERIC_READ; } else if (access_type == OS_FILE_READ_WRITE) { access = GENERIC_READ | GENERIC_WRITE; + } else if (access_type == OS_FILE_READ_ALLOW_DELETE) { + access = GENERIC_READ; + share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ + | FILE_SHARE_WRITE; /* A backup program has to give + mysqld the maximum freedom to + do what it likes with the + file */ } else { access = 0; ut_error; @@ -606,8 +1048,7 @@ os_file_create_simple_no_error_handling( file = CreateFile(name, access, - FILE_SHARE_READ,/* file can be read also by other - processes */ + share_mode, NULL, /* default security attributes */ create_flag, attributes, @@ -648,6 +1089,13 @@ os_file_create_simple_no_error_handling( if (file == -1) { *success = FALSE; +#ifdef USE_FILE_LOCK + } else if (access_type == OS_FILE_READ_WRITE + && os_file_lock(file, name)) { + *success = FALSE; + close(file); + file = -1; +#endif } else { *success = TRUE; } @@ -662,33 +1110,43 @@ Opens an existing file or creates a new. */ os_file_t os_file_create( /*===========*/ - /* out, own: handle to the file, not defined if error, - error number can be retrieved with os_get_last_error */ - char* name, /* in: name of the file or path as a null-terminated - string */ - ulint create_mode, /* in: OS_FILE_OPEN if an existing file is opened - (if does not exist, error), or OS_FILE_CREATE if a new - file is created (if exists, error), OS_FILE_OVERWRITE - if a new is created or an old overwritten */ - ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o - is desired, OS_FILE_NORMAL, if any normal file; - NOTE that it also depends on type, os_aio_.. and srv_.. - variables whether we really use async i/o or - unbuffered i/o: look in the function source code for - the exact rules */ - ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success)/* out: TRUE if succeed, FALSE if error */ + /* out, own: handle to the file, not defined + if error, error number can be retrieved with + os_file_get_last_error */ + const char* name, /* in: name of the file or path as a + null-terminated string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file + is opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error), + OS_FILE_OVERWRITE if a new file is created + or an old overwritten; + OS_FILE_OPEN_RAW, if a raw device or disk + partition should be opened */ + ulint purpose,/* in: OS_FILE_AIO, if asynchronous, + non-buffered i/o is desired, + OS_FILE_NORMAL, if any normal file; + NOTE that it also depends on type, os_aio_.. + and srv_.. variables whether we really use + async i/o or unbuffered i/o: look in the + function source code for the exact rules */ + ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */ + ibool* success)/* out: TRUE if succeed, FALSE if error */ { #ifdef __WIN__ os_file_t file; + DWORD share_mode = FILE_SHARE_READ; DWORD create_flag; DWORD attributes; ibool retry; - try_again: ut_a(name); - if (create_mode == OS_FILE_OPEN) { + if (create_mode == OS_FILE_OPEN_RAW) { + create_flag = OPEN_EXISTING; + share_mode = FILE_SHARE_WRITE; + } else if (create_mode == OS_FILE_OPEN + || create_mode == OS_FILE_OPEN_RETRY) { create_flag = OPEN_EXISTING; } else if (create_mode == OS_FILE_CREATE) { create_flag = CREATE_NEW; @@ -738,14 +1196,17 @@ try_again: file = CreateFile(name, GENERIC_READ | GENERIC_WRITE, /* read and write access */ - FILE_SHARE_READ,/* File can be read also by other + share_mode, /* File can be read also by other processes; we must give the read permission because of ibbackup. We do not give the write permission to others because if one would succeed to start 2 instances of mysqld on the SAME files, that could cause severe - database corruption! */ + database corruption! When opening + raw disk partitions, Microsoft manuals + say that we must give also the write + permission. */ NULL, /* default security attributes */ create_flag, attributes, @@ -755,8 +1216,8 @@ try_again: *success = FALSE; retry = os_file_handle_error(name, - create_mode == OS_FILE_OPEN ? - "open" : "create"); + create_mode == OS_FILE_CREATE ? + "create" : "open"); if (retry) { goto try_again; } @@ -776,17 +1237,15 @@ try_again: try_again: ut_a(name); - if (create_mode == OS_FILE_OPEN) { + if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW + || create_mode == OS_FILE_OPEN_RETRY) { mode_str = "OPEN"; - create_flag = O_RDWR; } else if (create_mode == OS_FILE_CREATE) { mode_str = "CREATE"; - create_flag = O_RDWR | O_CREAT | O_EXCL; } else if (create_mode == OS_FILE_OVERWRITE) { mode_str = "OVERWRITE"; - create_flag = O_RDWR | O_CREAT | O_TRUNC; } else { create_flag = 0; @@ -843,11 +1302,34 @@ try_again: *success = FALSE; retry = os_file_handle_error(name, - create_mode == OS_FILE_OPEN ? - "open" : "create"); + create_mode == OS_FILE_CREATE ? + "create" : "open"); if (retry) { goto try_again; } +#ifdef USE_FILE_LOCK + } else if (create_mode != OS_FILE_OPEN_RAW + && os_file_lock(file, name)) { + *success = FALSE; + if (create_mode == OS_FILE_OPEN_RETRY) { + int i; + ut_print_timestamp(stderr); + fputs(" InnoDB: Retrying to lock the first data file\n", + stderr); + for (i = 0; i < 100; i++) { + os_thread_sleep(1000000); + if (!os_file_lock(file, name)) { + *success = TRUE; + return(file); + } + } + ut_print_timestamp(stderr); + fputs(" InnoDB: Unable to open the first data file\n", + stderr); + } + close(file); + file = -1; +#endif } else { *success = TRUE; } @@ -857,6 +1339,168 @@ try_again: } /*************************************************************************** +Deletes a file if it exists. The file has to be closed before calling this. */ + +ibool +os_file_delete_if_exists( +/*=====================*/ + /* out: TRUE if success */ + const char* name) /* in: file path as a null-terminated string */ +{ +#ifdef __WIN__ + BOOL ret; + ulint count = 0; +loop: + /* In Windows, deleting an .ibd file may fail if ibbackup is copying + it */ + + ret = DeleteFile((LPCTSTR)name); + + if (ret) { + return(TRUE); + } + + if (GetLastError() == ERROR_FILE_NOT_FOUND) { + /* the file does not exist, this not an error */ + + return(TRUE); + } + + count++; + + if (count > 100 && 0 == (count % 10)) { + fprintf(stderr, +"InnoDB: Warning: cannot delete file %s\n" +"InnoDB: Are you running ibbackup to back up the file?\n", name); + + os_file_get_last_error(TRUE); /* print error information */ + } + + os_thread_sleep(1000000); /* sleep for a second */ + + if (count > 2000) { + + return(FALSE); + } + + goto loop; +#else + int ret; + + ret = unlink((const char*)name); + + if (ret != 0 && errno != ENOENT) { + os_file_handle_error_no_exit(name, "delete"); + + return(FALSE); + } + + return(TRUE); +#endif +} + +/*************************************************************************** +Deletes a file. The file has to be closed before calling this. */ + +ibool +os_file_delete( +/*===========*/ + /* out: TRUE if success */ + const char* name) /* in: file path as a null-terminated string */ +{ +#ifdef __WIN__ + BOOL ret; + ulint count = 0; +loop: + /* In Windows, deleting an .ibd file may fail if ibbackup is copying + it */ + + ret = DeleteFile((LPCTSTR)name); + + if (ret) { + return(TRUE); + } + + if (GetLastError() == ERROR_FILE_NOT_FOUND) { + /* If the file does not exist, we classify this as a 'mild' + error and return */ + + return(FALSE); + } + + count++; + + if (count > 100 && 0 == (count % 10)) { + fprintf(stderr, +"InnoDB: Warning: cannot delete file %s\n" +"InnoDB: Are you running ibbackup to back up the file?\n", name); + + os_file_get_last_error(TRUE); /* print error information */ + } + + os_thread_sleep(1000000); /* sleep for a second */ + + if (count > 2000) { + + return(FALSE); + } + + goto loop; +#else + int ret; + + ret = unlink((const char*)name); + + if (ret != 0) { + os_file_handle_error_no_exit(name, "delete"); + + return(FALSE); + } + + return(TRUE); +#endif +} + +/*************************************************************************** +Renames a file (can also move it to another directory). It is safest that the +file is closed before calling this function. */ + +ibool +os_file_rename( +/*===========*/ + /* out: TRUE if success */ + const char* oldpath,/* in: old file path as a null-terminated + string */ + const char* newpath)/* in: new file path */ +{ +#ifdef __WIN__ + BOOL ret; + + ret = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath); + + if (ret) { + return(TRUE); + } + + os_file_handle_error(oldpath, "rename"); + + return(FALSE); +#else + int ret; + + ret = rename((const char*)oldpath, (const char*)newpath); + + if (ret != 0) { + os_file_handle_error(oldpath, "rename"); + + return(FALSE); + } + + return(TRUE); +#endif +} + +/*************************************************************************** Closes a file handle. In case of error, error number can be retrieved with os_file_get_last_error. */ @@ -878,6 +1522,7 @@ os_file_close( } os_file_handle_error(NULL, "close"); + return(FALSE); #else int ret; @@ -886,6 +1531,7 @@ os_file_close( if (ret == -1) { os_file_handle_error(NULL, "close"); + return(FALSE); } @@ -965,7 +1611,7 @@ os_file_get_size( } if (sizeof(off_t) > 4) { - *size = (ulint)(offs & 0xFFFFFFFF); + *size = (ulint)(offs & 0xFFFFFFFFUL); *size_high = (ulint)(offs >> 32); } else { *size = (ulint) offs; @@ -977,60 +1623,82 @@ os_file_get_size( } /*************************************************************************** -Sets a file size. This function can be used to extend or truncate a file. */ +Gets file size as a 64-bit integer ib_longlong. */ + +ib_longlong +os_file_get_size_as_iblonglong( +/*===========================*/ + /* out: size in bytes, -1 if error */ + os_file_t file) /* in: handle to a file */ +{ + ulint size; + ulint size_high; + ibool success; + + success = os_file_get_size(file, &size, &size_high); + + if (!success) { + + return(-1); + } + + return((((ib_longlong)size_high) << 32) + (ib_longlong)size); +} + +/*************************************************************************** +Write the specified number of zeros to a newly created file. */ ibool os_file_set_size( /*=============*/ /* out: TRUE if success */ - char* name, /* in: name of the file or path as a + const char* name, /* in: name of the file or path as a null-terminated string */ os_file_t file, /* in: handle to a file */ ulint size, /* in: least significant 32 bits of file size */ ulint size_high)/* in: most significant 32 bits of size */ { - ib_longlong offset; - ib_longlong low; - ulint n_bytes; + ib_longlong current_size; + ib_longlong desired_size; ibool ret; byte* buf; byte* buf2; - ulint i; + ulint buf_size; ut_a(size == (size & 0xFFFFFFFF)); - /* We use a very big 8 MB buffer in writing because Linux may be - extremely slow in fsync on 1 MB writes */ + current_size = 0; + desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32); - buf2 = ut_malloc(UNIV_PAGE_SIZE * 513); + /* Write up to 1 megabyte at a time. */ + buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE)) + * UNIV_PAGE_SIZE; + buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE); /* Align the buffer for possible raw i/o */ buf = ut_align(buf2, UNIV_PAGE_SIZE); /* Write buffer full of zeros */ - for (i = 0; i < UNIV_PAGE_SIZE * 512; i++) { - buf[i] = '\0'; - } + memset(buf, 0, buf_size); - offset = 0; - low = (ib_longlong)size + (((ib_longlong)size_high) << 32); - - if (low >= (ib_longlong)(100 * 1024 * 1024)) { + if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) { fprintf(stderr, "InnoDB: Progress in MB:"); } - while (offset < low) { - if (low - offset < UNIV_PAGE_SIZE * 512) { - n_bytes = (ulint)(low - offset); - } else { - n_bytes = UNIV_PAGE_SIZE * 512; - } - + while (current_size < desired_size) { + ulint n_bytes; + + if (desired_size - current_size < (ib_longlong) buf_size) { + n_bytes = (ulint) (desired_size - current_size); + } else { + n_bytes = buf_size; + } + ret = os_file_write(name, file, buf, - (ulint)(offset & 0xFFFFFFFF), - (ulint)(offset >> 32), + (ulint)(current_size & 0xFFFFFFFF), + (ulint)(current_size >> 32), n_bytes); if (!ret) { ut_free(buf2); @@ -1038,18 +1706,18 @@ os_file_set_size( } /* Print about progress for each 100 MB written */ - if ((offset + n_bytes) / (ib_longlong)(100 * 1024 * 1024) - != offset / (ib_longlong)(100 * 1024 * 1024)) { + if ((current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024) + != current_size / (ib_longlong)(100 * 1024 * 1024)) { fprintf(stderr, " %lu00", - (ulint)((offset + n_bytes) + (ulong) ((current_size + n_bytes) / (ib_longlong)(100 * 1024 * 1024))); } - offset += n_bytes; + current_size += n_bytes; } - if (low >= (ib_longlong)(100 * 1024 * 1024)) { + if (desired_size >= (ib_longlong)(100 * 1024 * 1024)) { fprintf(stderr, "\n"); } @@ -1105,6 +1773,15 @@ os_file_flush( return(TRUE); } + /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is + actually a raw device, we choose to ignore that error if we are using + raw disks */ + + if (srv_start_raw_disk_in_use && GetLastError() + == ERROR_INVALID_FUNCTION) { + return(TRUE); + } + os_file_handle_error(NULL, "flush"); /* It is a fatal error if a file flush does not succeed, because then @@ -1115,7 +1792,33 @@ os_file_flush( #else int ret; -#ifdef HAVE_FDATASYNC +#if defined(HAVE_DARWIN_THREADS) +# ifndef F_FULLFSYNC + /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */ +# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */ +# elif F_FULLFSYNC != 51 +# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3" +# endif + /* Apple has disabled fsync() for internal disk drives in OS X. That + caused corruption for a user when he tested a power outage. Let us in + OS X use a nonstandard flush method recommended by an Apple + engineer. */ + + if (!srv_have_fullfsync) { + /* If we are not on an operating system that supports this, + then fall back to a plain fsync. */ + + ret = fsync(file); + } else { + ret = fcntl(file, F_FULLFSYNC, NULL); + + if (ret) { + /* If we are not on a file system that supports this, + then fall back to a plain fsync. */ + ret = fsync(file); + } + } +#elif HAVE_FDATASYNC ret = fdatasync(file); #else /* fprintf(stderr, "Flushing to file %p\n", file); */ @@ -1128,9 +1831,10 @@ os_file_flush( } /* Since Linux returns EINVAL if the 'file' is actually a raw device, - we choose to ignore that error */ + we choose to ignore that error if we are using raw disks */ + + if (srv_start_raw_disk_in_use && errno == EINVAL) { - if (errno == EINVAL) { return(TRUE); } @@ -1168,7 +1872,7 @@ os_file_pread( off_t offs; ssize_t n_bytes; - ut_a((offset & 0xFFFFFFFF) == offset); + ut_a((offset & 0xFFFFFFFFUL) == offset); /* If off_t is > 4 bytes in size, then we assume we can pass a 64-bit address */ @@ -1235,7 +1939,7 @@ os_file_pwrite( /*===========*/ /* out: number of bytes written, -1 if error */ os_file_t file, /* in: handle to a file */ - void* buf, /* in: buffer from where to write */ + const void* buf, /* in: buffer from where to write */ ulint n, /* in: number of bytes to write */ ulint offset, /* in: least significant 32 bits of file offset where to write */ @@ -1245,7 +1949,7 @@ os_file_pwrite( ssize_t ret; off_t offs; - ut_a((offset & 0xFFFFFFFF) == offset); + ut_a((offset & 0xFFFFFFFFUL) == offset); /* If off_t is > 4 bytes in size, then we assume we can pass a 64-bit address */ @@ -1274,6 +1978,7 @@ os_file_pwrite( os_file_n_pending_pwrites--; os_mutex_exit(os_file_count_mutex); +# ifdef UNIV_DO_FLUSH if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC && srv_unix_file_flush_method != SRV_UNIX_NOSYNC && !os_do_not_call_flush_at_each_write) { @@ -1284,6 +1989,7 @@ os_file_pwrite( ut_a(TRUE == os_file_flush(file)); } +# endif /* UNIV_DO_FLUSH */ return(ret); #else @@ -1306,6 +2012,7 @@ os_file_pwrite( ret = write(file, buf, (ssize_t)n); +# ifdef UNIV_DO_FLUSH if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC && srv_unix_file_flush_method != SRV_UNIX_NOSYNC && !os_do_not_call_flush_at_each_write) { @@ -1316,6 +2023,7 @@ os_file_pwrite( ut_a(TRUE == os_file_flush(file)); } +# endif /* UNIV_DO_FLUSH */ os_mutex_exit(os_file_seek_mutexes[i]); @@ -1350,7 +2058,7 @@ os_file_read( ibool retry; ulint i; - ut_a((offset & 0xFFFFFFFF) == offset); + ut_a((offset & 0xFFFFFFFFUL) == offset); os_n_file_reads++; os_bytes_read_since_printout += n; @@ -1360,8 +2068,8 @@ try_again: ut_ad(buf); ut_ad(n > 0); - low = offset; - high = offset_high; + low = (DWORD) offset; + high = (DWORD) offset_high; /* Protect the seek / read operation with a mutex */ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; @@ -1377,7 +2085,7 @@ try_again: goto error_handling; } - ret = ReadFile(file, buf, n, &len, NULL); + ret = ReadFile(file, buf, (DWORD) n, &len, NULL); os_mutex_exit(os_file_seek_mutexes[i]); @@ -1397,6 +2105,11 @@ try_again: return(TRUE); } + + fprintf(stderr, +"InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n" +"InnoDB: Was only able to read %ld.\n", (ulong)n, (ulong)offset_high, + (ulong)offset, (long)ret); #endif #ifdef __WIN__ error_handling: @@ -1410,9 +2123,9 @@ error_handling: fprintf(stderr, "InnoDB: Fatal error: cannot read from file. OS error number %lu.\n", #ifdef __WIN__ - (ulint)GetLastError() + (ulong) GetLastError() #else - (ulint)errno + (ulong) errno #endif ); fflush(stderr); @@ -1423,6 +2136,92 @@ error_handling: } /*********************************************************************** +Requests a synchronous positioned read operation. This function does not do +any error handling. In case of error it returns FALSE. */ + +ibool +os_file_read_no_error_handling( +/*===========================*/ + /* out: TRUE if request was + successful, FALSE if fail */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read */ + ulint offset, /* in: least significant 32 bits of file + offset where to read */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint n) /* in: number of bytes to read */ +{ +#ifdef __WIN__ + BOOL ret; + DWORD len; + DWORD ret2; + DWORD low; + DWORD high; + ibool retry; + ulint i; + + ut_a((offset & 0xFFFFFFFFUL) == offset); + + os_n_file_reads++; + os_bytes_read_since_printout += n; + +try_again: + ut_ad(file); + ut_ad(buf); + ut_ad(n > 0); + + low = (DWORD) offset; + high = (DWORD) offset_high; + + /* Protect the seek / read operation with a mutex */ + i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; + + os_mutex_enter(os_file_seek_mutexes[i]); + + ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); + + if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { + + os_mutex_exit(os_file_seek_mutexes[i]); + + goto error_handling; + } + + ret = ReadFile(file, buf, (DWORD) n, &len, NULL); + + os_mutex_exit(os_file_seek_mutexes[i]); + + if (ret && len == n) { + return(TRUE); + } +#else + ibool retry; + ssize_t ret; + + os_bytes_read_since_printout += n; + +try_again: + ret = os_file_pread(file, buf, n, offset, offset_high); + + if ((ulint)ret == n) { + + return(TRUE); + } +#endif +#ifdef __WIN__ +error_handling: +#endif + retry = os_file_handle_error_no_exit(NULL, "read"); + + if (retry) { + goto try_again; + } + + return(FALSE); +} + +/*********************************************************************** Requests a synchronous write operation. */ ibool @@ -1430,10 +2229,10 @@ os_file_write( /*==========*/ /* out: TRUE if request was successful, FALSE if fail */ - char* name, /* in: name of the file or path as a + const char* name, /* in: name of the file or path as a null-terminated string */ os_file_t file, /* in: handle to a file */ - void* buf, /* in: buffer from which to write */ + const void* buf, /* in: buffer from which to write */ ulint offset, /* in: least significant 32 bits of file offset where to write */ ulint offset_high, /* in: most significant 32 bits of @@ -1458,8 +2257,8 @@ os_file_write( ut_ad(buf); ut_ad(n > 0); retry: - low = offset; - high = offset_high; + low = (DWORD) offset; + high = (DWORD) offset_high; /* Protect the seek / write operation with a mutex */ i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; @@ -1480,20 +2279,22 @@ retry: "InnoDB: Some operating system error numbers are described at\n" "InnoDB: " "http://dev.mysql.com/doc/mysql/en/Operating_System_error_codes.html\n", - name, offset_high, offset, - (ulint)GetLastError()); + name, (ulong) offset_high, (ulong) offset, + (ulong) GetLastError()); return(FALSE); } - ret = WriteFile(file, buf, n, &len, NULL); + ret = WriteFile(file, buf, (DWORD) n, &len, NULL); /* Always do fsync to reduce the probability that when the OS crashes, a database page is only partially physically written to disk. */ +# ifdef UNIV_DO_FLUSH if (!os_do_not_call_flush_at_each_write) { ut_a(TRUE == os_file_flush(file)); } +# endif /* UNIV_DO_FLUSH */ os_mutex_exit(os_file_seek_mutexes[i]); @@ -1527,12 +2328,12 @@ retry: "InnoDB: Operating system error number %lu.\n" "InnoDB: Check that your OS and file system support files of this size.\n" "InnoDB: Check also that the disk is not full or a disk quota exceeded.\n", - name, offset_high, offset, n, (ulint)len, - err); + name, (ulong) offset_high, (ulong) offset, + (ulong) n, (ulong) len, (ulong) err); if (strerror((int)err) != NULL) { fprintf(stderr, -"InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err)); +"InnoDB: Error number %lu means '%s'.\n", (ulong) err, strerror((int)err)); } fprintf(stderr, @@ -1583,6 +2384,259 @@ retry: #endif } +/*********************************************************************** +Check the existence and type of the given file. */ + +ibool +os_file_status( +/*===========*/ + /* out: TRUE if call succeeded */ + const char* path, /* in: pathname of the file */ + ibool* exists, /* out: TRUE if file exists */ + os_file_type_t* type) /* out: type of the file (if it exists) */ +{ +#ifdef __WIN__ + int ret; + struct _stat statinfo; + + ret = _stat(path, &statinfo); + if (ret && (errno == ENOENT || errno == ENOTDIR)) { + /* file does not exist */ + *exists = FALSE; + return(TRUE); + } else if (ret) { + /* file exists, but stat call failed */ + + os_file_handle_error_no_exit(path, "stat"); + + return(FALSE); + } + + if (_S_IFDIR & statinfo.st_mode) { + *type = OS_FILE_TYPE_DIR; + } else if (_S_IFREG & statinfo.st_mode) { + *type = OS_FILE_TYPE_FILE; + } else { + *type = OS_FILE_TYPE_UNKNOWN; + } + + *exists = TRUE; + + return(TRUE); +#else + int ret; + struct stat statinfo; + + ret = stat(path, &statinfo); + if (ret && (errno == ENOENT || errno == ENOTDIR)) { + /* file does not exist */ + *exists = FALSE; + return(TRUE); + } else if (ret) { + /* file exists, but stat call failed */ + + os_file_handle_error_no_exit(path, "stat"); + + return(FALSE); + } + + if (S_ISDIR(statinfo.st_mode)) { + *type = OS_FILE_TYPE_DIR; + } else if (S_ISLNK(statinfo.st_mode)) { + *type = OS_FILE_TYPE_LINK; + } else if (S_ISREG(statinfo.st_mode)) { + *type = OS_FILE_TYPE_FILE; + } else { + *type = OS_FILE_TYPE_UNKNOWN; + } + + *exists = TRUE; + + return(TRUE); +#endif +} + +/*********************************************************************** +This function returns information about the specified file */ + +ibool +os_file_get_status( +/*===========*/ + /* out: TRUE if stat information found */ + const char* path, /* in: pathname of the file */ + os_file_stat_t* stat_info) /* information of a file in a directory */ +{ +#ifdef __WIN__ + int ret; + struct _stat statinfo; + + ret = _stat(path, &statinfo); + if (ret && (errno == ENOENT || errno == ENOTDIR)) { + /* file does not exist */ + + return(FALSE); + } else if (ret) { + /* file exists, but stat call failed */ + + os_file_handle_error_no_exit(path, "stat"); + + return(FALSE); + } + if (_S_IFDIR & statinfo.st_mode) { + stat_info->type = OS_FILE_TYPE_DIR; + } else if (_S_IFREG & statinfo.st_mode) { + stat_info->type = OS_FILE_TYPE_FILE; + } else { + stat_info->type = OS_FILE_TYPE_UNKNOWN; + } + + stat_info->ctime = statinfo.st_ctime; + stat_info->atime = statinfo.st_atime; + stat_info->mtime = statinfo.st_mtime; + stat_info->size = statinfo.st_size; + + return(TRUE); +#else + int ret; + struct stat statinfo; + + ret = stat(path, &statinfo); + + if (ret && (errno == ENOENT || errno == ENOTDIR)) { + /* file does not exist */ + + return(FALSE); + } else if (ret) { + /* file exists, but stat call failed */ + + os_file_handle_error_no_exit(path, "stat"); + + return(FALSE); + } + + if (S_ISDIR(statinfo.st_mode)) { + stat_info->type = OS_FILE_TYPE_DIR; + } else if (S_ISLNK(statinfo.st_mode)) { + stat_info->type = OS_FILE_TYPE_LINK; + } else if (S_ISREG(statinfo.st_mode)) { + stat_info->type = OS_FILE_TYPE_FILE; + } else { + stat_info->type = OS_FILE_TYPE_UNKNOWN; + } + + stat_info->ctime = statinfo.st_ctime; + stat_info->atime = statinfo.st_atime; + stat_info->mtime = statinfo.st_mtime; + stat_info->size = statinfo.st_size; + + return(TRUE); +#endif +} + +/* path name separator character */ +#ifdef __WIN__ +# define OS_FILE_PATH_SEPARATOR '\\' +#else +# define OS_FILE_PATH_SEPARATOR '/' +#endif + +/******************************************************************** +The function os_file_dirname returns a directory component of a +null-terminated pathname string. In the usual case, dirname returns +the string up to, but not including, the final '/', and basename +is the component following the final '/'. Trailing '/' charac +ters are not counted as part of the pathname. + +If path does not contain a slash, dirname returns the string ".". + +Concatenating the string returned by dirname, a "/", and the basename +yields a complete pathname. + +The return value is a copy of the directory component of the pathname. +The copy is allocated from heap. It is the caller responsibility +to free it after it is no longer needed. + +The following list of examples (taken from SUSv2) shows the strings +returned by dirname and basename for different paths: + + path dirname basename + "/usr/lib" "/usr" "lib" + "/usr/" "/" "usr" + "usr" "." "usr" + "/" "/" "/" + "." "." "." + ".." "." ".." +*/ + +char* +os_file_dirname( +/*============*/ + /* out, own: directory component of the + pathname */ + const char* path) /* in: pathname */ +{ + /* Find the offset of the last slash */ + const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR); + if (!last_slash) { + /* No slash in the path, return "." */ + + return(mem_strdup(".")); + } + + /* Ok, there is a slash */ + + if (last_slash == path) { + /* last slash is the first char of the path */ + + return(mem_strdup("/")); + } + + /* Non-trivial directory component */ + + return(mem_strdupl(path, last_slash - path)); +} + +/******************************************************************** +Creates all missing subdirectories along the given path. */ + +ibool +os_file_create_subdirs_if_needed( +/*=============================*/ + /* out: TRUE if call succeeded + FALSE otherwise */ + const char* path) /* in: path name */ +{ + char* subdir; + ibool success, subdir_exists; + os_file_type_t type; + + subdir = os_file_dirname(path); + if (strlen(subdir) == 1 + && (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) { + /* subdir is root or cwd, nothing to do */ + mem_free(subdir); + + return(TRUE); + } + + /* Test if subdir exists */ + success = os_file_status(subdir, &subdir_exists, &type); + if (success && !subdir_exists) { + /* subdir does not exist, create it */ + success = os_file_create_subdirs_if_needed(subdir); + if (!success) { + mem_free(subdir); + + return(FALSE); + } + success = os_file_create_directory(subdir, FALSE); + } + + mem_free(subdir); + + return(success); +} + /******************************************************************** Returns a pointer to the nth slot in the aio array. */ static @@ -1950,7 +3004,7 @@ os_aio_array_reserve_slot( void* message2,/* in: message to be passed along with the aio operation */ os_file_t file, /* in: file handle */ - char* name, /* in: name of the file or path as a + const char* name, /* in: name of the file or path as a null-terminated string */ void* buf, /* in: buffer where to read or from which to write */ @@ -2197,7 +3251,7 @@ os_aio( because i/os are not actually handled until all have been posted: use with great caution! */ - char* name, /* in: name of the file or path as a + const char* name, /* in: name of the file or path as a null-terminated string */ os_file_t file, /* in: handle to a file */ void* buf, /* in: buffer where to read or from which @@ -2218,7 +3272,7 @@ os_aio( #ifdef WIN_ASYNC_IO ibool retval; BOOL ret = TRUE; - DWORD len = n; + DWORD len = (DWORD) n; void* dummy_mess1; void* dummy_mess2; ulint dummy_type; @@ -2454,10 +3508,12 @@ os_aio_windows_handle( if (ret && len == slot->len) { ret_val = TRUE; +# ifdef UNIV_DO_FLUSH if (slot->type == OS_FILE_WRITE && !os_do_not_call_flush_at_each_write) { ut_a(TRUE == os_file_flush(slot->file)); } +# endif /* UNIV_DO_FLUSH */ } else { os_file_handle_error(slot->name, "Windows aio"); @@ -2538,10 +3594,12 @@ os_aio_posix_handle( *message1 = slot->message1; *message2 = slot->message2; +# ifdef UNIV_DO_FLUSH if (slot->type == OS_FILE_WRITE && !os_do_not_call_flush_at_each_write) { ut_a(TRUE == os_file_flush(slot->file)); } +# endif /* UNIV_DO_FLUSH */ os_mutex_exit(array->mutex); @@ -2584,7 +3642,7 @@ os_aio_simulated_handle( ulint biggest_age; ulint age; byte* combined_buf; - byte* combined_buf2= 0; /* Remove warning */ + byte* combined_buf2; ibool ret; ulint n; ulint i; @@ -2629,7 +3687,7 @@ restart: if (os_aio_print_debug) { fprintf(stderr, -"InnoDB: i/o for slot %lu already done, returning\n", i); +"InnoDB: i/o for slot %lu already done, returning\n", (ulong) i); } ret = TRUE; @@ -2747,6 +3805,7 @@ consecutive_loop: if (n_consecutive == 1) { /* We can use the buffer of the i/o request */ combined_buf = slot->buf; + combined_buf2 = NULL; } else { combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE); @@ -2778,8 +3837,8 @@ consecutive_loop: if (os_aio_print_debug) { fprintf(stderr, "InnoDB: doing i/o of type %lu at offset %lu %lu, length %lu\n", - slot->type, slot->offset_high, slot->offset, - total_len); + (ulong) slot->type, (ulong) slot->offset_high, + (ulong) slot->offset, (ulong) total_len); } /* Do the i/o with ordinary, synchronous i/o functions: */ @@ -2789,8 +3848,9 @@ consecutive_loop: || (slot->offset % UNIV_PAGE_SIZE != 0)) { fprintf(stderr, "InnoDB: Error: trying a displaced write to %s %lu %lu, len %lu\n", - slot->name, slot->offset_high, - slot->offset, total_len); + slot->name, (ulong) slot->offset_high, + (ulong) slot->offset, + (ulong) total_len); ut_error; } @@ -2844,7 +3904,7 @@ consecutive_loop: } } - if (n_consecutive > 1) { + if (combined_buf2) { ut_free(combined_buf2); } @@ -2893,7 +3953,7 @@ recommended_sleep: if (os_aio_print_debug) { fprintf(stderr, "InnoDB: i/o handler thread for i/o segment %lu wakes up\n", - global_segment); + (ulong) global_segment); } goto restart; @@ -2969,7 +4029,7 @@ os_aio_print( ulint i; for (i = 0; i < srv_n_file_io_threads; i++) { - fprintf(file, "I/O thread %lu state: %s (%s)", i, + fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i, srv_io_thread_op_info[i], srv_io_thread_function[i]); @@ -3008,7 +4068,7 @@ loop: ut_a(array->n_reserved == n_reserved); - fprintf(file, " %lu", n_reserved); + fprintf(file, " %lu", (ulong) n_reserved); os_mutex_exit(array->mutex); @@ -3048,19 +4108,22 @@ loop: fprintf(file, "Pending flushes (fsync) log: %lu; buffer pool: %lu\n" "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n", - fil_n_pending_log_flushes, fil_n_pending_tablespace_flushes, - os_n_file_reads, os_n_file_writes, os_n_fsyncs); + (ulong) fil_n_pending_log_flushes, + (ulong) fil_n_pending_tablespace_flushes, + (ulong) os_n_file_reads, (ulong) os_n_file_writes, + (ulong) os_n_fsyncs); if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) { fprintf(file, "%lu pending preads, %lu pending pwrites\n", - os_file_n_pending_preads, os_file_n_pending_pwrites); + (ulong) os_file_n_pending_preads, + (ulong) os_file_n_pending_pwrites); } if (os_n_file_reads == os_n_file_reads_old) { avg_bytes_read = 0.0; } else { - avg_bytes_read = os_bytes_read_since_printout / + avg_bytes_read = (double) os_bytes_read_since_printout / (os_n_file_reads - os_n_file_reads_old); } @@ -3068,7 +4131,7 @@ loop: "%.2f reads/s, %lu avg bytes/read, %.2f writes/s, %.2f fsyncs/s\n", (os_n_file_reads - os_n_file_reads_old) / time_elapsed, - (ulint)avg_bytes_read, + (ulong)avg_bytes_read, (os_n_file_writes - os_n_file_writes_old) / time_elapsed, (os_n_fsyncs - os_n_fsyncs_old) diff --git a/innobase/os/os0proc.c b/innobase/os/os0proc.c index 87a0bfb9e92..2f155788420 100644 --- a/innobase/os/os0proc.c +++ b/innobase/os/os0proc.c @@ -12,11 +12,469 @@ Created 9/30/1995 Heikki Tuuri #include "os0proc.ic" #endif +#include "ut0mem.h" +#include "ut0byte.h" + + +/* +How to get AWE to compile on Windows? +------------------------------------- + +In the project settings of the innobase project the Visual C++ source, +__WIN2000__ has to be defined. + +The Visual C++ has to be relatively recent and _WIN32_WINNT has to be +defined to a value >= 0x0500 when windows.h is included. + +#define _WIN32_WINNT 0x0500 + +Where does AWE work? +------------------- + +See the error message in os_awe_allocate_physical_mem(). + +How to assign privileges for mysqld to use AWE? +----------------------------------------------- + +See the error message in os_awe_enable_lock_pages_in_mem(). + +Use Windows AWE functions in this order +--------------------------------------- + +(1) os_awe_enable_lock_pages_in_mem(); +(2) os_awe_allocate_physical_mem(); +(3) os_awe_allocate_virtual_mem_window(); +(4) os_awe_map_physical_mem_to_window(). + +To test 'AWE' in a computer which does not have the AWE API, +you can compile with UNIV_SIMULATE_AWE defined in this file. +*/ + +#ifdef UNIV_SIMULATE_AWE +/* If we simulate AWE, we allocate the 'physical memory' here */ +byte* os_awe_simulate_mem; +ulint os_awe_simulate_mem_size; +os_awe_t* os_awe_simulate_page_info; +byte* os_awe_simulate_window; +ulint os_awe_simulate_window_size; +/* In simulated AWE the following contains a NULL pointer or a pointer +to a mapped 'physical page' for each 4 kB page in the AWE window */ +byte** os_awe_simulate_map; +#endif + +#ifdef __WIN2000__ +os_awe_t* os_awe_page_info; +ulint os_awe_n_pages; +byte* os_awe_window; +ulint os_awe_window_size; +#endif + +/******************************************************************** +Windows AWE support. Tries to enable the "lock pages in memory" privilege for +the current process so that the current process can allocate memory-locked +virtual address space to act as the window where AWE maps physical memory. */ + +ibool +os_awe_enable_lock_pages_in_mem(void) +/*=================================*/ + /* out: TRUE if success, FALSE if error; + prints error info to stderr if no success */ +{ +#ifdef UNIV_SIMULATE_AWE + + return(TRUE); + +#elif defined(__WIN2000__) + struct { + DWORD Count; + LUID_AND_ATTRIBUTES Privilege[1]; + } Info; + HANDLE hProcess; + HANDLE Token; + BOOL Result; + + hProcess = GetCurrentProcess(); + + /* Open the token of the current process */ + + Result = OpenProcessToken(hProcess, + TOKEN_ADJUST_PRIVILEGES, + &Token); + if (Result != TRUE) { + fprintf(stderr, + "InnoDB: AWE: Cannot open process token, error %lu\n", + (ulint)GetLastError()); + return(FALSE); + } + + Info.Count = 1; + + Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED; + + /* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY + privilege */ + + Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, + &(Info.Privilege[0].Luid)); + if (Result != TRUE) { + fprintf(stderr, + "InnoDB: AWE: Cannot get local privilege value for %s, error %lu.\n", + SE_LOCK_MEMORY_NAME, (ulint)GetLastError()); + + return(FALSE); + } + + /* Try to adjust the privilege */ + + Result = AdjustTokenPrivileges(Token, FALSE, + (PTOKEN_PRIVILEGES)&Info, + 0, NULL, NULL); + /* Check the result */ + + if (Result != TRUE) { + fprintf(stderr, + "InnoDB: AWE: Cannot adjust process token privileges, error %u.\n", + GetLastError()); + return(FALSE); + } else if (GetLastError() != ERROR_SUCCESS) { + fprintf(stderr, +"InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege, error %lu.\n" +"InnoDB: In Windows XP Home you cannot use AWE. In Windows 2000 and XP\n" +"InnoDB: Professional you must go to the Control Panel, to\n" +"InnoDB: Security Settings, to Local Policies, and enable\n" +"InnoDB: the 'lock pages in memory' privilege for the user who runs\n" +"InnoDB: the MySQL server.\n", GetLastError()); + + return(FALSE); + } + + CloseHandle(Token); + + return(TRUE); +#else #ifdef __WIN__ -#include <windows.h> + fprintf(stderr, +"InnoDB: AWE: Error: to use AWE you must use a ...-nt MySQL executable.\n"); +#endif + return(FALSE); #endif +} -#include "ut0mem.h" +/******************************************************************** +Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86 +processor. */ + +ibool +os_awe_allocate_physical_mem( +/*=========================*/ + /* out: TRUE if success */ + os_awe_t** page_info, /* out, own: array of opaque data containing + the info for allocated physical memory pages; + each allocated 4 kB physical memory page has + one slot of type os_awe_t in the array */ + ulint n_megabytes) /* in: number of megabytes to allocate */ +{ +#ifdef UNIV_SIMULATE_AWE + os_awe_simulate_page_info = ut_malloc(sizeof(os_awe_t) * + n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE)); + + os_awe_simulate_mem = ut_align(ut_malloc( + 4096 + 1024 * 1024 * n_megabytes), + 4096); + os_awe_simulate_mem_size = n_megabytes * 1024 * 1024; + + *page_info = os_awe_simulate_page_info; + + return(TRUE); + +#elif defined(__WIN2000__) + BOOL bResult; + os_awe_t NumberOfPages; /* Question: why does Windows + use the name ULONG_PTR for + a scalar integer type? Maybe + because we may also refer to + &NumberOfPages? */ + os_awe_t NumberOfPagesInitial; + SYSTEM_INFO sSysInfo; + int PFNArraySize; + + if (n_megabytes > 64 * 1024) { + + fprintf(stderr, +"InnoDB: AWE: Error: tried to allocate %lu MB.\n" +"InnoDB: AWE cannot allocate more than 64 GB in any computer.\n", n_megabytes); + + return(FALSE); + } + + GetSystemInfo(&sSysInfo); /* fill the system information structure */ + + if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) { + fprintf(stderr, +"InnoDB: AWE: Error: this computer has a page size of %lu.\n" +"InnoDB: Should be 4096 bytes for InnoDB AWE support to work.\n", + (ulint)sSysInfo.dwPageSize); + + return(FALSE); + } + + /* Calculate the number of pages of memory to request */ + + NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE); + + /* Calculate the size of page_info for allocated physical pages */ + + PFNArraySize = NumberOfPages * sizeof(os_awe_t); + + *page_info = (os_awe_t*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize); + + if (*page_info == NULL) { + fprintf(stderr, +"InnoDB: AWE: Failed to allocate page info array from process heap, error %lu\n", + (ulint)GetLastError()); + + return(FALSE); + } + + ut_total_allocated_memory += PFNArraySize; + + /* Enable this process' privilege to lock pages to physical memory */ + + if (!os_awe_enable_lock_pages_in_mem()) { + + return(FALSE); + } + + /* Allocate the physical memory */ + + NumberOfPagesInitial = NumberOfPages; + + os_awe_page_info = *page_info; + os_awe_n_pages = (ulint)NumberOfPages; + + /* Compilation note: if the compiler complains the function is not + defined, see the note at the start of this file */ + + bResult = AllocateUserPhysicalPages(GetCurrentProcess(), + &NumberOfPages, + *page_info); + if (bResult != TRUE) { + fprintf(stderr, +"InnoDB: AWE: Cannot allocate physical pages, error %lu.\n", + (ulint)GetLastError()); + + return(FALSE); + } + + if (NumberOfPagesInitial != NumberOfPages) { + fprintf(stderr, +"InnoDB: AWE: Error: allocated only %lu pages of %lu requested.\n" +"InnoDB: Check that you have enough free RAM.\n" +"InnoDB: In Windows XP Professional and 2000 Professional\n" +"InnoDB: Windows PAE size is max 4 GB. In 2000 and .NET\n" +"InnoDB: Advanced Servers and 2000 Datacenter Server it is 32 GB,\n" +"InnoDB: and in .NET Datacenter Server it is 64 GB.\n" +"InnoDB: A Microsoft web page said that the processor must be an Intel\n" +"InnoDB: processor.\n", + (ulint)NumberOfPages, + (ulint)NumberOfPagesInitial); + + return(FALSE); + } + + fprintf(stderr, +"InnoDB: Using Address Windowing Extensions (AWE); allocated %lu MB\n", + n_megabytes); + + return(TRUE); +#else + return(FALSE); +#endif +} + +/******************************************************************** +Allocates a window in the virtual address space where we can map then +pages of physical memory. */ + +byte* +os_awe_allocate_virtual_mem_window( +/*===============================*/ + /* out, own: allocated memory, or NULL if did not + succeed */ + ulint size) /* in: virtual memory allocation size in bytes, must + be < 2 GB */ +{ +#ifdef UNIV_SIMULATE_AWE + ulint i; + + os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096); + os_awe_simulate_window_size = size; + + os_awe_simulate_map = ut_malloc(sizeof(byte*) * (size / 4096)); + + for (i = 0; i < (size / 4096); i++) { + *(os_awe_simulate_map + i) = NULL; + } + + return(os_awe_simulate_window); + +#elif defined(__WIN2000__) + byte* ptr; + + if (size > (ulint)0x7FFFFFFFUL) { + fprintf(stderr, +"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory\n", size); + + return(NULL); + } + + ptr = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL, + PAGE_READWRITE); + if (ptr == NULL) { + fprintf(stderr, +"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory, error %lu\n", + size, (ulint)GetLastError()); + + return(NULL); + } + + os_awe_window = ptr; + os_awe_window_size = size; + + ut_total_allocated_memory += size; + + return(ptr); +#else + return(NULL); +#endif +} + +/******************************************************************** +With this function you can map parts of physical memory allocated with +the ..._allocate_physical_mem to the virtual address space allocated with +the previous function. Intel implements this so that the process page +tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP +showed that this takes < 1 microsecond, much better than the estimated 80 us +for copying a 16 kB page memory to memory. But, the operation will at least +partially invalidate the translation lookaside buffer (TLB) of all +processors. Under a real-world load the performance hit may be bigger. */ + +ibool +os_awe_map_physical_mem_to_window( +/*==============================*/ + /* out: TRUE if success; the function + calls exit(1) in case of an error */ + byte* ptr, /* in: a page-aligned pointer to + somewhere in the virtual address + space window; we map the physical mem + pages here */ + ulint n_mem_pages, /* in: number of 4 kB mem pages to + map */ + os_awe_t* page_info) /* in: array of page infos for those + pages; each page has one slot in the + array */ +{ +#ifdef UNIV_SIMULATE_AWE + ulint i; + byte** map; + byte* page; + byte* phys_page; + + ut_a(ptr >= os_awe_simulate_window); + ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size); + ut_a(page_info >= os_awe_simulate_page_info); + ut_a(page_info < os_awe_simulate_page_info + + (os_awe_simulate_mem_size / 4096)); + + /* First look if some other 'physical pages' are mapped at ptr, + and copy them back to where they were if yes */ + + map = os_awe_simulate_map + + ((ulint)(ptr - os_awe_simulate_window)) / 4096; + page = ptr; + + for (i = 0; i < n_mem_pages; i++) { + if (*map != NULL) { + ut_memcpy(*map, page, 4096); + } + map++; + page += 4096; + } + + /* Then copy to ptr the 'physical pages' determined by page_info; we + assume page_info is a segment of the array we created at the start */ + + phys_page = os_awe_simulate_mem + + (ulint)(page_info - os_awe_simulate_page_info) + * 4096; + + ut_memcpy(ptr, phys_page, n_mem_pages * 4096); + + /* Update the map */ + + map = os_awe_simulate_map + + ((ulint)(ptr - os_awe_simulate_window)) / 4096; + + for (i = 0; i < n_mem_pages; i++) { + *map = phys_page; + + map++; + phys_page += 4096; + } + + return(TRUE); + +#elif defined(__WIN2000__) + BOOL bResult; + os_awe_t n_pages; + + n_pages = (os_awe_t)n_mem_pages; + + if (!(ptr >= os_awe_window)) { + fprintf(stderr, +"InnoDB: AWE: Error: trying to map to address %lx but AWE window start %lx\n", + (ulint)ptr, (ulint)os_awe_window); + ut_a(0); + } + + if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) { + fprintf(stderr, +"InnoDB: AWE: Error: trying to map to address %lx but AWE window end %lx\n", + (ulint)ptr, (ulint)os_awe_window + os_awe_window_size); + ut_a(0); + } + + if (!(page_info >= os_awe_page_info)) { + fprintf(stderr, +"InnoDB: AWE: Error: trying to map page info at %lx but array start %lx\n", + (ulint)page_info, (ulint)os_awe_page_info); + ut_a(0); + } + + if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) { + fprintf(stderr, +"InnoDB: AWE: Error: trying to map page info at %lx but array end %lx\n", + (ulint)page_info, (ulint)(os_awe_page_info + os_awe_n_pages)); + ut_a(0); + } + + bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info); + + if (bResult != TRUE) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: AWE: Mapping of %lu physical pages to address %lx failed,\n" +"InnoDB: error %lu.\n" +"InnoDB: Cannot continue operation.\n", + n_mem_pages, (ulint)ptr, (ulint)GetLastError()); + exit(1); + } + + return(TRUE); +#else + return(FALSE); +#endif +} /******************************************************************** Converts the current process id to a number. It is not guaranteed that the diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c index 827d68501db..4ad9473fe66 100644 --- a/innobase/os/os0sync.c +++ b/innobase/os/os0sync.c @@ -109,9 +109,9 @@ must be reset explicitly by calling sync_os_reset_event. */ os_event_t os_event_create( /*============*/ - /* out: the event handle */ - char* name) /* in: the name of the event, if NULL - the event is created without a name */ + /* out: the event handle */ + const char* name) /* in: the name of the event, if NULL + the event is created without a name */ { #ifdef __WIN__ os_event_t event; @@ -125,7 +125,7 @@ os_event_create( if (!event->handle) { fprintf(stderr, "InnoDB: Could not create a Windows event semaphore; Windows error %lu\n", - (ulint)GetLastError()); + (ulong) GetLastError()); } #else /* Unix */ os_event_t event; @@ -166,9 +166,9 @@ reset when a single thread is released. Works only in Windows. */ os_event_t os_event_create_auto( /*=================*/ - /* out: the event handle */ - char* name) /* in: the name of the event, if NULL - the event is created without a name */ + /* out: the event handle */ + const char* name) /* in: the name of the event, if NULL + the event is created without a name */ { os_event_t event; @@ -182,7 +182,7 @@ os_event_create_auto( if (!event->handle) { fprintf(stderr, "InnoDB: Could not create a Windows auto event semaphore; Windows error %lu\n", - (ulint)GetLastError()); + (ulong) GetLastError()); } /* Put to the list of events */ @@ -361,7 +361,7 @@ os_event_wait_time( ut_a(event); if (time != OS_SYNC_INFINITE_TIME) { - err = WaitForSingleObject(event->handle, time / 1000); + err = WaitForSingleObject(event->handle, (DWORD) time / 1000); } else { err = WaitForSingleObject(event->handle, INFINITE); } @@ -408,11 +408,11 @@ os_event_wait_multiple( ut_a(native_event_array); ut_a(n > 0); - index = WaitForMultipleObjects(n, native_event_array, + index = WaitForMultipleObjects((DWORD) n, native_event_array, FALSE, /* Wait for any 1 event */ INFINITE); /* Infinite wait time limit */ - ut_a(index >= WAIT_OBJECT_0); + ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparision */ ut_a(index < WAIT_OBJECT_0 + n); if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { @@ -430,9 +430,9 @@ mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */ os_mutex_t os_mutex_create( /*============*/ - /* out: the mutex handle */ - char* name) /* in: the name of the mutex, if NULL - the mutex is created without a name */ + /* out: the mutex handle */ + const char* name) /* in: the name of the mutex, if NULL + the mutex is created without a name */ { #ifdef __WIN__ HANDLE mutex; @@ -631,7 +631,21 @@ os_fast_mutex_free( DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex); #else - ut_a(0 == pthread_mutex_destroy(fast_mutex)); + int ret; + + ret = pthread_mutex_destroy(fast_mutex); + + if (ret != 0) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: error: return value %lu when calling\n" +"InnoDB: pthread_mutex_destroy().\n", (ulint)ret); + fprintf(stderr, +"InnoDB: Byte contents of the pthread mutex at %p:\n", fast_mutex); + ut_print_buf(stderr, (const byte*)fast_mutex, + sizeof(os_fast_mutex_t)); + fprintf(stderr, "\n"); + } #endif if (os_sync_mutex_inited) { /* When freeing the last mutexes, we have diff --git a/innobase/os/os0thread.c b/innobase/os/os0thread.c index cb72310f23d..e1a1119cfd4 100644 --- a/innobase/os/os0thread.c +++ b/innobase/os/os0thread.c @@ -100,7 +100,7 @@ os_thread_create( { #ifdef __WIN__ os_thread_t thread; - ulint win_thread_id; + DWORD win_thread_id; os_mutex_enter(os_sync_mutex); os_thread_count++; @@ -210,6 +210,15 @@ os_thread_exit( #endif } +#ifdef HAVE_PTHREAD_JOIN +int +os_thread_join( +/*=============*/ + os_thread_id_t thread_id) /* in: id of the thread to join */ +{ + return pthread_join(thread_id, NULL); +} +#endif /********************************************************************* Returns handle to the current thread. */ @@ -253,7 +262,7 @@ os_thread_sleep( ulint tm) /* in: time in microseconds */ { #ifdef __WIN__ - Sleep(tm / 1000); + Sleep((DWORD) tm / 1000); #elif defined(__NETWARE__) delay(tm / 1000); #else |