summaryrefslogtreecommitdiff
path: root/storage/xtradb/os
diff options
context:
space:
mode:
author Sergei Golubchik <serg@mariadb.org> 2017-03-30 12:48:42 +0200
committer Sergei Golubchik <serg@mariadb.org> 2017-03-30 12:48:42 +0200
commit da4d71d10d23c1ac2d10b72baee14991ccb7a146 (patch)
tree 7cdf3a8c8e72ca7c1c8105427c04123f025bd870 /storage/xtradb/os
parent 9ec85009985d644ce7ae797bc3572d0ad0f69bb0 (diff)
parent a00517ac9707ffd51c092f5af5d198c5ee789bb4 (diff)
download mariadb-git-da4d71d10d23c1ac2d10b72baee14991ccb7a146.tar.gz
Merge branch '10.1' into 10.2
Diffstat (limited to 'storage/xtradb/os')
-rw-r--r-- storage/xtradb/os/os0file.cc 286
-rw-r--r-- storage/xtradb/os/os0thread.cc 28
2 files changed, 148 insertions, 166 deletions
diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc
index caf2becae72..ed84834e6ea 100644
--- a/storage/xtradb/os/os0file.cc
+++ b/storage/xtradb/os/os0file.cc
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2016, MariaDB Corporation.
+Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@@ -258,11 +258,15 @@ struct os_aio_array_t{
os_event_t not_full;
/*!< The event which is set to the
signaled state when there is space in
- the aio outside the ibuf segment */
+ the aio outside the ibuf segment;
+ os_event_set() and os_event_reset()
+ are protected by os_aio_array_t::mutex */
os_event_t is_empty;
/*!< The event which is set to the
signaled state when there are no
- pending i/os in this array */
+ pending i/os in this array;
+ os_event_set() and os_event_reset()
+ are protected by os_aio_array_t::mutex */
ulint n_slots;/*!< Total number of slots in the aio
array. This must be divisible by
n_threads. */
@@ -304,8 +308,8 @@ struct os_aio_array_t{
#define OS_AIO_IO_SETUP_RETRY_ATTEMPTS 5
#endif
-/** Array of events used in simulated aio */
-static os_event_t* os_aio_segment_wait_events = NULL;
+/** Array of events used in simulated aio. */
+static os_event_t* os_aio_segment_wait_events;
/** The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
are NULL when the module has not yet been initialized. @{ */
@@ -342,16 +346,17 @@ static os_ib_mutex_t os_file_count_mutex;
#endif /* !UNIV_HOTBACKUP && (!HAVE_ATOMIC_BUILTINS || UNIV_WORD_SIZE < 8) */
/** Number of pending os_file_pread() operations */
-UNIV_INTERN ulint os_file_n_pending_preads = 0;
+UNIV_INTERN ulint os_file_n_pending_preads;
/** Number of pending os_file_pwrite() operations */
-UNIV_INTERN ulint os_file_n_pending_pwrites = 0;
+UNIV_INTERN ulint os_file_n_pending_pwrites;
/** Number of pending write operations */
-UNIV_INTERN ulint os_n_pending_writes = 0;
+UNIV_INTERN ulint os_n_pending_writes;
/** Number of pending read operations */
-UNIV_INTERN ulint os_n_pending_reads = 0;
+UNIV_INTERN ulint os_n_pending_reads;
+#if defined(WIN_ASYNC_IO) || defined(LINUX_NATIVE_AIO)
/** After first fallocate failure we will disable os_file_trim */
-UNIV_INTERN ibool os_fallocate_failed = FALSE;
+static bool os_fallocate_failed;
/**********************************************************************//**
Directly manipulate the allocated disk space by deallocating for the file referred to
@@ -360,11 +365,12 @@ Within the specified range, partial file system blocks are zeroed, and whole
file system blocks are removed from the file. After a successful call,
subsequent reads from this range will return zeroes.
@return true if success, false if error */
-UNIV_INTERN
+static
ibool
os_file_trim(
/*=========*/
os_aio_slot_t* slot); /*!< in: slot structure */
+#endif /* WIN_ASYNC_IO || LINUX_NATIVE_AIO */
/****************************************************************//**
Does error handling when a file operation fails.
@@ -1230,50 +1236,15 @@ next_file:
char* full_path;
int ret;
struct stat statinfo;
-#ifdef HAVE_READDIR_R
- char dirent_buf[sizeof(struct dirent)
- + _POSIX_PATH_MAX + 100];
- /* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
- the max file name len; but in most standards, the
- length is NAME_MAX; we add 100 to be even safer */
-#endif
next_file:
-#ifdef HAVE_READDIR_R
- ret = readdir_r(dir, (struct dirent*) dirent_buf, &ent);
-
- if (ret != 0
-#ifdef UNIV_AIX
- /* On AIX, only if we got non-NULL 'ent' (result) value and
- a non-zero 'ret' (return) value, it indicates a failed
- readdir_r() call. An NULL 'ent' with an non-zero 'ret'
- would indicate the "end of the directory" is reached. */
- && ent != NULL
-#endif
- ) {
- fprintf(stderr,
- "InnoDB: cannot read directory %s, error %lu\n",
- dirname, (ulong) ret);
-
- return(-1);
- }
-
- if (ent == NULL) {
- /* End of directory */
-
- return(1);
- }
-
- ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
-#else
ent = readdir(dir);
if (ent == NULL) {
return(1);
}
-#endif
ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
@@ -1601,9 +1572,13 @@ os_file_set_nocache_if_needed(os_file_t file, const char* name,
if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT
|| (type == OS_DATA_FILE
&& (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT
- || (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)))) {
- os_file_set_nocache(file, name, mode_str);
- }
+ || (srv_unix_file_flush_method
+ == SRV_UNIX_O_DIRECT_NO_FSYNC))))
+ /* Do fsync() on log files when setting O_DIRECT fails.
+ See log_io_complete() */
+ if (!os_file_set_nocache(file, name, mode_str)
+ && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)
+ srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
}
/****************************************************************//**
@@ -1811,9 +1786,10 @@ os_file_create_simple_no_error_handling_func(
}
/****************************************************************//**
-Tries to disable OS caching on an opened file descriptor. */
+Tries to disable OS caching on an opened file descriptor.
+@return true if the operation succeeded, false otherwise */
UNIV_INTERN
-void
+bool
os_file_set_nocache(
/*================*/
os_file_t fd /*!< in: file descriptor to alter */
@@ -1834,6 +1810,7 @@ os_file_set_nocache(
"Failed to set DIRECTIO_ON on file %s: %s: %s, "
"continuing anyway.",
file_name, operation_name, strerror(errno_save));
+ return false;
}
#elif defined(O_DIRECT)
if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
@@ -1864,8 +1841,10 @@ short_warning:
"continuing anyway.",
file_name, operation_name, strerror(errno_save));
}
+ return false;
}
#endif /* defined(UNIV_SOLARIS) && defined(DIRECTIO_ON) */
+ return true;
}
@@ -2005,10 +1984,6 @@ os_file_create_func(
attributes |= FILE_FLAG_NO_BUFFERING;
#else
if (purpose == OS_FILE_AIO) {
-
- bool encrypt_later; /*!< should the page be encrypted
- before write */
-
#ifdef WIN_ASYNC_IO
/* If specified, use asynchronous (overlapped) io and no
buffering of writes in the OS */
@@ -2537,60 +2512,80 @@ os_file_get_size(
#endif /* __WIN__ */
}
-/***********************************************************************//**
-Write the specified number of zeros to a newly created file.
-@return TRUE if success */
+/** Set the size of a newly created file.
+@param[in] name file name
+@param[in] file file handle
+@param[in] size desired file size
+@param[in] is_sparse whether to create a sparse file (no preallocating)
+@return whether the operation succeeded */
UNIV_INTERN
-ibool
+bool
os_file_set_size(
-/*=============*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- os_offset_t size) /*!< in: file size */
+ const char* name,
+ os_file_t file,
+ os_offset_t size,
+ bool is_sparse)
{
- os_offset_t current_size;
- ibool ret;
- byte* buf;
- byte* buf2;
- ulint buf_size;
-
- current_size = 0;
+#ifdef _WIN32
+ FILE_END_OF_FILE_INFO feof;
+ feof.EndOfFile.QuadPart = size;
+ bool success = SetFileInformationByHandle(file,
+ FileEndOfFileInfo,
+ &feof, sizeof feof);
+ if (!success) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_set_size() of file %s"
+ " to " INT64PF " bytes failed with %u",
+ name, size, GetLastError());
+ }
+ return(success);
+#else
+ if (is_sparse) {
+ bool success = !ftruncate(file, size);
+ if (!success) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s"
+ " to " INT64PF " bytes failed with error %d",
+ name, size, errno);
+ }
+ return(success);
+ }
-#ifdef HAVE_POSIX_FALLOCATE
+# ifdef HAVE_POSIX_FALLOCATE
if (srv_use_posix_fallocate) {
+ int err;
+ do {
+ err = posix_fallocate(file, 0, size);
+ } while (err == EINTR
+ && srv_shutdown_state == SRV_SHUTDOWN_NONE);
- if (posix_fallocate(file, current_size, size) == -1) {
-
- ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file "
- "space for file \'%s\' failed. Current size "
- INT64PF ", desired size " INT64PF,
- name, current_size, size);
- os_file_handle_error_no_exit (name, "posix_fallocate",
- FALSE, __FILE__, __LINE__);
- return(FALSE);
+ if (err) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "preallocating " INT64PF " bytes for"
+ "file %s failed with error %d",
+ size, name, err);
}
- return(TRUE);
+ return(!err);
}
-#endif
+# endif
/* Write up to 1 megabyte at a time. */
- buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
+ ulint buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
- buf2 = static_cast<byte*>(ut_malloc(buf_size + UNIV_PAGE_SIZE));
-
- /* Align the buffer for possible raw i/o */
- buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+ os_offset_t current_size = 0;
- /* Write buffer full of zeros */
- memset(buf, 0, buf_size);
+ byte* buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
- if (size >= (os_offset_t) 100 << 20) {
-
- fprintf(stderr, "InnoDB: Progress in MB:");
+ if (!buf2) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot allocate " ULINTPF " bytes to extend file\n",
+ buf_size + UNIV_PAGE_SIZE);
+ return(false);
}
- while (current_size < size) {
+ /* Align the buffer for possible raw i/o */
+ byte* buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+ bool ret;
+
+ do {
ulint n_bytes;
if (size - current_size < (os_offset_t) buf_size) {
@@ -2602,37 +2597,16 @@ os_file_set_size(
ret = os_file_write(name, file, buf, current_size, n_bytes);
if (!ret) {
- ut_free(buf2);
- goto error_handling;
- }
-
- /* Print about progress for each 100 MB written */
- if ((current_size + n_bytes) / (100 << 20)
- != current_size / (100 << 20)) {
-
- fprintf(stderr, " %lu00",
- (ulong) ((current_size + n_bytes)
- / (100 << 20)));
+ break;
}
current_size += n_bytes;
- }
-
- if (size >= (os_offset_t) 100 << 20) {
-
- fprintf(stderr, "\n");
- }
-
- ut_free(buf2);
+ } while (current_size < size);
- ret = os_file_flush(file);
+ free(buf2);
- if (ret) {
- return(TRUE);
- }
-
-error_handling:
- return(FALSE);
+ return(ret && os_file_flush(file));
+#endif
}
/***********************************************************************//**
@@ -4435,13 +4409,6 @@ os_aio_init(
os_aio_validate();
- os_aio_segment_wait_events = static_cast<os_event_t*>(
- ut_malloc(n_segments * sizeof *os_aio_segment_wait_events));
-
- for (ulint i = 0; i < n_segments; ++i) {
- os_aio_segment_wait_events[i] = os_event_create();
- }
-
os_last_printout = ut_time();
#ifdef _WIN32
@@ -4451,8 +4418,18 @@ os_aio_init(
ut_a(completion_port && read_completion_port);
#endif
- return(TRUE);
+ if (srv_use_native_aio) {
+ return(TRUE);
+ }
+ os_aio_segment_wait_events = static_cast<os_event_t*>(
+ ut_malloc(n_segments * sizeof *os_aio_segment_wait_events));
+
+ for (ulint i = 0; i < n_segments; ++i) {
+ os_aio_segment_wait_events[i] = os_event_create();
+ }
+
+ return(TRUE);
}
/***********************************************************************
@@ -4480,8 +4457,10 @@ os_aio_free(void)
os_aio_array_free(os_aio_read_array);
- for (ulint i = 0; i < os_aio_n_segments; i++) {
- os_event_free(os_aio_segment_wait_events[i]);
+ if (!srv_use_native_aio) {
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
+ os_event_free(os_aio_segment_wait_events[i]);
+ }
}
#if !defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8
@@ -4541,22 +4520,17 @@ os_aio_wake_all_threads_at_shutdown(void)
if (os_aio_log_array != 0) {
os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
}
-
#elif defined(LINUX_NATIVE_AIO)
-
/* When using native AIO interface the io helper threads
wait on io_getevents with a timeout value of 500ms. At
each wake up these threads check the server status.
No need to do anything to wake them up. */
+#endif /* !WIN_ASYNC_AIO */
if (srv_use_native_aio) {
return;
}
- /* Fall through to simulated AIO handler wakeup if we are
- not using native AIO. */
-#endif /* !WIN_ASYNC_AIO */
-
/* This loop wakes up all simulated ai/o threads */
for (ulint i = 0; i < os_aio_n_segments; i++) {
@@ -4939,6 +4913,7 @@ os_aio_simulated_wake_handler_threads(void)
}
}
+#ifdef _WIN32
/**********************************************************************//**
This function can be called if one wants to post a batch of reads and
prefers an i/o-handler thread to handle them all at once later. You must
@@ -4946,15 +4921,14 @@ call os_aio_simulated_wake_handler_threads later to ensure the threads
are not left sleeping! */
UNIV_INTERN
void
-os_aio_simulated_put_read_threads_to_sleep(void)
-/*============================================*/
+os_aio_simulated_put_read_threads_to_sleep()
{
/* The idea of putting background IO threads to sleep is only for
Windows when using simulated AIO. Windows XP seems to schedule
background threads too eagerly to allow for coalescing during
readahead requests. */
-#ifdef __WIN__
+
os_aio_array_t* array;
if (srv_use_native_aio) {
@@ -4973,8 +4947,8 @@ readahead requests. */
os_event_reset(os_aio_segment_wait_events[i]);
}
}
-#endif /* __WIN__ */
}
+#endif /* _WIN32 */
#if defined(LINUX_NATIVE_AIO)
/*******************************************************************//**
@@ -5364,7 +5338,7 @@ os_aio_windows_handle(
}
if (slot->type == OS_FILE_WRITE) {
- if (!slot->is_log && srv_use_trim && os_fallocate_failed == FALSE) {
+ if (!slot->is_log && srv_use_trim && !os_fallocate_failed) {
// Deallocate unused blocks from file system
os_file_trim(slot);
}
@@ -5460,7 +5434,8 @@ retry:
ut_a(slot->pos < end_pos);
if (slot->type == OS_FILE_WRITE) {
- if (!slot->is_log && srv_use_trim && os_fallocate_failed == FALSE) {
+ if (!slot->is_log && srv_use_trim
+ && !os_fallocate_failed) {
// Deallocate unused blocks from file system
os_file_trim(slot);
}
@@ -6178,11 +6153,12 @@ os_aio_print(
srv_io_thread_op_info[i],
srv_io_thread_function[i]);
-#ifndef __WIN__
- if (os_aio_segment_wait_events[i]->is_set()) {
+#ifndef _WIN32
+ if (!srv_use_native_aio
+ && os_aio_segment_wait_events[i]->is_set()) {
fprintf(file, " ev set");
}
-#endif /* __WIN__ */
+#endif /* _WIN32 */
fprintf(file, "\n");
}
@@ -6361,6 +6337,7 @@ typedef struct _FILE_LEVEL_TRIM {
#endif
#endif
+#if defined(WIN_ASYNC_IO) || defined(LINUX_NATIVE_AIO)
/**********************************************************************//**
Directly manipulate the allocated disk space by deallocating for the file referred to
by fd for the byte range starting at offset and continuing for len bytes.
@@ -6368,7 +6345,7 @@ Within the specified range, partial file system blocks are zeroed, and whole
file system blocks are removed from the file. After a successful call,
subsequent reads from this range will return zeroes.
@return true if success, false if error */
-UNIV_INTERN
+static
ibool
os_file_trim(
/*=========*/
@@ -6413,13 +6390,13 @@ os_file_trim(
if (ret) {
/* After first failure do not try to trim again */
- os_fallocate_failed = TRUE;
+ os_fallocate_failed = true;
srv_use_trim = FALSE;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: fallocate call failed with error code %d.\n"
- " InnoDB: start: %lu len: %lu payload: %lu\n"
- " InnoDB: Disabling fallocate for now.\n", errno, (ulong) off, (ulong) trim_len, (ulong) len);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "fallocate() failed with error %d."
+ " start: " UINT64PF " len: " ULINTPF " payload: " ULINTPF "."
+ " Disabling fallocate for now.",
+ errno, off, ulint(trim_len), ulint(len));
os_file_handle_error_no_exit(slot->name,
" fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ",
@@ -6440,7 +6417,7 @@ os_file_trim(
fprintf(stderr,
" InnoDB: Warning: fallocate not supported on this installation."
" InnoDB: Disabling fallocate for now.");
- os_fallocate_failed = TRUE;
+ os_fallocate_failed = true;
srv_use_trim = FALSE;
if (slot->write_size) {
*slot->write_size = 0;
@@ -6460,7 +6437,7 @@ os_file_trim(
if (!ret) {
/* After first failure do not try to trim again */
- os_fallocate_failed = TRUE;
+ os_fallocate_failed = true;
srv_use_trim = FALSE;
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -6514,6 +6491,7 @@ os_file_trim(
return (TRUE);
}
+#endif /* WIN_ASYNC_IO || LINUX_NATIVE_AIO */
/***********************************************************************//**
Try to get number of bytes per sector from file system.
diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc
index 5ddc40b0eeb..8baf06b9bb7 100644
--- a/storage/xtradb/os/os0thread.cc
+++ b/storage/xtradb/os/os0thread.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -206,29 +206,32 @@ os_thread_create_func(
#endif
}
-/**
-Waits until the specified thread completes and joins it. Its return value is
-ignored.
-
-@param thread thread to join */
+/** Waits until the specified thread completes and joins it.
+Its return value is ignored.
+@param[in,out] thread thread to join */
UNIV_INTERN
void
os_thread_join(
os_thread_t thread)
{
- /*This function is currently only used to workaround glibc bug
+ /* This function is currently only used to work around the glibc bug
described in http://bugs.mysql.com/bug.php?id=82886
On Windows, no workarounds are necessary, all threads
are "detached" upon thread exit (handle is closed), so we do
nothing.
*/
-#ifndef _WIN32
- int ret MY_ATTRIBUTE((unused)) = pthread_join(thread, NULL);
+#ifdef __WIN__
+ /* Do nothing. */
+#else
+#ifdef UNIV_DEBUG
+ const int ret MY_ATTRIBUTE((unused)) =
+#endif /* UNIV_DEBUG */
+ pthread_join(thread, NULL);
- /* Waiting on already-quit threads is allowed */
+ /* Waiting on already-quit threads is allowed. */
ut_ad(ret == 0 || ret == ESRCH);
-#endif
+#endif /* __WIN__ */
}
/*****************************************************************//**
@@ -257,8 +260,9 @@ os_thread_exit(
#ifdef __WIN__
ExitThread((DWORD) exit_value);
#else
- if (detach)
+ if (detach) {
pthread_detach(pthread_self());
+ }
pthread_exit(exit_value);
#endif
}