summaryrefslogtreecommitdiff
path: root/storage/xtradb
diff options
context:
space:
mode:
authorVladislav Vaintroub <wlad@montyprogram.com>2012-01-08 21:14:07 +0100
committerVladislav Vaintroub <wlad@montyprogram.com>2012-01-08 21:14:07 +0100
commitcf86abffbfe5fb95dc79260e6a21332d1adadd2a (patch)
tree3a8d56ffff2a3445e4ee3c358e4e728b5f3822bd /storage/xtradb
parentcd55894a52b5b2bf4a740eb0f39d91d7a82f673c (diff)
downloadmariadb-git-cf86abffbfe5fb95dc79260e6a21332d1adadd2a.tar.gz
MDEV-77 - possible deadlock in XtraDB async io subsystem on Windows.
Split the IO threads into ones that handle only read completions and ones that handle only write completions, as was originally done before this separation got lost with the "completion port" patch. Dedicated read threads and dedicated write threads are needed because the read completion routine can block waiting for write IO to complete; in the rare case where all IO threads are handling async reads, this can deadlock.
Diffstat (limited to 'storage/xtradb')
-rw-r--r--storage/xtradb/os/os0file.c56
1 files changed, 45 insertions, 11 deletions
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index ef20869c4c5..810a254f9fc 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -249,6 +249,8 @@ UNIV_INTERN ulint os_n_pending_reads = 0;
#ifdef _WIN32
/** IO completion port used by background io threads */
static HANDLE completion_port;
+/** IO completion port used by background io READ threads */
+static HANDLE read_completion_port;
/** Thread local storage index for the per-thread event used for synchronous IO */
static DWORD tls_sync_io = TLS_OUT_OF_INDEXES;
#endif
@@ -3251,9 +3253,10 @@ os_aio_init(
os_last_printout = time(NULL);
#ifdef _WIN32
- ut_a(completion_port == 0);
+ ut_a(completion_port == 0 && read_completion_port == 0);
completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
- ut_a(completion_port);
+ read_completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
+ ut_a(completion_port && read_completion_port);
#endif
}
@@ -3299,6 +3302,7 @@ os_aio_array_wake_win_aio_at_shutdown(
if(completion_port)
{
PostQueuedCompletionStatus(completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
+ PostQueuedCompletionStatus(read_completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
}
}
#endif
@@ -3860,6 +3864,9 @@ try_again:
}
#ifdef WIN_ASYNC_IO
+#define READ_SEGMENT(x) ((x) < srv_n_read_io_threads) /* nonzero if segment x waits on read_completion_port */
+#define WRITE_SEGMENT(x) (!READ_SEGMENT(x)) /* logical negation of READ_SEGMENT(x) */
+
/**********************************************************************//**
This function is only used in Windows asynchronous i/o.
Waits for an aio operation to complete. This function is used to wait the
@@ -3898,18 +3905,45 @@ os_aio_windows_handle(
DWORD len;
BOOL retry = FALSE;
ULONG_PTR key;
+ HANDLE port = READ_SEGMENT(segment)? read_completion_port : completion_port;
- ret = GetQueuedCompletionStatus(completion_port, &len, &key,
- (OVERLAPPED **)&slot, INFINITE);
+ for(;;) {
+ ret = GetQueuedCompletionStatus(port, &len, &key,
+ (OVERLAPPED **)&slot, INFINITE);
- /* If shutdown key was received, repost the shutdown message and exit */
- if (ret && (key == IOCP_SHUTDOWN_KEY)) {
- PostQueuedCompletionStatus(completion_port, 0, key, NULL);
- os_thread_exit(NULL);
- }
+ /* If shutdown key was received, repost the shutdown message and exit */
+ if (ret && (key == IOCP_SHUTDOWN_KEY)) {
+ PostQueuedCompletionStatus(port, 0, key, NULL);
+ os_thread_exit(NULL);
+ }
+
+ if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+ os_thread_exit(NULL);
+ }
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_thread_exit(NULL);
+ if(WRITE_SEGMENT(segment)&& slot->type == OS_FILE_READ) {
+ /*
+ Redirect read completions to the dedicated completion port
+ and thread. We need to split read and write threads. If we do not
+ do that, and just allow all io threads to process all IO, it is possible
+ to get stuck in a deadlock in buffer pool code.
+
+ Currently, the problem is solved this way - "write io" threads
+ always get all completion notifications, from both async reads and
+ writes. Write completion is handled in the same thread that gets it.
+ Read completion is forwarded (via PostQueuedCompletionStatus())
+ to the second completion port dedicated solely to reads. One of the
+ "read io" threads waiting on this port will finally handle the IO.
+
+ Forwarding IO completion this way costs a context switch, and this
+ seems tolerable since asynchronous reads are far less frequent.
+ */
+ ut_a(PostQueuedCompletionStatus(read_completion_port, len, key,
+ &slot->control));
+ }
+ else {
+ break;
+ }
}