summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2020-11-25 09:40:12 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2020-11-25 09:40:12 +0200
commit7a9405e3dc8741d50658e976b7e16c1807c2b9a7 (patch)
treec6ad0dc6cd195ffe0e0ae36de5ca6533acb6e8c0
parent1b12e251cdc692e4ce2dd130ea45e7b17a9ea5e1 (diff)
downloadmariadb-git-7a9405e3dc8741d50658e976b7e16c1807c2b9a7.tar.gz
MDEV-24270 Misuse of io_getevents() causes wake-ups at least twice per second
In the asynchronous I/O interface, InnoDB is invoking io_getevents() with a timeout value of half a second, and requesting exactly 1 event at a time. The reason to have such a short timeout is to facilitate shutdown. We can do better: Use an infinite timeout, wait for a larger maximum number of events. On shutdown, we will invoke io_destroy(), which should lead to the io_getevents system call reporting EINVAL. my_getevents(): Reimplement the libaio io_getevents() by only invoking the system call. The library implementation would try to elide the system call and return 0 immediately if aio_ring_is_empty() holds. Here, we do want a blocking system call, not 100% CPU usage. Neither do we want the aio_ring_is_empty() trigger SIGSEGV because it is dereferencing some memory that was freed by io_destroy().
-rw-r--r--tpool/aio_linux.cc161
1 files changed, 79 insertions, 82 deletions
diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc
index 24bc04c75ba..51b656a604b 100644
--- a/tpool/aio_linux.cc
+++ b/tpool/aio_linux.cc
@@ -1,4 +1,4 @@
-/* Copyright(C) 2019 MariaDB Corporation.
+/* Copyright (C) 2019, 2020, MariaDB Corporation.
This program is free software; you can redistribute itand /or modify
it under the terms of the GNU General Public License as published by
@@ -14,133 +14,133 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
#include "tpool_structs.h"
-
-#include <stdlib.h>
-#include <signal.h>
-#include <assert.h>
#include "tpool.h"
-#include <thread>
+
#ifdef LINUX_NATIVE_AIO
-#include <libaio.h>
+# include <thread>
+# include <atomic>
+# include <libaio.h>
+# include <sys/syscall.h>
+
+/** A simpler alternative to io_getevents(), without
+aio_ring_is_empty() that may trigger SIGSEGV */
+static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
+{
+ int saved_errno= errno;
+ int ret= syscall(__NR_io_getevents, reinterpret_cast<long>(ctx),
+ min_nr, nr, ev, 0);
+ if (ret < 0)
+ {
+ ret= -errno;
+ errno= saved_errno;
+ }
+ return ret;
+}
#endif
/*
Linux AIO implementation, based on native AIO.
Needs libaio.h and -laio at the compile time.
- submit_io() is used to submit async IO.
+ io_submit() is used to submit async IO.
- There is a single thread, that collects the completion notification
- with io_getevent(), and forwards io completion callback
+ A single thread will collect the completion notification
+ with io_getevents() and forward io completion callback to
the worker threadpool.
*/
namespace tpool
{
#ifdef LINUX_NATIVE_AIO
-class aio_linux : public aio
+class aio_linux final : public aio
{
- thread_pool* m_pool;
+ thread_pool *m_pool;
io_context_t m_io_ctx;
- bool m_in_shutdown;
std::thread m_getevent_thread;
+ static std::atomic<bool> shutdown_in_progress;
- static void getevent_thread_routine(aio_linux* aio)
+ static void getevent_thread_routine(aio_linux *aio)
{
+ io_event events[1];
for (;;)
{
- io_event event;
- struct timespec ts{0, 500000000};
- int ret = io_getevents(aio->m_io_ctx, 1, 1, &event, &ts);
-
- if (aio->m_in_shutdown)
- break;
-
- if (ret > 0)
- {
- aiocb* iocb = (aiocb*)event.obj;
- long long res = event.res;
- if (res < 0)
+ switch (int ret= my_getevents(aio->m_io_ctx, 1, 1, events)) {
+ case -EINTR:
+ case 0:
+ continue;
+ case -EINVAL:
+ if (shutdown_in_progress)
+ return;
+ /* fall through */
+ default:
+ if (ret != 1)
{
- iocb->m_err = static_cast<int>(-res);
- iocb->m_ret_len = 0;
+ fprintf(stderr, "io_getevents returned %d\n", ret);
+ abort();
+ return;
}
else
{
- iocb->m_ret_len = ret;
- iocb->m_err = 0;
+ const io_event &event= events[0];
+ aiocb *iocb= static_cast<aiocb*>(event.obj);
+ if (static_cast<int>(event.res) < 0)
+ {
+ iocb->m_err= -event.res;
+ iocb->m_ret_len= 0;
+ }
+ else
+ {
+ iocb->m_ret_len= event.res;
+ iocb->m_err= 0;
+ }
+ iocb->m_internal_task.m_func= iocb->m_callback;
+ iocb->m_internal_task.m_arg= iocb;
+ iocb->m_internal_task.m_group= iocb->m_group;
+ aio->m_pool->submit_task(&iocb->m_internal_task);
}
-
- iocb->m_internal_task.m_func = iocb->m_callback;
- iocb->m_internal_task.m_arg = iocb;
- iocb->m_internal_task.m_group = iocb->m_group;
- aio->m_pool->submit_task(&iocb->m_internal_task);
- continue;
- }
- switch (ret)
- {
- case -EAGAIN:
- usleep(1000);
- continue;
- case -EINTR:
- case 0:
- continue;
- default:
- fprintf(stderr, "io_getevents returned %d\n", ret);
- abort();
}
}
}
public:
- aio_linux(io_context_t ctx, thread_pool* pool)
+ aio_linux(io_context_t ctx, thread_pool *pool)
: m_pool(pool), m_io_ctx(ctx),
- m_in_shutdown(), m_getevent_thread(getevent_thread_routine, this)
+ m_getevent_thread(getevent_thread_routine, this)
{
}
~aio_linux()
{
- m_in_shutdown = true;
- m_getevent_thread.join();
+ shutdown_in_progress= true;
io_destroy(m_io_ctx);
+ m_getevent_thread.join();
+ shutdown_in_progress= false;
}
- // Inherited via aio
- virtual int submit_io(aiocb* cb) override
+ int submit_io(aiocb *cb) override
{
-
- if (cb->m_opcode == aio_opcode::AIO_PREAD)
- io_prep_pread((iocb *)cb, cb->m_fh, cb->m_buffer, cb->m_len,
- cb->m_offset);
- else
- io_prep_pwrite((iocb *)cb, cb->m_fh, cb->m_buffer, cb->m_len,
- cb->m_offset);
-
- int ret;
- ret = io_submit(m_io_ctx, 1, (iocb * *)& cb);
+ io_prep_pread(static_cast<iocb*>(cb), cb->m_fh, cb->m_buffer, cb->m_len,
+ cb->m_offset);
+ if (cb->m_opcode != aio_opcode::AIO_PREAD)
+ cb->aio_lio_opcode= IO_CMD_PWRITE;
+ iocb *icb= static_cast<iocb*>(cb);
+ int ret= io_submit(m_io_ctx, 1, &icb);
if (ret == 1)
return 0;
- errno = -ret;
+ errno= -ret;
return -1;
}
- // Inherited via aio
- virtual int bind(native_file_handle& fd) override
- {
- return 0;
- }
- virtual int unbind(const native_file_handle& fd) override
- {
- return 0;
- }
+ int bind(native_file_handle&) override { return 0; }
+ int unbind(const native_file_handle&) override { return 0; }
};
-aio* create_linux_aio(thread_pool* pool, int max_io)
+std::atomic<bool> aio_linux::shutdown_in_progress;
+
+aio *create_linux_aio(thread_pool *pool, int max_io)
{
io_context_t ctx;
- memset(&ctx, 0, sizeof(ctx));
- int ret = io_setup(max_io, &ctx);
- if (ret)
+ memset(&ctx, 0, sizeof ctx);
+ if (int ret= io_setup(max_io, &ctx))
{
fprintf(stderr, "io_setup(%d) returned %d\n", max_io, ret);
return nullptr;
@@ -148,9 +148,6 @@ aio* create_linux_aio(thread_pool* pool, int max_io)
return new aio_linux(ctx, pool);
}
#else
-aio* create_linux_aio(thread_pool* pool, int max_aio)
-{
- return nullptr;
-}
+aio *create_linux_aio(thread_pool*, int) { return nullptr; }
#endif
}