summaryrefslogtreecommitdiff
path: root/tpool/aio_liburing.cc
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2023-04-18 16:19:39 +0300
committerMarko Mäkelä <marko.makela@mariadb.com>2023-04-18 16:19:39 +0300
commit821e3d52c0e7405aea7054e138604ae1a4e8c921 (patch)
tree1dbf11353bfd7ca04e176bc719e3dcec42eafe11 /tpool/aio_liburing.cc
parent6c196090c8c265bfd93df1e2ee6b18cda2b1d2d8 (diff)
downloadmariadb-git-821e3d52c0e7405aea7054e138604ae1a4e8c921.tar.gz
MDEV-30986 hybrid build with 10.6 InnoDB10.56-MDEV-30986
Diffstat (limited to 'tpool/aio_liburing.cc')
-rw-r--r--tpool/aio_liburing.cc209
1 files changed, 209 insertions, 0 deletions
diff --git a/tpool/aio_liburing.cc b/tpool/aio_liburing.cc
new file mode 100644
index 00000000000..447c2335c74
--- /dev/null
+++ b/tpool/aio_liburing.cc
@@ -0,0 +1,209 @@
+/* Copyright (C) 2021, 2022, MariaDB Corporation.
+
+This program is free software; you can redistribute itand /or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
+
+#include "tpool_structs.h"
+#include "tpool.h"
+#include "mysql/service_my_print_error.h"
+#include "mysqld_error.h"
+
+#include <liburing.h>
+
+#include <algorithm>
+#include <vector>
+#include <thread>
+#include <mutex>
+#include <stdexcept>
+
+namespace
+{
+
+class aio_uring final : public tpool::aio
+{
+public:
+ aio_uring(tpool::thread_pool *tpool, int max_aio) : tpool_(tpool)
+ {
+ if (io_uring_queue_init(max_aio, &uring_, 0) != 0)
+ {
+ switch (const auto e= errno) {
+ case ENOMEM:
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "io_uring_queue_init() failed with ENOMEM:"
+ " try larger memory locked limit, ulimit -l"
+ ", or https://mariadb.com/kb/en/systemd/#configuring-limitmemlock"
+ " under systemd"
+#ifdef HAVE_IO_URING_MLOCK_SIZE
+ " (%zd bytes required)", ME_ERROR_LOG | ME_WARNING,
+ io_uring_mlock_size(max_aio, 0));
+#else
+ , ME_ERROR_LOG | ME_WARNING);
+#endif
+ break;
+ case ENOSYS:
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "io_uring_queue_init() failed with ENOSYS:"
+ " check seccomp filters, and the kernel version "
+ "(newer than 5.1 required)",
+ ME_ERROR_LOG | ME_WARNING);
+ break;
+ default:
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "io_uring_queue_init() failed with errno %d",
+ ME_ERROR_LOG | ME_WARNING, e);
+ }
+ throw std::runtime_error("aio_uring()");
+ }
+ if (io_uring_ring_dontfork(&uring_) != 0)
+ {
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "io_uring_dontfork() failed with errno %d (continuing)",
+ ME_ERROR_LOG | ME_WARNING, errno);
+ }
+
+ thread_= std::thread(thread_routine, this);
+ }
+
+ ~aio_uring() noexcept
+ {
+ {
+ std::lock_guard<std::mutex> _(mutex_);
+ io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
+ io_uring_prep_nop(sqe);
+ io_uring_sqe_set_data(sqe, nullptr);
+ auto ret= io_uring_submit(&uring_);
+ if (ret != 1)
+ {
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "io_uring_submit() returned %d during shutdown:"
+ " this may cause a hang\n",
+ ME_ERROR_LOG | ME_FATAL, ret);
+ abort();
+ }
+ }
+ thread_.join();
+ io_uring_queue_exit(&uring_);
+ }
+
+ int submit_io(tpool::aiocb *cb) final
+ {
+ cb->iov_base= cb->m_buffer;
+ cb->iov_len= cb->m_len;
+
+ // The whole operation since io_uring_get_sqe() and till io_uring_submit()
+ // must be atomical. This is because liburing provides thread-unsafe calls.
+ std::lock_guard<std::mutex> _(mutex_);
+
+ io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
+ if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD)
+ io_uring_prep_readv(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
+ cb->m_offset);
+ else
+ io_uring_prep_writev(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
+ cb->m_offset);
+ io_uring_sqe_set_data(sqe, cb);
+
+ return io_uring_submit(&uring_) == 1 ? 0 : -1;
+ }
+
+ int bind(native_file_handle &fd) final
+ {
+ std::lock_guard<std::mutex> _(files_mutex_);
+ auto it= std::lower_bound(files_.begin(), files_.end(), fd);
+ assert(it == files_.end() || *it != fd);
+ files_.insert(it, fd);
+ return io_uring_register_files_update(&uring_, 0, files_.data(),
+ files_.size());
+ }
+
+ int unbind(const native_file_handle &fd) final
+ {
+ std::lock_guard<std::mutex> _(files_mutex_);
+ auto it= std::lower_bound(files_.begin(), files_.end(), fd);
+ assert(*it == fd);
+ files_.erase(it);
+ return io_uring_register_files_update(&uring_, 0, files_.data(),
+ files_.size());
+ }
+
+private:
+ static void thread_routine(aio_uring *aio)
+ {
+ for (;;)
+ {
+ io_uring_cqe *cqe;
+ if (int ret= io_uring_wait_cqe(&aio->uring_, &cqe))
+ {
+ if (ret == -EINTR)
+ continue;
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "io_uring_wait_cqe() returned %d\n",
+ ME_ERROR_LOG | ME_FATAL, ret);
+ abort();
+ }
+
+ auto *iocb= static_cast<tpool::aiocb*>(io_uring_cqe_get_data(cqe));
+ if (!iocb)
+ break; // ~aio_uring() told us to terminate
+
+ int res= cqe->res;
+ if (res < 0)
+ {
+ iocb->m_err= -res;
+ iocb->m_ret_len= 0;
+ }
+ else
+ {
+ iocb->m_err= 0;
+ iocb->m_ret_len= res;
+ }
+
+ io_uring_cqe_seen(&aio->uring_, cqe);
+ finish_synchronous(iocb);
+
+ // If we need to resubmit the IO operation, but the ring is full,
+ // we will follow the same path as for any other error codes.
+ if (res == -EAGAIN && !aio->submit_io(iocb))
+ continue;
+
+ iocb->m_internal_task.m_func= iocb->m_callback;
+ iocb->m_internal_task.m_arg= iocb;
+ iocb->m_internal_task.m_group= iocb->m_group;
+ aio->tpool_->submit_task(&iocb->m_internal_task);
+ }
+ }
+
+ io_uring uring_;
+ std::mutex mutex_;
+ tpool::thread_pool *tpool_;
+ std::thread thread_;
+
+ std::vector<native_file_handle> files_;
+ std::mutex files_mutex_;
+};
+
+} // namespace
+
+namespace tpool
+{
+
+aio *create_linux_aio(thread_pool *pool, int max_aio)
+{
+ try {
+ return new aio_uring(pool, max_aio);
+ } catch (std::runtime_error& error) {
+ return nullptr;
+ }
+}
+
+} // namespace tpool