summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2021-03-15 11:30:17 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2021-03-15 11:30:17 +0200
commit783625d78f5072a4f986f079628535dc70d7e99f (patch)
treeb098563f485d62c165e2838e6526447ecc146d13
parent3dfda08702df0a1e2aee5bd62c9e4d95915cd08a (diff)
downloadmariadb-git-783625d78f5072a4f986f079628535dc70d7e99f.tar.gz
MDEV-24883 add io_uring support for tpool
liburing is a new optional dependency (WITH_URING=auto|yes|no) that replaces libaio when it is available. aio_uring: class which wraps io_uring stuff aio_uring::bind()/unbind(): optional optimization aio_uring::submit_io(): mutex prevents data race. liburing calls are thread-unsafe. But if you look into it's implementation you'll see atomic operations. They're used for synchronization between kernel and user-space only. That's why our own synchronization is still needed. For systemd, we add LimitMEMLOCK=524288 (ulimit -l 524288) because the io_uring_setup system call that is invoked by io_uring_queue_init() requests locked memory. The value was found empirically; with 262144, we would occasionally fail to enable io_uring when using the maximum values of innodb_read_io_threads=64 and innodb_write_io_threads=64. aio_uring::thread_routine(): Tolerate -EINTR return from io_uring_wait_cqe(), because it may occur on shutdown on Ubuntu 20.10 (Groovy Gorilla). This was mostly implemented by Eugene Kosov. Systemd integration and improved startup/shutdown error handling by Marko Mäkelä.
-rw-r--r--CMakeLists.txt3
-rw-r--r--cmake/systemd.cmake4
-rw-r--r--cmake/uring.cmake20
-rw-r--r--extra/mariabackup/xtrabackup.cc5
-rwxr-xr-xmysql-test/mysql-test-run.pl4
-rw-r--r--storage/innobase/handler/ha_innodb.cc3
-rw-r--r--storage/innobase/innodb.cmake14
-rw-r--r--storage/innobase/os/os0file.cc11
-rw-r--r--storage/innobase/srv/srv0start.cc5
-rw-r--r--support-files/CMakeLists.txt2
-rw-r--r--support-files/mariadb.service.in2
-rw-r--r--support-files/mariadb@.service.in2
-rw-r--r--tpool/CMakeLists.txt11
-rw-r--r--tpool/aio_liburing.cc185
-rw-r--r--tpool/aio_linux.cc6
-rw-r--r--tpool/tpool.h5
-rw-r--r--tpool/tpool_generic.cc4
17 files changed, 263 insertions, 23 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36922e04368..13dce3cafc4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -174,6 +174,7 @@ INCLUDE(mysql_add_executable)
INCLUDE(symlinks)
INCLUDE(compile_flags)
INCLUDE(pmem)
+INCLUDE(uring)
# Handle options
OPTION(DISABLE_SHARED
@@ -394,7 +395,7 @@ MYSQL_CHECK_READLINE()
SET(MALLOC_LIBRARY "system")
CHECK_PCRE()
-
+CHECK_URING()
CHECK_SYSTEMD()
IF(CMAKE_CROSSCOMPILING)
diff --git a/cmake/systemd.cmake b/cmake/systemd.cmake
index e353004e7d2..72cb77b4a6a 100644
--- a/cmake/systemd.cmake
+++ b/cmake/systemd.cmake
@@ -49,6 +49,10 @@ MACRO(CHECK_SYSTEMD)
SET(SYSTEMD_EXECSTARTPRE "ExecStartPre=/usr/bin/install -m 755 -o mysql -g root -d /var/run/mysqld")
SET(SYSTEMD_EXECSTARTPOST "ExecStartPost=/etc/mysql/debian-start")
ENDIF()
+ IF(LIBURING AND HAVE_LIBURING_H AND NOT WITH_URING STREQUAL "no")
+ SET(SYSTEMD_LIMIT "# For liburing and io_uring_setup()
+LimitMEMLOCK=524288")
+ ENDIF()
MESSAGE_ONCE(systemd "Systemd features enabled")
ELSE()
UNSET(LIBSYSTEMD)
diff --git a/cmake/uring.cmake b/cmake/uring.cmake
new file mode 100644
index 00000000000..ac76a6d2048
--- /dev/null
+++ b/cmake/uring.cmake
@@ -0,0 +1,20 @@
+MACRO(CHECK_URING)
+ IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
+ INCLUDE(CheckIncludeFiles)
+ SET(WITH_URING "auto" CACHE STRING "Enable liburing usage")
+ IF(WITH_URING STREQUAL "yes" OR WITH_URING STREQUAL "auto")
+ FIND_LIBRARY(LIBURING uring)
+ CHECK_INCLUDE_FILES(liburing.h HAVE_LIBURING_H)
+ IF (LIBURING AND HAVE_LIBURING_H)
+ ADD_DEFINITIONS(-DHAVE_URING)
+ LINK_LIBRARIES(uring)
+ ELSE()
+ IF(WITH_URING STREQUAL "yes")
+ MESSAGE(FATAL_ERROR "Requested WITH_URING=yes but liburing was not found")
+ ENDIF()
+ ENDIF()
+ ELSEIF(NOT WITH_URING STREQUAL "no")
+ MESSAGE(FATAL_ERROR "Invalid value for WITH_URING. Must be 'yes', 'no', or 'auto'.")
+ ENDIF()
+ ENDIF()
+ENDMACRO()
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc
index 07b34388434..63c0313be9c 100644
--- a/extra/mariabackup/xtrabackup.cc
+++ b/extra/mariabackup/xtrabackup.cc
@@ -2141,6 +2141,11 @@ static bool innodb_init_param()
if (srv_use_native_aio) {
msg("InnoDB: Using Linux native AIO");
}
+#elif defined(HAVE_URING)
+
+ if (srv_use_native_aio) {
+ msg("InnoDB: Using liburing");
+ }
#else
/* Currently native AIO is supported only on windows and linux
and that also when the support is compiled in. In all other
diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl
index fd0e5ffd12a..bda7750a598 100755
--- a/mysql-test/mysql-test-run.pl
+++ b/mysql-test/mysql-test-run.pl
@@ -2,7 +2,7 @@
# -*- cperl -*-
# Copyright (c) 2004, 2014, Oracle and/or its affiliates.
-# Copyright (c) 2009, 2020, MariaDB Corporation
+# Copyright (c) 2009, 2021, MariaDB Corporation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -4376,6 +4376,8 @@ sub extract_warning_lines ($$) {
qr|Linux Native AIO|, # warning that aio does not work on /dev/shm
qr|InnoDB: io_setup\(\) attempt|,
qr|InnoDB: io_setup\(\) failed with EAGAIN|,
+ qr|io_uring_queue_init\(\) failed with|,
+ qr|InnoDB: liburing disabled|,
qr|setrlimit could not change the size of core files to 'infinity';|,
qr|feedback plugin: failed to retrieve the MAC address|,
qr|Plugin 'FEEDBACK' init function returned error|,
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 401c1c4bb39..0230b6663ee 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -3543,8 +3543,7 @@ static int innodb_init_params()
srv_use_doublewrite_buf = FALSE;
}
-#ifdef LINUX_NATIVE_AIO
-#elif !defined _WIN32
+#if !defined LINUX_NATIVE_AIO && !defined HAVE_URING && !defined _WIN32
/* Currently native AIO is supported only on windows and linux
and that also when the support is compiled in. In all other
cases, we ignore the setting of innodb_use_native_aio. */
diff --git a/storage/innobase/innodb.cmake b/storage/innobase/innodb.cmake
index 5c7784091bd..7461da4c575 100644
--- a/storage/innobase/innodb.cmake
+++ b/storage/innobase/innodb.cmake
@@ -56,12 +56,14 @@ IF(UNIX)
ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1")
- CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
- CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
-
- IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
- ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
- LINK_LIBRARIES(aio)
+ IF (NOT LIBURING)
+ CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
+ CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
+
+ IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
+ ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
+ LINK_LIBRARIES(aio)
+ ENDIF()
ENDIF()
IF(HAVE_LIBNUMA)
LINK_LIBRARIES(numa)
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 9a11c91cd15..605c77b577e 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -4024,6 +4024,17 @@ disable:
}
#endif
+#ifdef HAVE_URING
+ if (ret)
+ {
+ ut_ad(srv_use_native_aio);
+ ib::warn()
+ << "liburing disabled: falling back to innodb_use_native_aio=OFF";
+ srv_use_native_aio= false;
+ ret= srv_thread_pool->configure_aio(false, max_events);
+ }
+#endif
+
if (!ret)
{
read_slots= new io_slots(max_read_events, srv_n_read_io_threads);
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 0995a57b9ce..120fb4f3ac8 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -1193,6 +1193,11 @@ dberr_t srv_start(bool create_new_db)
ib::info() << "Using Linux native AIO";
}
#endif
+#ifdef HAVE_URING
+ if (srv_use_native_aio) {
+ ib::info() << "Using liburing";
+ }
+#endif
fil_system.create(srv_file_per_table ? 50000 : 5000);
diff --git a/support-files/CMakeLists.txt b/support-files/CMakeLists.txt
index 0e5a62a9514..7fac042dca6 100644
--- a/support-files/CMakeLists.txt
+++ b/support-files/CMakeLists.txt
@@ -1,5 +1,5 @@
# Copyright (c) 2006, 2016, Oracle and/or its affiliates.
-# Copyright (c) 2012, 2017, MariaDB
+# Copyright (c) 2012, 2021, MariaDB
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in
index 1f7f20fb381..99aebd9a3d7 100644
--- a/support-files/mariadb.service.in
+++ b/support-files/mariadb.service.in
@@ -144,7 +144,7 @@ TimeoutStopSec=900
# Number of files limit. previously [mysqld_safe] open-files-limit
LimitNOFILE=16384
-
+@SYSTEMD_LIMIT@
# Maximium core size. previously [mysqld_safe] core-file-size
# LimitCore=
diff --git a/support-files/mariadb@.service.in b/support-files/mariadb@.service.in
index 14749092103..60ed4b74aa1 100644
--- a/support-files/mariadb@.service.in
+++ b/support-files/mariadb@.service.in
@@ -269,7 +269,7 @@ Group=mysql
# Number of files limit. previously [mysqld_safe] open-files-limit
LimitNOFILE=16384
-
+@SYSTEMD_LIMIT@
# Maximium core size. previously [mysqld_safe] core-file-size
# LimitCore=
diff --git a/tpool/CMakeLists.txt b/tpool/CMakeLists.txt
index 3e3f8e0b42a..239745ab90c 100644
--- a/tpool/CMakeLists.txt
+++ b/tpool/CMakeLists.txt
@@ -1,16 +1,19 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
IF(WIN32)
SET(EXTRA_SOURCES tpool_win.cc aio_win.cc)
-ELSE()
- SET(EXTRA_SOURCES aio_linux.cc)
ENDIF()
-IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND LIBURING)
+ SET(EXTRA_SOURCES aio_liburing.cc)
+ENDIF()
+
+IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT LIBURING)
CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
LINK_LIBRARIES(aio)
+ SET(EXTRA_SOURCES aio_linux.cc)
ENDIF()
ENDIF()
@@ -26,4 +29,4 @@ ADD_LIBRARY(tpool STATIC
${EXTRA_SOURCES}
)
-INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include) \ No newline at end of file
+INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include)
diff --git a/tpool/aio_liburing.cc b/tpool/aio_liburing.cc
new file mode 100644
index 00000000000..14219f1d499
--- /dev/null
+++ b/tpool/aio_liburing.cc
@@ -0,0 +1,185 @@
+/* Copyright (C) 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute itand /or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
+
+#include "tpool_structs.h"
+#include "tpool.h"
+#include "mysql/service_my_print_error.h"
+#include "mysqld_error.h"
+
+#include <liburing.h>
+
+#include <algorithm>
+#include <vector>
+#include <thread>
+#include <mutex>
+
+namespace
+{
+
+class aio_uring final : public tpool::aio
+{
+public:
+ aio_uring(tpool::thread_pool *tpool, int max_aio) : tpool_(tpool)
+ {
+ if (io_uring_queue_init(max_aio, &uring_, 0) != 0)
+ {
+ switch (const auto e= errno) {
+ case ENOMEM:
+ case ENOSYS:
+ my_printf_error(ER_UNKNOWN_ERROR, e == ENOMEM
+ ? "io_uring_queue_init() failed with ENOMEM:"
+ " try larger ulimit -l\n"
+ : "io_uring_queue_init() failed with ENOSYS:"
+ " try uprading the kernel\n",
+ ME_ERROR_LOG | ME_WARNING);
+ break;
+ default:
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "io_uring_queue_init() failed with errno %d\n",
+ ME_ERROR_LOG | ME_WARNING, e);
+ }
+ throw std::runtime_error("aio_uring()");
+ }
+
+ thread_= std::thread(thread_routine, this);
+ }
+
+ ~aio_uring() noexcept
+ {
+ {
+ std::lock_guard<std::mutex> _(mutex_);
+ io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
+ io_uring_prep_nop(sqe);
+ io_uring_sqe_set_data(sqe, nullptr);
+ auto ret= io_uring_submit(&uring_);
+ if (ret != 1)
+ {
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "io_uring_submit() returned %d during shutdown:"
+ " this may cause a hang\n",
+ ME_ERROR_LOG | ME_FATAL, ret);
+ abort();
+ }
+ }
+ thread_.join();
+ io_uring_queue_exit(&uring_);
+ }
+
+ int submit_io(tpool::aiocb *cb) final
+ {
+ cb->iov_base= cb->m_buffer;
+ cb->iov_len= cb->m_len;
+
+ // The whole operation since io_uring_get_sqe() and till io_uring_submit()
+ // must be atomical. This is because liburing provides thread-unsafe calls.
+ std::lock_guard<std::mutex> _(mutex_);
+
+ io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
+ if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD)
+ io_uring_prep_readv(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
+ cb->m_offset);
+ else
+ io_uring_prep_writev(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
+ cb->m_offset);
+ io_uring_sqe_set_data(sqe, cb);
+
+ return io_uring_submit(&uring_) == 1 ? 0 : -1;
+ }
+
+ int bind(native_file_handle &fd) final
+ {
+ std::lock_guard<std::mutex> _(files_mutex_);
+ auto it= std::lower_bound(files_.begin(), files_.end(), fd);
+ assert(it == files_.end() || *it != fd);
+ files_.insert(it, fd);
+ return io_uring_register_files_update(&uring_, 0, files_.data(),
+ files_.size());
+ }
+
+ int unbind(const native_file_handle &fd) final
+ {
+ std::lock_guard<std::mutex> _(files_mutex_);
+ auto it= std::lower_bound(files_.begin(), files_.end(), fd);
+ assert(*it == fd);
+ files_.erase(it);
+ return io_uring_register_files_update(&uring_, 0, files_.data(),
+ files_.size());
+ }
+
+private:
+ static void thread_routine(aio_uring *aio)
+ {
+ for (;;)
+ {
+ io_uring_cqe *cqe;
+ if (int ret= io_uring_wait_cqe(&aio->uring_, &cqe))
+ {
+ if (ret == -EINTR) // this may occur during shutdown
+ break;
+ my_printf_error(ER_UNKNOWN_ERROR,
+ "io_uring_wait_cqe() returned %d\n",
+ ME_ERROR_LOG | ME_FATAL, ret);
+ abort();
+ }
+
+ auto *iocb= static_cast<tpool::aiocb*>(io_uring_cqe_get_data(cqe));
+ if (!iocb)
+ break;
+
+ int res= cqe->res;
+ if (res < 0)
+ {
+ iocb->m_err= -res;
+ iocb->m_ret_len= 0;
+ }
+ else
+ {
+ iocb->m_err= 0;
+ iocb->m_ret_len= res;
+ }
+
+ io_uring_cqe_seen(&aio->uring_, cqe);
+
+ iocb->m_internal_task.m_func= iocb->m_callback;
+ iocb->m_internal_task.m_arg= iocb;
+ iocb->m_internal_task.m_group= iocb->m_group;
+ aio->tpool_->submit_task(&iocb->m_internal_task);
+ }
+ }
+
+ io_uring uring_;
+ std::mutex mutex_;
+ tpool::thread_pool *tpool_;
+ std::thread thread_;
+
+ std::vector<native_file_handle> files_;
+ std::mutex files_mutex_;
+};
+
+} // namespace
+
+namespace tpool
+{
+
+aio *create_linux_aio(thread_pool *pool, int max_aio)
+{
+ try {
+ return new aio_uring(pool, max_aio);
+ } catch (std::runtime_error& error) {
+ return nullptr;
+ }
+}
+
+} // namespace tpool
diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc
index d9aa8be2347..4abc2139881 100644
--- a/tpool/aio_linux.cc
+++ b/tpool/aio_linux.cc
@@ -16,7 +16,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
#include "tpool_structs.h"
#include "tpool.h"
-#ifdef LINUX_NATIVE_AIO
# include <thread>
# include <atomic>
# include <libaio.h>
@@ -69,7 +68,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
}
return ret;
}
-#endif
/*
@@ -84,7 +82,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
*/
namespace tpool
{
-#ifdef LINUX_NATIVE_AIO
class aio_linux final : public aio
{
@@ -187,7 +184,4 @@ aio *create_linux_aio(thread_pool *pool, int max_io)
}
return new aio_linux(ctx, pool);
}
-#else
-aio *create_linux_aio(thread_pool*, int) { return nullptr; }
-#endif
}
diff --git a/tpool/tpool.h b/tpool/tpool.h
index 3a5658c0f36..d33c0608959 100644
--- a/tpool/tpool.h
+++ b/tpool/tpool.h
@@ -22,6 +22,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
#ifdef LINUX_NATIVE_AIO
#include <libaio.h>
#endif
+#ifdef HAVE_URING
+#include <sys/uio.h>
+#endif
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
@@ -123,6 +126,8 @@ struct aiocb
:OVERLAPPED
#elif defined LINUX_NATIVE_AIO
:iocb
+#elif defined HAVE_URING
+ :iovec
#endif
{
native_file_handle m_fh;
diff --git a/tpool/tpool_generic.cc b/tpool/tpool_generic.cc
index 7c645b09785..0c769d67c99 100644
--- a/tpool/tpool_generic.cc
+++ b/tpool/tpool_generic.cc
@@ -38,7 +38,11 @@ namespace tpool
{
#ifdef __linux__
+#if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO)
extern aio* create_linux_aio(thread_pool* tp, int max_io);
+#else
+ aio *create_linux_aio(thread_pool *, int) { return nullptr; };
+#endif
#endif
#ifdef _WIN32
extern aio* create_win_aio(thread_pool* tp, int max_io);