diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2021-03-15 11:30:17 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2021-03-15 11:30:17 +0200 |
commit | 783625d78f5072a4f986f079628535dc70d7e99f (patch) | |
tree | b098563f485d62c165e2838e6526447ecc146d13 | |
parent | 3dfda08702df0a1e2aee5bd62c9e4d95915cd08a (diff) | |
download | mariadb-git-783625d78f5072a4f986f079628535dc70d7e99f.tar.gz |
MDEV-24883 add io_uring support for tpool
liburing is a new optional dependency (WITH_URING=auto|yes|no)
that replaces libaio when it is available.
aio_uring: a class that wraps the liburing (io_uring) API
aio_uring::bind()/unbind(): optional optimization that registers file descriptors with the ring
aio_uring::submit_io(): a mutex prevents data races, because liburing calls
are not thread-safe. Although its implementation uses atomic operations,
they only synchronize between the kernel and user space. That is why our
own synchronization is still needed.
For systemd, we add LimitMEMLOCK=524288 (ulimit -l 524288)
because the io_uring_setup system call that is invoked
by io_uring_queue_init() requests locked memory. The value
was found empirically; with 262144, we would occasionally
fail to enable io_uring when using the maximum values of
innodb_read_io_threads=64 and innodb_write_io_threads=64.
aio_uring::thread_routine(): Tolerate -EINTR return from
io_uring_wait_cqe(), because it may occur on shutdown
on Ubuntu 20.10 (Groovy Gorilla).
This was mostly implemented by Eugene Kosov. Systemd integration
and improved startup/shutdown error handling by Marko Mäkelä.
-rw-r--r-- | CMakeLists.txt | 3 | ||||
-rw-r--r-- | cmake/systemd.cmake | 4 | ||||
-rw-r--r-- | cmake/uring.cmake | 20 | ||||
-rw-r--r-- | extra/mariabackup/xtrabackup.cc | 5 | ||||
-rwxr-xr-x | mysql-test/mysql-test-run.pl | 4 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 3 | ||||
-rw-r--r-- | storage/innobase/innodb.cmake | 14 | ||||
-rw-r--r-- | storage/innobase/os/os0file.cc | 11 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 5 | ||||
-rw-r--r-- | support-files/CMakeLists.txt | 2 | ||||
-rw-r--r-- | support-files/mariadb.service.in | 2 | ||||
-rw-r--r-- | support-files/mariadb@.service.in | 2 | ||||
-rw-r--r-- | tpool/CMakeLists.txt | 11 | ||||
-rw-r--r-- | tpool/aio_liburing.cc | 185 | ||||
-rw-r--r-- | tpool/aio_linux.cc | 6 | ||||
-rw-r--r-- | tpool/tpool.h | 5 | ||||
-rw-r--r-- | tpool/tpool_generic.cc | 4 |
17 files changed, 263 insertions, 23 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 36922e04368..13dce3cafc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,6 +174,7 @@ INCLUDE(mysql_add_executable) INCLUDE(symlinks) INCLUDE(compile_flags) INCLUDE(pmem) +INCLUDE(uring) # Handle options OPTION(DISABLE_SHARED @@ -394,7 +395,7 @@ MYSQL_CHECK_READLINE() SET(MALLOC_LIBRARY "system") CHECK_PCRE() - +CHECK_URING() CHECK_SYSTEMD() IF(CMAKE_CROSSCOMPILING) diff --git a/cmake/systemd.cmake b/cmake/systemd.cmake index e353004e7d2..72cb77b4a6a 100644 --- a/cmake/systemd.cmake +++ b/cmake/systemd.cmake @@ -49,6 +49,10 @@ MACRO(CHECK_SYSTEMD) SET(SYSTEMD_EXECSTARTPRE "ExecStartPre=/usr/bin/install -m 755 -o mysql -g root -d /var/run/mysqld") SET(SYSTEMD_EXECSTARTPOST "ExecStartPost=/etc/mysql/debian-start") ENDIF() + IF(LIBURING AND HAVE_LIBURING_H AND NOT WITH_URING STREQUAL "no") + SET(SYSTEMD_LIMIT "# For liburing and io_uring_setup() +LimitMEMLOCK=524288") + ENDIF() MESSAGE_ONCE(systemd "Systemd features enabled") ELSE() UNSET(LIBSYSTEMD) diff --git a/cmake/uring.cmake b/cmake/uring.cmake new file mode 100644 index 00000000000..ac76a6d2048 --- /dev/null +++ b/cmake/uring.cmake @@ -0,0 +1,20 @@ +MACRO(CHECK_URING) + IF(CMAKE_SYSTEM_NAME MATCHES "Linux") + INCLUDE(CheckIncludeFiles) + SET(WITH_URING "auto" CACHE STRING "Enable liburing usage") + IF(WITH_URING STREQUAL "yes" OR WITH_URING STREQUAL "auto") + FIND_LIBRARY(LIBURING uring) + CHECK_INCLUDE_FILES(liburing.h HAVE_LIBURING_H) + IF (LIBURING AND HAVE_LIBURING_H) + ADD_DEFINITIONS(-DHAVE_URING) + LINK_LIBRARIES(uring) + ELSE() + IF(WITH_URING STREQUAL "yes") + MESSAGE(FATAL_ERROR "Requested WITH_URING=yes but liburing was not found") + ENDIF() + ENDIF() + ELSEIF(NOT WITH_URING STREQUAL "no") + MESSAGE(FATAL_ERROR "Invalid value for WITH_URING. 
Must be 'yes', 'no', or 'auto'.") + ENDIF() + ENDIF() +ENDMACRO() diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 07b34388434..63c0313be9c 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -2141,6 +2141,11 @@ static bool innodb_init_param() if (srv_use_native_aio) { msg("InnoDB: Using Linux native AIO"); } +#elif defined(HAVE_URING) + + if (srv_use_native_aio) { + msg("InnoDB: Using liburing"); + } #else /* Currently native AIO is supported only on windows and linux and that also when the support is compiled in. In all other diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index fd0e5ffd12a..bda7750a598 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -2,7 +2,7 @@ # -*- cperl -*- # Copyright (c) 2004, 2014, Oracle and/or its affiliates. -# Copyright (c) 2009, 2020, MariaDB Corporation +# Copyright (c) 2009, 2021, MariaDB Corporation # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -4376,6 +4376,8 @@ sub extract_warning_lines ($$) { qr|Linux Native AIO|, # warning that aio does not work on /dev/shm qr|InnoDB: io_setup\(\) attempt|, qr|InnoDB: io_setup\(\) failed with EAGAIN|, + qr|io_uring_queue_init\(\) failed with|, + qr|InnoDB: liburing disabled|, qr|setrlimit could not change the size of core files to 'infinity';|, qr|feedback plugin: failed to retrieve the MAC address|, qr|Plugin 'FEEDBACK' init function returned error|, diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 401c1c4bb39..0230b6663ee 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -3543,8 +3543,7 @@ static int innodb_init_params() srv_use_doublewrite_buf = FALSE; } -#ifdef LINUX_NATIVE_AIO -#elif !defined _WIN32 +#if !defined LINUX_NATIVE_AIO && !defined HAVE_URING && !defined _WIN32 
/* Currently native AIO is supported only on windows and linux and that also when the support is compiled in. In all other cases, we ignore the setting of innodb_use_native_aio. */ diff --git a/storage/innobase/innodb.cmake b/storage/innobase/innodb.cmake index 5c7784091bd..7461da4c575 100644 --- a/storage/innobase/innodb.cmake +++ b/storage/innobase/innodb.cmake @@ -56,12 +56,14 @@ IF(UNIX) ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1") - CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H) - CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO) - - IF(HAVE_LIBAIO_H AND HAVE_LIBAIO) - ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) - LINK_LIBRARIES(aio) + IF (NOT LIBURING) + CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H) + CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO) + + IF(HAVE_LIBAIO_H AND HAVE_LIBAIO) + ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) + LINK_LIBRARIES(aio) + ENDIF() ENDIF() IF(HAVE_LIBNUMA) LINK_LIBRARIES(numa) diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 9a11c91cd15..605c77b577e 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -4024,6 +4024,17 @@ disable: } #endif +#ifdef HAVE_URING + if (ret) + { + ut_ad(srv_use_native_aio); + ib::warn() + << "liburing disabled: falling back to innodb_use_native_aio=OFF"; + srv_use_native_aio= false; + ret= srv_thread_pool->configure_aio(false, max_events); + } +#endif + if (!ret) { read_slots= new io_slots(max_read_events, srv_n_read_io_threads); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 0995a57b9ce..120fb4f3ac8 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1193,6 +1193,11 @@ dberr_t srv_start(bool create_new_db) ib::info() << "Using Linux native AIO"; } #endif +#ifdef HAVE_URING + if (srv_use_native_aio) { + ib::info() << "Using liburing"; + } +#endif fil_system.create(srv_file_per_table ? 
50000 : 5000); diff --git a/support-files/CMakeLists.txt b/support-files/CMakeLists.txt index 0e5a62a9514..7fac042dca6 100644 --- a/support-files/CMakeLists.txt +++ b/support-files/CMakeLists.txt @@ -1,5 +1,5 @@ # Copyright (c) 2006, 2016, Oracle and/or its affiliates. -# Copyright (c) 2012, 2017, MariaDB +# Copyright (c) 2012, 2021, MariaDB # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/support-files/mariadb.service.in b/support-files/mariadb.service.in index 1f7f20fb381..99aebd9a3d7 100644 --- a/support-files/mariadb.service.in +++ b/support-files/mariadb.service.in @@ -144,7 +144,7 @@ TimeoutStopSec=900 # Number of files limit. previously [mysqld_safe] open-files-limit LimitNOFILE=16384 - +@SYSTEMD_LIMIT@ # Maximium core size. previously [mysqld_safe] core-file-size # LimitCore= diff --git a/support-files/mariadb@.service.in b/support-files/mariadb@.service.in index 14749092103..60ed4b74aa1 100644 --- a/support-files/mariadb@.service.in +++ b/support-files/mariadb@.service.in @@ -269,7 +269,7 @@ Group=mysql # Number of files limit. previously [mysqld_safe] open-files-limit LimitNOFILE=16384 - +@SYSTEMD_LIMIT@ # Maximium core size. 
previously [mysqld_safe] core-file-size # LimitCore= diff --git a/tpool/CMakeLists.txt b/tpool/CMakeLists.txt index 3e3f8e0b42a..239745ab90c 100644 --- a/tpool/CMakeLists.txt +++ b/tpool/CMakeLists.txt @@ -1,16 +1,19 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) IF(WIN32) SET(EXTRA_SOURCES tpool_win.cc aio_win.cc) -ELSE() - SET(EXTRA_SOURCES aio_linux.cc) ENDIF() -IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") +IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND LIBURING) + SET(EXTRA_SOURCES aio_liburing.cc) +ENDIF() + +IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT LIBURING) CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H) CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO) IF(HAVE_LIBAIO_H AND HAVE_LIBAIO) ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) LINK_LIBRARIES(aio) + SET(EXTRA_SOURCES aio_linux.cc) ENDIF() ENDIF() @@ -26,4 +29,4 @@ ADD_LIBRARY(tpool STATIC ${EXTRA_SOURCES} ) -INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include)
\ No newline at end of file +INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include) diff --git a/tpool/aio_liburing.cc b/tpool/aio_liburing.cc new file mode 100644 index 00000000000..14219f1d499 --- /dev/null +++ b/tpool/aio_liburing.cc @@ -0,0 +1,185 @@ +/* Copyright (C) 2021, MariaDB Corporation. + +This program is free software; you can redistribute itand /or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ + +#include "tpool_structs.h" +#include "tpool.h" +#include "mysql/service_my_print_error.h" +#include "mysqld_error.h" + +#include <liburing.h> + +#include <algorithm> +#include <vector> +#include <thread> +#include <mutex> + +namespace +{ + +class aio_uring final : public tpool::aio +{ +public: + aio_uring(tpool::thread_pool *tpool, int max_aio) : tpool_(tpool) + { + if (io_uring_queue_init(max_aio, &uring_, 0) != 0) + { + switch (const auto e= errno) { + case ENOMEM: + case ENOSYS: + my_printf_error(ER_UNKNOWN_ERROR, e == ENOMEM + ? 
"io_uring_queue_init() failed with ENOMEM:" + " try larger ulimit -l\n" + : "io_uring_queue_init() failed with ENOSYS:" + " try uprading the kernel\n", + ME_ERROR_LOG | ME_WARNING); + break; + default: + my_printf_error(ER_UNKNOWN_ERROR, + "io_uring_queue_init() failed with errno %d\n", + ME_ERROR_LOG | ME_WARNING, e); + } + throw std::runtime_error("aio_uring()"); + } + + thread_= std::thread(thread_routine, this); + } + + ~aio_uring() noexcept + { + { + std::lock_guard<std::mutex> _(mutex_); + io_uring_sqe *sqe= io_uring_get_sqe(&uring_); + io_uring_prep_nop(sqe); + io_uring_sqe_set_data(sqe, nullptr); + auto ret= io_uring_submit(&uring_); + if (ret != 1) + { + my_printf_error(ER_UNKNOWN_ERROR, + "io_uring_submit() returned %d during shutdown:" + " this may cause a hang\n", + ME_ERROR_LOG | ME_FATAL, ret); + abort(); + } + } + thread_.join(); + io_uring_queue_exit(&uring_); + } + + int submit_io(tpool::aiocb *cb) final + { + cb->iov_base= cb->m_buffer; + cb->iov_len= cb->m_len; + + // The whole operation since io_uring_get_sqe() and till io_uring_submit() + // must be atomical. This is because liburing provides thread-unsafe calls. + std::lock_guard<std::mutex> _(mutex_); + + io_uring_sqe *sqe= io_uring_get_sqe(&uring_); + if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD) + io_uring_prep_readv(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1, + cb->m_offset); + else + io_uring_prep_writev(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1, + cb->m_offset); + io_uring_sqe_set_data(sqe, cb); + + return io_uring_submit(&uring_) == 1 ? 
0 : -1; + } + + int bind(native_file_handle &fd) final + { + std::lock_guard<std::mutex> _(files_mutex_); + auto it= std::lower_bound(files_.begin(), files_.end(), fd); + assert(it == files_.end() || *it != fd); + files_.insert(it, fd); + return io_uring_register_files_update(&uring_, 0, files_.data(), + files_.size()); + } + + int unbind(const native_file_handle &fd) final + { + std::lock_guard<std::mutex> _(files_mutex_); + auto it= std::lower_bound(files_.begin(), files_.end(), fd); + assert(*it == fd); + files_.erase(it); + return io_uring_register_files_update(&uring_, 0, files_.data(), + files_.size()); + } + +private: + static void thread_routine(aio_uring *aio) + { + for (;;) + { + io_uring_cqe *cqe; + if (int ret= io_uring_wait_cqe(&aio->uring_, &cqe)) + { + if (ret == -EINTR) // this may occur during shutdown + break; + my_printf_error(ER_UNKNOWN_ERROR, + "io_uring_wait_cqe() returned %d\n", + ME_ERROR_LOG | ME_FATAL, ret); + abort(); + } + + auto *iocb= static_cast<tpool::aiocb*>(io_uring_cqe_get_data(cqe)); + if (!iocb) + break; + + int res= cqe->res; + if (res < 0) + { + iocb->m_err= -res; + iocb->m_ret_len= 0; + } + else + { + iocb->m_err= 0; + iocb->m_ret_len= res; + } + + io_uring_cqe_seen(&aio->uring_, cqe); + + iocb->m_internal_task.m_func= iocb->m_callback; + iocb->m_internal_task.m_arg= iocb; + iocb->m_internal_task.m_group= iocb->m_group; + aio->tpool_->submit_task(&iocb->m_internal_task); + } + } + + io_uring uring_; + std::mutex mutex_; + tpool::thread_pool *tpool_; + std::thread thread_; + + std::vector<native_file_handle> files_; + std::mutex files_mutex_; +}; + +} // namespace + +namespace tpool +{ + +aio *create_linux_aio(thread_pool *pool, int max_aio) +{ + try { + return new aio_uring(pool, max_aio); + } catch (std::runtime_error& error) { + return nullptr; + } +} + +} // namespace tpool diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc index d9aa8be2347..4abc2139881 100644 --- a/tpool/aio_linux.cc +++ b/tpool/aio_linux.cc @@ -16,7 
+16,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ #include "tpool_structs.h" #include "tpool.h" -#ifdef LINUX_NATIVE_AIO # include <thread> # include <atomic> # include <libaio.h> @@ -69,7 +68,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev) } return ret; } -#endif /* @@ -84,7 +82,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev) */ namespace tpool { -#ifdef LINUX_NATIVE_AIO class aio_linux final : public aio { @@ -187,7 +184,4 @@ aio *create_linux_aio(thread_pool *pool, int max_io) } return new aio_linux(ctx, pool); } -#else -aio *create_linux_aio(thread_pool*, int) { return nullptr; } -#endif } diff --git a/tpool/tpool.h b/tpool/tpool.h index 3a5658c0f36..d33c0608959 100644 --- a/tpool/tpool.h +++ b/tpool/tpool.h @@ -22,6 +22,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/ #ifdef LINUX_NATIVE_AIO #include <libaio.h> #endif +#ifdef HAVE_URING +#include <sys/uio.h> +#endif #ifdef _WIN32 #ifndef NOMINMAX #define NOMINMAX @@ -123,6 +126,8 @@ struct aiocb :OVERLAPPED #elif defined LINUX_NATIVE_AIO :iocb +#elif defined HAVE_URING + :iovec #endif { native_file_handle m_fh; diff --git a/tpool/tpool_generic.cc b/tpool/tpool_generic.cc index 7c645b09785..0c769d67c99 100644 --- a/tpool/tpool_generic.cc +++ b/tpool/tpool_generic.cc @@ -38,7 +38,11 @@ namespace tpool { #ifdef __linux__ +#if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO) extern aio* create_linux_aio(thread_pool* tp, int max_io); +#else + aio *create_linux_aio(thread_pool *, int) { return nullptr; }; +#endif #endif #ifdef _WIN32 extern aio* create_win_aio(thread_pool* tp, int max_io); |