diff options
Diffstat (limited to 'storage/innobase/log/log0log.cc')
-rw-r--r-- | storage/innobase/log/log0log.cc | 156 |
1 files changed, 132 insertions, 24 deletions
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index d0b325c05aa..043de52d737 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -84,6 +84,8 @@ reduce the size of the log. /** Redo log system */ log_t log_sys; +ulong innodb_log_io_method; + /* Next log block number to do dummy record filling if no log records written for a while */ static ulint next_lbn_to_pad = 0; @@ -587,6 +589,112 @@ void log_t::create() } } + +class file_os_io final: public log_t::files::file_io +{ + pfs_os_file_t fd; +public: + dberr_t open(const char *path) final + { + bool success; + fd= os_file_create(innodb_log_file_key, path, + OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT, + OS_FILE_NORMAL, OS_LOG_FILE, + srv_read_only_mode, &success); + durable_writes= srv_file_flush_method == SRV_O_DSYNC; + return success ? DB_SUCCESS : DB_ERROR; + } + dberr_t close() final { return os_file_close(fd) ? DB_SUCCESS : DB_ERROR; } + dberr_t read(os_offset_t offset, span<byte> buf) final + { + return os_file_read(IORequestRead, fd, buf.data(), offset, buf.size()); + } + dberr_t write(const char *path, os_offset_t offset, span<byte> buf) final + { + return os_file_write(IORequestWrite, path, fd, buf.data(), offset, + buf.size()); + } + dberr_t flush_data_only() final + { + return os_file_flush_data(fd) ? DB_SUCCESS : DB_ERROR; + } +}; + + +#ifdef HAVE_PMEM +#include <libpmem.h> +#endif +class file_mmap_io final: public log_t::files::file_io +{ + File fd; + void *addr; + size_t length; +public: + dberr_t open(const char *path) final + { + fd= mysql_file_open(innodb_log_file_key, path, + srv_read_only_mode ? O_RDONLY : O_RDWR, MYF(MY_WME)); + if (fd >= 0) + { + MY_STAT sb; + if (!mysql_file_fstat(fd, &sb, MYF(0))) + { + int prot= srv_read_only_mode ? PROT_READ : PROT_READ | PROT_WRITE; + length= sb.st_size; + addr= my_mmap(0, length, prot, MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); + if (addr != MAP_FAILED) + { +#ifdef HAVE_PMEM + durable_writes= true; + ib::info() << "The redo log file is located on a DAX storage. " + "Writes are durable, sync disabled."; +#else + durable_writes= false; + ib::info() << "The redo log file is located on a DAX storage, " + "but persistent memory features were disabled " + "(WITH_PMEM=OFF). Page cache is bypassed, sync is " + "required to make writes durable."; +#endif + return DB_SUCCESS; + } + addr= my_mmap(0, length, prot, MAP_SHARED, fd, 0); + if (addr != MAP_FAILED) + { + durable_writes= false; + return DB_SUCCESS; + } + } + mysql_file_close(fd, MYF(MY_WME)); + } + return DB_ERROR; + } + dberr_t close() final + { + int err= my_munmap(addr, length); + return (!mysql_file_close(fd, MYF(MY_WME)) && !err) ? DB_SUCCESS : DB_ERROR; + } + dberr_t read(os_offset_t offset, span<byte> buf) final + { + memcpy(buf.data(), (char*) addr + offset, buf.size()); + return DB_SUCCESS; + } + dberr_t write(const char *, os_offset_t offset, span<byte> buf) final + { +#ifdef HAVE_PMEM + pmem_memcpy_persist((char*) addr + offset, buf.data(), buf.size()); +#else + memcpy((char*) addr + offset, buf.data(), buf.size()); +#endif + return DB_SUCCESS; + } + dberr_t flush_data_only() final + { + ut_ad(!durable_writes); + return my_msync(fd, addr, length, MS_SYNC) ? DB_ERROR : DB_SUCCESS; + } +}; + + void log_t::files::set_file_names(std::vector<std::string> names) { file_names= std::move(names); @@ -598,15 +706,18 @@ void log_t::files::open_files() files.reserve(file_names.size()); for (const auto &name : file_names) { - bool success; - files.push_back(os_file_create(innodb_log_file_key, name.c_str(), - OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT, - OS_FILE_NORMAL, OS_LOG_FILE, - srv_read_only_mode, &success)); - if (!success) + file_io *io; + + switch (innodb_log_io_method) { - ib::fatal() << "os_file_create(" << name << ") failed"; + case 1: io= new file_mmap_io; break; + default: io= new file_os_io; } + ut_a(io); + + if (io->open(name.c_str())) + ib::fatal() << "open(" << name << ") failed"; + files.emplace_back(io); } } @@ -617,12 +728,8 @@ void log_t::files::read(size_t total_offset, span<byte> buf) const size_t file_idx= total_offset / static_cast<size_t>(file_size); const size_t offset= total_offset % static_cast<size_t>(file_size); - if (const dberr_t err= os_file_read(IORequestRead, files[file_idx], - buf.data(), offset, buf.size())) - { - ib::fatal() << "os_file_read(" << file_names[file_idx] << ") returned " - << err; - } + if (const dberr_t err= files[file_idx]->read(offset, buf)) + ib::fatal() << "read(" << file_names[file_idx] << ") returned " << err; } void log_t::files::write(size_t total_offset, span<byte> buf) @@ -632,13 +739,9 @@ void log_t::files::write(size_t total_offset, span<byte> buf) const size_t file_idx= total_offset / static_cast<size_t>(file_size); const size_t offset= total_offset % static_cast<size_t>(file_size); - if (const dberr_t err= - os_file_write(IORequestWrite, file_names[file_idx].c_str(), - files[file_idx], buf.data(), offset, buf.size())) - { - ib::fatal() << "os_file_write(" << file_names[file_idx] << ") returned " - << err; - } + if (const dberr_t err= files[file_idx]->write(file_names[file_idx].c_str(), + offset, buf)) + ib::fatal() << "write(" << file_names[file_idx] << ") returned " << err; } void log_t::files::flush_data_only() @@ -648,10 +751,10 @@ void log_t::files::flush_data_only() log_sys.pending_flushes.fetch_add(1, std::memory_order_acquire); for (auto it= files.begin(), end= files.end(); it != end; ++it) { - if (!os_file_flush_data(*it)) + if ((*it)->flush_data_only()) { const auto idx= std::distance(files.begin(), it); - ib::fatal() << "os_file_flush_data(" << file_names[idx] << ") failed"; + ib::fatal() << "flush_data_only(" << file_names[idx] << ") failed"; } } log_sys.pending_flushes.fetch_sub(1, std::memory_order_release); @@ -662,10 +765,10 @@ void log_t::files::close_files() { for (auto it= files.begin(), end= files.end(); it != end; ++it) { - if (!os_file_close(*it)) + if ((*it)->close()) { const auto idx= std::distance(files.begin(), it); - ib::fatal() << "os_file_close(" << file_names[idx] << ") failed"; + ib::fatal() << "close(" << file_names[idx] << ") failed"; } } files.clear(); @@ -934,6 +1037,11 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key) return; } + /* FIXME!!! */ + if (log_sys.log.writes_are_durable()) { + flush_to_disk= false; + } + loop: ut_ad(++loop_count < 128); |