diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2021-07-22 09:50:20 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2021-07-22 10:05:13 +0300 |
commit | 82d5994520c239da1b6edf1b24e08138ae0c753d (patch) | |
tree | 57960cba641e07952799a86eb5bed816064a128a | |
parent | bf435a3f4daa90dd6b4b94ed62f05a8e30fecc3d (diff) | |
download | mariadb-git-82d5994520c239da1b6edf1b24e08138ae0c753d.tar.gz |
MDEV-26110: Do not rely on alignment on static allocation
It is implementation-defined whether alignment requirements
that are larger than std::max_align_t (typically 8 or 16 bytes)
will be honored by the compiler and linker.
It turns out that on IBM AIX, both alignas() and MY_ALIGNED()
only guarantees alignment up to 16 bytes.
For some data structures, specifying alignment to the CPU
cache line size (typically 64 or 128 bytes) is a mere performance
optimization, and we do not really care whether the requested
alignment is guaranteed.
But, for the correct operation of direct I/O, we do require that
the buffers be aligned at a block size boundary.
field_ref_zero: Define as a pointer, not an array.
For innochecksum, we can make this point to unaligned memory;
for anything else, we will allocate an aligned buffer from the heap.
This buffer will be used for overwriting freed data pages when
innodb_immediate_scrub_data_uncompressed=ON. And exactly that code
hit an assertion failure on AIX, in the test innodb.innodb_scrub.
log_sys.checkpoint_buf: Define as a pointer to aligned memory
that is allocated from heap.
log_t::file::write_header_durable(): Reuse log_sys.checkpoint_buf
instead of trying to allocate an aligned buffer from the stack.
-rw-r--r-- | extra/innochecksum.cc | 3 | ||||
-rw-r--r-- | extra/mariabackup/xtrabackup.cc | 39 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 22 | ||||
-rw-r--r-- | storage/innobase/handler/handler0alter.cc | 2 | ||||
-rw-r--r-- | storage/innobase/include/buf0types.h | 8 | ||||
-rw-r--r-- | storage/innobase/include/log0log.h | 5 | ||||
-rw-r--r-- | storage/innobase/log/log0log.cc | 19 | ||||
-rw-r--r-- | storage/innobase/page/page0zip.cc | 14 | ||||
-rw-r--r-- | storage/innobase/row/row0ins.cc | 2 |
9 files changed, 72 insertions, 42 deletions
diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc index b2aade70d68..0d931129afa 100644 --- a/extra/innochecksum.cc +++ b/extra/innochecksum.cc @@ -97,6 +97,9 @@ FILE* log_file = NULL; /* Enabled for log write option. */ static bool is_log_enabled = false; +static byte field_ref_zero_buf[UNIV_PAGE_SIZE_MAX]; +const byte *field_ref_zero = field_ref_zero_buf; + #ifndef _WIN32 /* advisory lock for non-window system. */ struct flock lk; diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 7ec6fbb8f59..bf4a80832dc 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -4501,6 +4501,14 @@ fail: goto fail; } + + if (auto b = aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096)) { + field_ref_zero = static_cast<byte*>( + memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX)); + } else { + goto fail; + } + { /* definition from recv_recovery_from_checkpoint_start() */ ulint max_cp_field; @@ -4515,14 +4523,17 @@ reread_log_header: if (err != DB_SUCCESS) { msg("Error: cannot read redo log header"); +unlock_and_fail: mysql_mutex_unlock(&log_sys.mutex); +free_and_fail: + aligned_free(const_cast<byte*>(field_ref_zero)); + field_ref_zero = nullptr; goto fail; } if (log_sys.log.format == 0) { msg("Error: cannot process redo log before MariaDB 10.2.2"); - mysql_mutex_unlock(&log_sys.mutex); - goto fail; + goto unlock_and_fail; } byte* buf = log_sys.checkpoint_buf; @@ -4543,7 +4554,7 @@ reread_log_header: xtrabackup_init_datasinks(); if (!select_history()) { - goto fail; + goto free_and_fail; } /* open the log file */ @@ -4552,12 +4563,13 @@ reread_log_header: if (dst_log_file == NULL) { msg("Error: failed to open the target stream for '%s'.", LOG_FILE_NAME); - goto fail; + goto free_and_fail; } /* label it */ - byte MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE) log_hdr_buf[LOG_FILE_HDR_SIZE]; - memset(log_hdr_buf, 0, sizeof log_hdr_buf); + byte* log_hdr_buf = static_cast<byte*>( + aligned_malloc(LOG_FILE_HDR_SIZE, OS_FILE_LOG_BLOCK_SIZE)); + memset(log_hdr_buf, 0, LOG_FILE_HDR_SIZE); byte *log_hdr_field = log_hdr_buf; mach_write_to_4(LOG_HEADER_FORMAT + log_hdr_field, log_sys.log.format); @@ -4586,11 +4598,13 @@ reread_log_header: log_block_calc_checksum_crc32(log_hdr_field)); /* Write log header*/ - if (ds_write(dst_log_file, log_hdr_buf, sizeof(log_hdr_buf))) { + if (ds_write(dst_log_file, log_hdr_buf, LOG_FILE_HDR_SIZE)) { msg("error: write to logfile failed"); - goto fail; + aligned_free(log_hdr_buf); + goto free_and_fail; } + aligned_free(log_hdr_buf); log_copying_running = true; /* start io throttle */ if(xtrabackup_throttle) { @@ -4608,7 +4622,7 @@ reread_log_header: " error %s.", ut_strerr(err)); fail_before_log_copying_thread_start: log_copying_running = false; - goto fail; + goto free_and_fail; } /* copy log file by current position */ @@ -4625,7 +4639,7 @@ fail_before_log_copying_thread_start: /* FLUSH CHANGED_PAGE_BITMAPS call */ if (!flush_changed_page_bitmaps()) { - goto fail; + goto free_and_fail; } ut_a(xtrabackup_parallel > 0); @@ -4647,7 +4661,7 @@ fail_before_log_copying_thread_start: datafiles_iter_t *it = datafiles_iter_new(); if (it == NULL) { msg("mariabackup: Error: datafiles_iter_new() failed."); - goto fail; + goto free_and_fail; } /* Create data copying threads */ @@ -4702,6 +4716,9 @@ fail_before_log_copying_thread_start: if (opt_log_innodb_page_corruption) ok = corrupted_pages.print_to_file(MB_CORRUPTED_PAGES_FILE); + aligned_free(const_cast<byte*>(field_ref_zero)); + field_ref_zero = nullptr; + if (!ok) { goto fail; } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index aca0d6b5ae8..8de876ebe6b 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -321,6 +321,12 @@ constexpr ulint BUF_PAGE_READ_MAX_RETRIES= 100; read-ahead buffer. (Divide buf_pool size by this amount) */ constexpr uint32_t BUF_READ_AHEAD_PORTION= 32; +/** A 64KiB buffer of NUL bytes, for use in assertions and checks, +and dummy default values of instantly dropped columns. +Initially, BLOB field references are set to NUL bytes, in +dtuple_convert_big_rec(). */ +const byte *field_ref_zero; + /** The InnoDB buffer pool */ buf_pool_t buf_pool; buf_pool_t::chunk_t::map *buf_pool_t::chunk_t::map_reg; @@ -698,7 +704,7 @@ static void buf_page_check_lsn(bool check_lsn, const byte* read_buf) @return whether the buffer is all zeroes */ bool buf_is_zeroes(span<const byte> buf) { - ut_ad(buf.size() <= sizeof field_ref_zero); + ut_ad(buf.size() <= UNIV_PAGE_SIZE_MAX); return memcmp(buf.data(), field_ref_zero, buf.size()) == 0; } @@ -1391,11 +1397,17 @@ bool buf_pool_t::create() ut_ad(srv_buf_pool_size % srv_buf_pool_chunk_unit == 0); ut_ad(!is_initialised()); ut_ad(srv_buf_pool_size > 0); + ut_ad(!resizing); + ut_ad(!chunks_old); + ut_ad(!field_ref_zero); NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; - ut_ad(!resizing); - ut_ad(!chunks_old); + if (auto b= aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096)) + field_ref_zero= static_cast<const byte*> + (memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX)); + else + return true; chunk_t::map_reg= UT_NEW_NOKEY(chunk_t::map()); @@ -1426,6 +1438,8 @@ bool buf_pool_t::create() chunks= nullptr; UT_DELETE(chunk_t::map_reg); chunk_t::map_reg= nullptr; + aligned_free(const_cast<byte*>(field_ref_zero)); + field_ref_zero= nullptr; ut_ad(!is_initialised()); return true; } @@ -1541,6 +1555,8 @@ void buf_pool_t::close() io_buf.close(); UT_DELETE(chunk_t::map_reg); chunk_t::map_reg= chunk_t::map_ref= nullptr; + aligned_free(const_cast<byte*>(field_ref_zero)); + field_ref_zero= nullptr; } /** Try to reallocate a control block. diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 9d94aa5b062..caae4035f7c 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -526,7 +526,7 @@ inline bool dict_table_t::instant_column(const dict_table_t& table, } DBUG_ASSERT(c.is_added()); - if (c.def_val.len <= sizeof field_ref_zero + if (c.def_val.len <= UNIV_PAGE_SIZE_MAX && (!c.def_val.len || !memcmp(c.def_val.data, field_ref_zero, c.def_val.len))) { diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index ba1e2e5eaa6..5dd581097f9 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved. -Copyright (c) 2019, 2020, MariaDB Corporation. +Copyright (c) 2019, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -177,11 +177,11 @@ private: uint64_t m_id; }; -/** A field reference full of zero, for use in assertions and checks, +/** A 64KiB buffer of NUL bytes, for use in assertions and checks, and dummy default values of instantly dropped columns. -Initially, BLOB field references are set to zero, in +Initially, BLOB field references are set to NUL bytes, in dtuple_convert_big_rec(). */ -extern const byte field_ref_zero[UNIV_PAGE_SIZE_MAX]; +extern const byte *field_ref_zero; #ifndef UNIV_INNOCHECKSUM diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index fbcc30ee7ca..460acaf5b66 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -617,9 +617,8 @@ public: /*!< number of currently pending checkpoint writes */ - /** buffer for checkpoint header */ - MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE) - byte checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE]; + /** buffer for checkpoint header */ + byte *checkpoint_buf; /* @} */ private: diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index ece26ded72e..a6fa50dd753 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -219,6 +219,8 @@ void log_t::create() log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE); buf_free= LOG_BLOCK_HDR_SIZE; + checkpoint_buf= static_cast<byte*> + (aligned_malloc(OS_FILE_LOG_BLOCK_SIZE, OS_FILE_LOG_BLOCK_SIZE)); } mapped_file_t::~mapped_file_t() noexcept @@ -461,8 +463,8 @@ void log_t::file::write_header_durable(lsn_t lsn) ut_ad(log_sys.log.format == log_t::FORMAT_10_5 || log_sys.log.format == log_t::FORMAT_ENC_10_5); - // man 2 open suggests this buffer to be aligned by 512 for O_DIRECT - MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE) byte buf[OS_FILE_LOG_BLOCK_SIZE] = {0}; + byte *buf= log_sys.checkpoint_buf; + memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE); mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format); mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, log_sys.log.subformat); @@ -475,7 +477,7 @@ void log_t::file::write_header_durable(lsn_t lsn) DBUG_PRINT("ib_log", ("write " LSN_PF, lsn)); - log_sys.log.write(0, buf); + log_sys.log.write(0, {buf, OS_FILE_LOG_BLOCK_SIZE}); if (!log_sys.log.writes_are_durable()) log_sys.log.flush(); } @@ -880,7 +882,7 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn) log_sys.next_checkpoint_lsn)); byte* buf = log_sys.checkpoint_buf; - memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE); + memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE); mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no); mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys.next_checkpoint_lsn); @@ -1282,18 +1284,21 @@ void log_t::close() { ut_ad(this == &log_sys); if (!is_initialised()) return; - m_initialised = false; + m_initialised= false; log.close(); ut_free_dodump(buf, srv_log_buffer_size); - buf = NULL; + buf= nullptr; ut_free_dodump(flush_buf, srv_log_buffer_size); - flush_buf = NULL; + flush_buf= nullptr; mysql_mutex_destroy(&mutex); mysql_mutex_destroy(&flush_order_mutex); recv_sys.close(); + + aligned_free(checkpoint_buf); + checkpoint_buf= nullptr; } std::string get_log_file_path(const char *filename) diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index 1ed9efafc69..74989fb018d 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -35,12 +35,6 @@ Created June 2005 by Marko Makela using st_::span; -/** A BLOB field reference full of zero, for use in assertions and tests. -Initially, BLOB field references are set to zero, in -dtuple_convert_big_rec(). */ -alignas(UNIV_PAGE_SIZE_MIN) -const byte field_ref_zero[UNIV_PAGE_SIZE_MAX] = { 0, }; - #ifndef UNIV_INNOCHECKSUM #include "mtr0log.h" #include "dict0dict.h" @@ -92,16 +86,12 @@ static const byte supremum_extra_data alignas(4) [] = { }; /** Assert that a block of memory is filled with zero bytes. -Compare at most sizeof(field_ref_zero) bytes. @param b in: memory block @param s in: size of the memory block, in bytes */ -#define ASSERT_ZERO(b, s) \ - ut_ad(!memcmp(b, field_ref_zero, \ - std::min<size_t>(s, sizeof field_ref_zero))); +#define ASSERT_ZERO(b, s) ut_ad(!memcmp(b, field_ref_zero, s)) /** Assert that a BLOB pointer is filled with zero bytes. @param b in: BLOB pointer */ -#define ASSERT_ZERO_BLOB(b) \ - ut_ad(!memcmp(b, field_ref_zero, FIELD_REF_SIZE)) +#define ASSERT_ZERO_BLOB(b) ASSERT_ZERO(b, FIELD_REF_SIZE) /* Enable some extra debugging output. This code can be enabled independently of any UNIV_ debugging conditions. */ diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 4b1926c0ec0..96cc3aa413e 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -3424,7 +3424,7 @@ row_ins_index_entry_set_vals( field->len = UNIV_SQL_NULL; field->type.prtype = DATA_BINARY_TYPE; } else { - ut_ad(col->len <= sizeof field_ref_zero); + ut_ad(col->len <= UNIV_PAGE_SIZE_MAX); ut_ad(ind_field->fixed_len <= col->len); dfield_set_data(field, field_ref_zero, ind_field->fixed_len); |