summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2021-07-22 09:50:20 +0300
committerMarko Mäkelä <marko.makela@mariadb.com>2021-07-22 10:05:13 +0300
commit82d5994520c239da1b6edf1b24e08138ae0c753d (patch)
tree57960cba641e07952799a86eb5bed816064a128a
parentbf435a3f4daa90dd6b4b94ed62f05a8e30fecc3d (diff)
downloadmariadb-git-82d5994520c239da1b6edf1b24e08138ae0c753d.tar.gz
MDEV-26110: Do not rely on alignment on static allocation
It is implementation-defined whether alignment requirements that are larger than std::max_align_t (typically 8 or 16 bytes) will be honored by the compiler and linker. It turns out that on IBM AIX, both alignas() and MY_ALIGNED() only guarantees alignment up to 16 bytes. For some data structures, specifying alignment to the CPU cache line size (typically 64 or 128 bytes) is a mere performance optimization, and we do not really care whether the requested alignment is guaranteed. But, for the correct operation of direct I/O, we do require that the buffers be aligned at a block size boundary. field_ref_zero: Define as a pointer, not an array. For innochecksum, we can make this point to unaligned memory; for anything else, we will allocate an aligned buffer from the heap. This buffer will be used for overwriting freed data pages when innodb_immediate_scrub_data_uncompressed=ON. And exactly that code hit an assertion failure on AIX, in the test innodb.innodb_scrub. log_sys.checkpoint_buf: Define as a pointer to aligned memory that is allocated from heap. log_t::file::write_header_durable(): Reuse log_sys.checkpoint_buf instead of trying to allocate an aligned buffer from the stack.
-rw-r--r--extra/innochecksum.cc3
-rw-r--r--extra/mariabackup/xtrabackup.cc39
-rw-r--r--storage/innobase/buf/buf0buf.cc22
-rw-r--r--storage/innobase/handler/handler0alter.cc2
-rw-r--r--storage/innobase/include/buf0types.h8
-rw-r--r--storage/innobase/include/log0log.h5
-rw-r--r--storage/innobase/log/log0log.cc19
-rw-r--r--storage/innobase/page/page0zip.cc14
-rw-r--r--storage/innobase/row/row0ins.cc2
9 files changed, 72 insertions, 42 deletions
diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc
index b2aade70d68..0d931129afa 100644
--- a/extra/innochecksum.cc
+++ b/extra/innochecksum.cc
@@ -97,6 +97,9 @@ FILE* log_file = NULL;
/* Enabled for log write option. */
static bool is_log_enabled = false;
+static byte field_ref_zero_buf[UNIV_PAGE_SIZE_MAX];
+const byte *field_ref_zero = field_ref_zero_buf;
+
#ifndef _WIN32
/* advisory lock for non-window system. */
struct flock lk;
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc
index 7ec6fbb8f59..bf4a80832dc 100644
--- a/extra/mariabackup/xtrabackup.cc
+++ b/extra/mariabackup/xtrabackup.cc
@@ -4501,6 +4501,14 @@ fail:
goto fail;
}
+
+ if (auto b = aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096)) {
+ field_ref_zero = static_cast<byte*>(
+ memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX));
+ } else {
+ goto fail;
+ }
+
{
/* definition from recv_recovery_from_checkpoint_start() */
ulint max_cp_field;
@@ -4515,14 +4523,17 @@ reread_log_header:
if (err != DB_SUCCESS) {
msg("Error: cannot read redo log header");
+unlock_and_fail:
mysql_mutex_unlock(&log_sys.mutex);
+free_and_fail:
+ aligned_free(const_cast<byte*>(field_ref_zero));
+ field_ref_zero = nullptr;
goto fail;
}
if (log_sys.log.format == 0) {
msg("Error: cannot process redo log before MariaDB 10.2.2");
- mysql_mutex_unlock(&log_sys.mutex);
- goto fail;
+ goto unlock_and_fail;
}
byte* buf = log_sys.checkpoint_buf;
@@ -4543,7 +4554,7 @@ reread_log_header:
xtrabackup_init_datasinks();
if (!select_history()) {
- goto fail;
+ goto free_and_fail;
}
/* open the log file */
@@ -4552,12 +4563,13 @@ reread_log_header:
if (dst_log_file == NULL) {
msg("Error: failed to open the target stream for '%s'.",
LOG_FILE_NAME);
- goto fail;
+ goto free_and_fail;
}
/* label it */
- byte MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE) log_hdr_buf[LOG_FILE_HDR_SIZE];
- memset(log_hdr_buf, 0, sizeof log_hdr_buf);
+ byte* log_hdr_buf = static_cast<byte*>(
+ aligned_malloc(LOG_FILE_HDR_SIZE, OS_FILE_LOG_BLOCK_SIZE));
+ memset(log_hdr_buf, 0, LOG_FILE_HDR_SIZE);
byte *log_hdr_field = log_hdr_buf;
mach_write_to_4(LOG_HEADER_FORMAT + log_hdr_field, log_sys.log.format);
@@ -4586,11 +4598,13 @@ reread_log_header:
log_block_calc_checksum_crc32(log_hdr_field));
/* Write log header*/
- if (ds_write(dst_log_file, log_hdr_buf, sizeof(log_hdr_buf))) {
+ if (ds_write(dst_log_file, log_hdr_buf, LOG_FILE_HDR_SIZE)) {
msg("error: write to logfile failed");
- goto fail;
+ aligned_free(log_hdr_buf);
+ goto free_and_fail;
}
+ aligned_free(log_hdr_buf);
log_copying_running = true;
/* start io throttle */
if(xtrabackup_throttle) {
@@ -4608,7 +4622,7 @@ reread_log_header:
" error %s.", ut_strerr(err));
fail_before_log_copying_thread_start:
log_copying_running = false;
- goto fail;
+ goto free_and_fail;
}
/* copy log file by current position */
@@ -4625,7 +4639,7 @@ fail_before_log_copying_thread_start:
/* FLUSH CHANGED_PAGE_BITMAPS call */
if (!flush_changed_page_bitmaps()) {
- goto fail;
+ goto free_and_fail;
}
ut_a(xtrabackup_parallel > 0);
@@ -4647,7 +4661,7 @@ fail_before_log_copying_thread_start:
datafiles_iter_t *it = datafiles_iter_new();
if (it == NULL) {
msg("mariabackup: Error: datafiles_iter_new() failed.");
- goto fail;
+ goto free_and_fail;
}
/* Create data copying threads */
@@ -4702,6 +4716,9 @@ fail_before_log_copying_thread_start:
if (opt_log_innodb_page_corruption)
ok = corrupted_pages.print_to_file(MB_CORRUPTED_PAGES_FILE);
+ aligned_free(const_cast<byte*>(field_ref_zero));
+ field_ref_zero = nullptr;
+
if (!ok) {
goto fail;
}
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index aca0d6b5ae8..8de876ebe6b 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -321,6 +321,12 @@ constexpr ulint BUF_PAGE_READ_MAX_RETRIES= 100;
read-ahead buffer. (Divide buf_pool size by this amount) */
constexpr uint32_t BUF_READ_AHEAD_PORTION= 32;
+/** A 64KiB buffer of NUL bytes, for use in assertions and checks,
+and dummy default values of instantly dropped columns.
+Initially, BLOB field references are set to NUL bytes, in
+dtuple_convert_big_rec(). */
+const byte *field_ref_zero;
+
/** The InnoDB buffer pool */
buf_pool_t buf_pool;
buf_pool_t::chunk_t::map *buf_pool_t::chunk_t::map_reg;
@@ -698,7 +704,7 @@ static void buf_page_check_lsn(bool check_lsn, const byte* read_buf)
@return whether the buffer is all zeroes */
bool buf_is_zeroes(span<const byte> buf)
{
- ut_ad(buf.size() <= sizeof field_ref_zero);
+ ut_ad(buf.size() <= UNIV_PAGE_SIZE_MAX);
return memcmp(buf.data(), field_ref_zero, buf.size()) == 0;
}
@@ -1391,11 +1397,17 @@ bool buf_pool_t::create()
ut_ad(srv_buf_pool_size % srv_buf_pool_chunk_unit == 0);
ut_ad(!is_initialised());
ut_ad(srv_buf_pool_size > 0);
+ ut_ad(!resizing);
+ ut_ad(!chunks_old);
+ ut_ad(!field_ref_zero);
NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE;
- ut_ad(!resizing);
- ut_ad(!chunks_old);
+ if (auto b= aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096))
+ field_ref_zero= static_cast<const byte*>
+ (memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX));
+ else
+ return true;
chunk_t::map_reg= UT_NEW_NOKEY(chunk_t::map());
@@ -1426,6 +1438,8 @@ bool buf_pool_t::create()
chunks= nullptr;
UT_DELETE(chunk_t::map_reg);
chunk_t::map_reg= nullptr;
+ aligned_free(const_cast<byte*>(field_ref_zero));
+ field_ref_zero= nullptr;
ut_ad(!is_initialised());
return true;
}
@@ -1541,6 +1555,8 @@ void buf_pool_t::close()
io_buf.close();
UT_DELETE(chunk_t::map_reg);
chunk_t::map_reg= chunk_t::map_ref= nullptr;
+ aligned_free(const_cast<byte*>(field_ref_zero));
+ field_ref_zero= nullptr;
}
/** Try to reallocate a control block.
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 9d94aa5b062..caae4035f7c 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -526,7 +526,7 @@ inline bool dict_table_t::instant_column(const dict_table_t& table,
}
DBUG_ASSERT(c.is_added());
- if (c.def_val.len <= sizeof field_ref_zero
+ if (c.def_val.len <= UNIV_PAGE_SIZE_MAX
&& (!c.def_val.len
|| !memcmp(c.def_val.data, field_ref_zero,
c.def_val.len))) {
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index ba1e2e5eaa6..5dd581097f9 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved.
-Copyright (c) 2019, 2020, MariaDB Corporation.
+Copyright (c) 2019, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -177,11 +177,11 @@ private:
uint64_t m_id;
};
-/** A field reference full of zero, for use in assertions and checks,
+/** A 64KiB buffer of NUL bytes, for use in assertions and checks,
and dummy default values of instantly dropped columns.
-Initially, BLOB field references are set to zero, in
+Initially, BLOB field references are set to NUL bytes, in
dtuple_convert_big_rec(). */
-extern const byte field_ref_zero[UNIV_PAGE_SIZE_MAX];
+extern const byte *field_ref_zero;
#ifndef UNIV_INNOCHECKSUM
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index fbcc30ee7ca..460acaf5b66 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -617,9 +617,8 @@ public:
/*!< number of currently pending
checkpoint writes */
- /** buffer for checkpoint header */
- MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE)
- byte checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE];
+ /** buffer for checkpoint header */
+ byte *checkpoint_buf;
/* @} */
private:
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index ece26ded72e..a6fa50dd753 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -219,6 +219,8 @@ void log_t::create()
log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE);
buf_free= LOG_BLOCK_HDR_SIZE;
+ checkpoint_buf= static_cast<byte*>
+ (aligned_malloc(OS_FILE_LOG_BLOCK_SIZE, OS_FILE_LOG_BLOCK_SIZE));
}
mapped_file_t::~mapped_file_t() noexcept
@@ -461,8 +463,8 @@ void log_t::file::write_header_durable(lsn_t lsn)
ut_ad(log_sys.log.format == log_t::FORMAT_10_5 ||
log_sys.log.format == log_t::FORMAT_ENC_10_5);
- // man 2 open suggests this buffer to be aligned by 512 for O_DIRECT
- MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE) byte buf[OS_FILE_LOG_BLOCK_SIZE] = {0};
+ byte *buf= log_sys.checkpoint_buf;
+ memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format);
mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, log_sys.log.subformat);
@@ -475,7 +477,7 @@ void log_t::file::write_header_durable(lsn_t lsn)
DBUG_PRINT("ib_log", ("write " LSN_PF, lsn));
- log_sys.log.write(0, buf);
+ log_sys.log.write(0, {buf, OS_FILE_LOG_BLOCK_SIZE});
if (!log_sys.log.writes_are_durable())
log_sys.log.flush();
}
@@ -880,7 +882,7 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn)
log_sys.next_checkpoint_lsn));
byte* buf = log_sys.checkpoint_buf;
- memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
+ memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no);
mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys.next_checkpoint_lsn);
@@ -1282,18 +1284,21 @@ void log_t::close()
{
ut_ad(this == &log_sys);
if (!is_initialised()) return;
- m_initialised = false;
+ m_initialised= false;
log.close();
ut_free_dodump(buf, srv_log_buffer_size);
- buf = NULL;
+ buf= nullptr;
ut_free_dodump(flush_buf, srv_log_buffer_size);
- flush_buf = NULL;
+ flush_buf= nullptr;
mysql_mutex_destroy(&mutex);
mysql_mutex_destroy(&flush_order_mutex);
recv_sys.close();
+
+ aligned_free(checkpoint_buf);
+ checkpoint_buf= nullptr;
}
std::string get_log_file_path(const char *filename)
diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc
index 1ed9efafc69..74989fb018d 100644
--- a/storage/innobase/page/page0zip.cc
+++ b/storage/innobase/page/page0zip.cc
@@ -35,12 +35,6 @@ Created June 2005 by Marko Makela
using st_::span;
-/** A BLOB field reference full of zero, for use in assertions and tests.
-Initially, BLOB field references are set to zero, in
-dtuple_convert_big_rec(). */
-alignas(UNIV_PAGE_SIZE_MIN)
-const byte field_ref_zero[UNIV_PAGE_SIZE_MAX] = { 0, };
-
#ifndef UNIV_INNOCHECKSUM
#include "mtr0log.h"
#include "dict0dict.h"
@@ -92,16 +86,12 @@ static const byte supremum_extra_data alignas(4) [] = {
};
/** Assert that a block of memory is filled with zero bytes.
-Compare at most sizeof(field_ref_zero) bytes.
@param b in: memory block
@param s in: size of the memory block, in bytes */
-#define ASSERT_ZERO(b, s) \
- ut_ad(!memcmp(b, field_ref_zero, \
- std::min<size_t>(s, sizeof field_ref_zero)));
+#define ASSERT_ZERO(b, s) ut_ad(!memcmp(b, field_ref_zero, s))
/** Assert that a BLOB pointer is filled with zero bytes.
@param b in: BLOB pointer */
-#define ASSERT_ZERO_BLOB(b) \
- ut_ad(!memcmp(b, field_ref_zero, FIELD_REF_SIZE))
+#define ASSERT_ZERO_BLOB(b) ASSERT_ZERO(b, FIELD_REF_SIZE)
/* Enable some extra debugging output. This code can be enabled
independently of any UNIV_ debugging conditions. */
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 4b1926c0ec0..96cc3aa413e 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -3424,7 +3424,7 @@ row_ins_index_entry_set_vals(
field->len = UNIV_SQL_NULL;
field->type.prtype = DATA_BINARY_TYPE;
} else {
- ut_ad(col->len <= sizeof field_ref_zero);
+ ut_ad(col->len <= UNIV_PAGE_SIZE_MAX);
ut_ad(ind_field->fixed_len <= col->len);
dfield_set_data(field, field_ref_zero,
ind_field->fixed_len);