summaryrefslogtreecommitdiff
path: root/extra
diff options
context:
space:
mode:
authorThirunarayanan Balathandayuthapani <thiru@mariadb.com>2019-02-19 21:00:00 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2019-02-19 18:50:19 +0200
commitc0f47a4a58424c621204dacb8016a94b66cb2bce (patch)
treed318f8ea660d2d35de692fb00171bf12f0906d4a /extra
parent93984ff6d6aef2b9a522196f3db69e1d019221b9 (diff)
downloadmariadb-git-c0f47a4a58424c621204dacb8016a94b66cb2bce.tar.gz
MDEV-12026: Implement innodb_checksum_algorithm=full_crc32
MariaDB data-at-rest encryption (innodb_encrypt_tables) had repurposed the same unused data field that was repurposed in MySQL 5.7 (and MariaDB 10.2) for the Split Sequence Number (SSN) field of SPATIAL INDEX. Because of this, MariaDB was unable to support encryption on SPATIAL INDEX pages. Furthermore, InnoDB page checksums skipped some bytes, and there are multiple variations and checksum algorithms. By default, InnoDB accepts all variations of all algorithms that ever existed. This unnecessarily weakens the page checksums. We hereby introduce two more innodb_checksum_algorithm variants (full_crc32, strict_full_crc32) that are special in a way: When either setting is active, newly created data files will carry a flag (fil_space_t::full_crc32()) that indicates that all pages of the file will use a full CRC-32C checksum over the entire page contents (excluding the bytes where the checksum is stored, at the very end of the page). Such files will always use that checksum, no matter what the parameter innodb_checksum_algorithm is assigned to. For old files, the old checksum algorithms will continue to be used. The value strict_full_crc32 will be equivalent to strict_crc32 and the value full_crc32 will be equivalent to crc32. ROW_FORMAT=COMPRESSED tables will only use the old format. These tables do not support new features, such as larger innodb_page_size or instant ADD/DROP COLUMN. They may be deprecated in the future. We do not want an unnecessary file format change for them. The new full_crc32() format also cleans up the MariaDB tablespace flags. We will reserve flags to store the page_compressed compression algorithm, and to store the compressed payload length, so that the checksum can be computed over the compressed (and possibly encrypted) stream and can be validated without decrypting or decompressing the page. In the full_crc32 format, there are no longer separate before-encryption and after-encryption checksums for pages. 
The single checksum is computed on the page contents that are written to the file. We do not make the new algorithm the default for two reasons. First, MariaDB 10.4.2 was a beta release, and the default values of parameters should not change after beta. Second, we did not yet implement the full_crc32 format for page_compressed pages. This will be fixed in MDEV-18644. This is joint work with Marko Mäkelä.
Diffstat (limited to 'extra')
-rw-r--r--extra/innochecksum.cc167
-rw-r--r--extra/mariabackup/fil_cur.cc11
-rw-r--r--extra/mariabackup/xtrabackup.cc32
3 files changed, 118 insertions, 92 deletions
diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc
index 31826981ba3..55d41fc04ad 100644
--- a/extra/innochecksum.cc
+++ b/extra/innochecksum.cc
@@ -274,30 +274,27 @@ void print_leaf_stats(
}
}
-/** Get the ROW_FORMAT=COMPRESSED size from the filespace header.
-@param[in] buf buffer used to read the page.
-@return ROW_FORMAT_COMPRESSED page size
-@retval 0 if not ROW_FORMAT=COMPRESSED */
-static ulint get_zip_size(const byte* buf)
+/** Init the page size for the tablespace.
+@param[in] buf buffer used to read the page */
+static void init_page_size(const byte* buf)
{
const unsigned flags = mach_read_from_4(buf + FIL_PAGE_DATA
+ FSP_SPACE_FLAGS);
+ if (FSP_FLAGS_FCRC32_HAS_MARKER(flags)) {
+ srv_page_size = fil_space_t::logical_size(flags);
+ physical_page_size = srv_page_size;
+ return;
+ }
+
const ulong ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
srv_page_size_shift = ssize
? UNIV_ZIP_SIZE_SHIFT_MIN - 1 + ssize
: UNIV_PAGE_SIZE_SHIFT_ORIG;
- srv_page_size = 1U << srv_page_size_shift;
- ulint zip_size = FSP_FLAGS_GET_ZIP_SSIZE(flags);
- if (zip_size) {
- zip_size = (UNIV_ZIP_SIZE_MIN >> 1) << zip_size;
- physical_page_size = zip_size;
- } else {
- physical_page_size = srv_page_size;
- }
- return zip_size;
+ srv_page_size = fil_space_t::logical_size(flags);
+ physical_page_size = fil_space_t::physical_size(flags);
}
#ifdef _WIN32
@@ -429,19 +426,16 @@ ulint read_file(
/** Check if page is corrupted or not.
@param[in] buf page frame
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] is_encrypted true if page0 contained cryp_data
with crypt_scheme encrypted
-@param[in] is_compressed true if page0 fsp_flags contained
- page compression flag
+@param[in] flags tablespace flags
@retval true if page is corrupted otherwise false. */
static
bool
is_page_corrupted(
byte* buf,
- ulint zip_size,
bool is_encrypted,
- bool is_compressed)
+ ulint flags)
{
/* enable if page is corrupted. */
@@ -450,9 +444,12 @@ is_page_corrupted(
ulint logseq;
ulint logseqfield;
ulint page_type = mach_read_from_2(buf+FIL_PAGE_TYPE);
- uint key_version = mach_read_from_4(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ uint key_version = buf_page_get_key_version(buf, flags);
ulint space_id = mach_read_from_4(
buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ ulint zip_size = fil_space_t::zip_size(flags);
+ ulint is_compressed = fil_space_t::is_compressed(flags);
+ const bool use_full_crc32 = fil_space_t::full_crc32(flags);
/* We can't trust only a page type, thus we take account
also fsp_flags or crypt_data on page 0 */
@@ -468,9 +465,11 @@ is_page_corrupted(
/* check the stored log sequence numbers
for uncompressed tablespace. */
logseq = mach_read_from_4(buf + FIL_PAGE_LSN + 4);
- logseqfield = mach_read_from_4(
- buf + srv_page_size -
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4);
+ logseqfield = use_full_crc32
+ ? mach_read_from_4(buf + srv_page_size
+ - FIL_PAGE_FCRC32_END_LSN)
+ : mach_read_from_4(buf + srv_page_size
+ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4);
if (is_log_enabled) {
fprintf(log_file,
@@ -498,23 +497,22 @@ is_page_corrupted(
so if crypt checksum does not match we verify checksum using
normal method. */
if (is_encrypted && key_version != 0) {
- is_corrupted = !fil_space_verify_crypt_checksum(buf, zip_size);
+ is_corrupted = use_full_crc32
+ ? buf_page_is_corrupted(true, buf, flags)
+ : !fil_space_verify_crypt_checksum(buf, zip_size);
+
if (is_corrupted && log_file) {
fprintf(log_file,
"Page " ULINTPF ":%llu may be corrupted;"
" key_version=%u\n",
- space_id, cur_page_num,
- mach_read_from_4(
- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
- + buf));
+ space_id, cur_page_num, key_version);
}
} else {
is_corrupted = true;
}
if (is_corrupted) {
- is_corrupted = buf_page_is_corrupted(
- true, buf, zip_size, NULL);
+ is_corrupted = buf_page_is_corrupted(true, buf, flags);
}
return(is_corrupted);
@@ -566,17 +564,13 @@ is_page_empty(
/********************************************************************//**
Rewrite the checksum for the page.
@param [in/out] page page buffer
-@param [in] iscompressed Is compressed/Uncompressed Page.
+@param [in] flags tablespace flags
@retval true : do rewrite
@retval false : skip the rewrite as checksum stored match with
calculated or page is doublwrite buffer.
*/
-
-bool
-update_checksum(
- byte* page,
- bool iscompressed)
+static bool update_checksum(byte* page, ulint flags)
{
ib_uint32_t checksum = 0;
byte stored1[4]; /* get FIL_PAGE_SPACE_OR_CHKSUM field checksum */
@@ -588,6 +582,9 @@ update_checksum(
return (false);
}
+ const bool use_full_crc32 = fil_space_t::full_crc32(flags);
+ const bool iscompressed = fil_space_t::zip_size(flags);
+
memcpy(stored1, page + FIL_PAGE_SPACE_OR_CHKSUM, 4);
memcpy(stored2, page + physical_page_size -
FIL_PAGE_END_LSN_OLD_CHKSUM, 4);
@@ -615,12 +612,24 @@ update_checksum(
" %u\n", cur_page_num, checksum);
}
+ } else if (use_full_crc32) {
+ checksum = buf_calc_page_full_crc32(page);
+ byte* c = page + physical_page_size - FIL_PAGE_FCRC32_CHECKSUM;
+ if (mach_read_from_4(c) == checksum) return false;
+ mach_write_to_4(c, checksum);
+ if (is_log_enabled) {
+ fprintf(log_file, "page::%llu; Updated checksum"
+ " = %u\n", cur_page_num, checksum);
+ }
+ return true;
} else {
/* page is uncompressed. */
/* Store the new formula checksum */
switch ((srv_checksum_algorithm_t) write_check) {
+ case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
case SRV_CHECKSUM_ALGORITHM_CRC32:
case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
checksum = buf_calc_page_crc32(page);
@@ -636,6 +645,7 @@ update_checksum(
case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
checksum = BUF_NO_CHECKSUM_MAGIC;
break;
+
/* no default so the compiler will emit a warning if new
enum is added and not handled here */
}
@@ -689,8 +699,7 @@ func_exit:
@param[in,out] file file pointer where content
have to be written
@param[in] buf file buffer read
-@param[in] compressed Enabled if tablespace is
- compressed.
+@param[in] flags tablespace flags
@param[in,out] pos current file position.
@retval true if successfully written
@@ -702,12 +711,12 @@ write_file(
const char* filename,
FILE* file,
byte* buf,
- bool compressed,
+ ulint flags,
fpos_t* pos)
{
bool do_update;
- do_update = update_checksum(buf, compressed);
+ do_update = update_checksum(buf, flags);
if (file != stdin) {
if (do_update) {
@@ -1322,6 +1331,13 @@ innochecksum_get_one_option(
srv_checksum_algorithm =
SRV_CHECKSUM_ALGORITHM_STRICT_NONE;
break;
+
+ case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
+ srv_checksum_algorithm =
+ SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32;
+ break;
+
default:
return(true);
}
@@ -1415,30 +1431,21 @@ static bool check_encryption(const char* filename, const byte* page)
return (type == CRYPT_SCHEME_1);
}
-/**
-Verify page checksum.
+/** Verify page checksum.
@param[in] buf page to verify
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] is_encrypted true if tablespace is encrypted
-@param[in] is_compressed true if tablespace is page compressed
@param[in,out] mismatch_count Number of pages failed in checksum verify
-@retval 0 if page checksum matches or 1 if it does not match
-*/
-static
-int verify_checksum(
- byte* buf,
- ulint zip_size,
- bool is_encrypted,
- bool is_compressed,
- unsigned long long* mismatch_count)
+@param[in] flags tablespace flags
+@retval 0 if page checksum matches or 1 if it does not match */
+static int verify_checksum(
+ byte* buf,
+ bool is_encrypted,
+ unsigned long long* mismatch_count,
+ ulint flags)
{
int exit_status = 0;
- bool is_corrupted = false;
-
- is_corrupted = is_page_corrupted(
- buf, zip_size, is_encrypted, is_compressed);
-
- if (is_corrupted) {
+ if (is_page_corrupted(buf, is_encrypted, flags)) {
fprintf(stderr, "Fail: page::%llu invalid\n",
cur_page_num);
@@ -1464,10 +1471,9 @@ int verify_checksum(
@param[in] filename File name
@param[in] fil_in File pointer
@param[in] buf page
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] pos File position
@param[in] is_encrypted true if tablespace is encrypted
-@param[in] is_compressed true if tablespace is page compressed
+@param[in] flags tablespace flags
@retval 0 if checksum rewrite was successful, 1 if error was detected */
static
int
@@ -1475,24 +1481,16 @@ rewrite_checksum(
const char* filename,
FILE* fil_in,
byte* buf,
- ulint zip_size,
fpos_t* pos,
- bool is_encrypted,
- bool is_compressed)
+ bool is_encrypted,
+ ulint flags)
{
- int exit_status = 0;
+ bool is_compressed = fil_space_t::is_compressed(flags);
+
/* Rewrite checksum. Note that for encrypted and
page compressed tables this is not currently supported. */
- if (do_write &&
- !is_encrypted &&
- !is_compressed
- && !write_file(filename, fil_in, buf,
- zip_size, pos)) {
-
- exit_status = 1;
- }
-
- return (exit_status);
+ return do_write && !is_encrypted && !is_compressed
+ && !write_file(filename, fil_in, buf, flags, pos);
}
int main(
@@ -1668,10 +1666,9 @@ int main(
/* Determine page size, zip_size and page compression
from fsp_flags and encryption metadata from page 0 */
- ulint zip_size = get_zip_size(buf);
+ init_page_size(buf);
ulint flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + buf);
- bool is_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(flags);
if (physical_page_size > UNIV_ZIP_SIZE_MIN) {
/* Read rest of the page 0 to determine crypt_data */
@@ -1698,7 +1695,8 @@ int main(
unsigned long long tmp_allow_mismatches = allow_mismatches;
allow_mismatches = 0;
- exit_status = verify_checksum(buf, zip_size, is_encrypted, is_compressed, &mismatch_count);
+ exit_status = verify_checksum(buf, is_encrypted,
+ &mismatch_count, flags);
if (exit_status) {
fprintf(stderr, "Error: Page 0 checksum mismatch, can't continue. \n");
@@ -1707,8 +1705,9 @@ int main(
allow_mismatches = tmp_allow_mismatches;
}
- if ((exit_status = rewrite_checksum(filename, fil_in, buf,
- zip_size, &pos, is_encrypted, is_compressed))) {
+ if ((exit_status = rewrite_checksum(
+ filename, fil_in, buf,
+ &pos, is_encrypted, flags))) {
goto my_exit;
}
@@ -1874,13 +1873,15 @@ int main(
checksum verification.*/
if (!no_check
&& !skip_page
- && (exit_status = verify_checksum(buf, zip_size,
- is_encrypted, is_compressed, &mismatch_count))) {
+ && (exit_status = verify_checksum(
+ buf, is_encrypted,
+ &mismatch_count, flags))) {
goto my_exit;
}
- if ((exit_status = rewrite_checksum(filename, fil_in, buf,
- zip_size, &pos, is_encrypted, is_compressed))) {
+ if ((exit_status = rewrite_checksum(
+ filename, fil_in, buf,
+ &pos, is_encrypted, flags))) {
goto my_exit;
}
diff --git a/extra/mariabackup/fil_cur.cc b/extra/mariabackup/fil_cur.cc
index 04bf2b1f64d..ff187182dca 100644
--- a/extra/mariabackup/fil_cur.cc
+++ b/extra/mariabackup/fil_cur.cc
@@ -312,6 +312,10 @@ static bool page_is_corrupted(const byte *page, ulint page_no,
return false;
}
+ if (space->full_crc32()) {
+ return buf_page_is_corrupted(true, page, space->flags);
+ }
+
/* Validate encrypted pages. The first page is never encrypted.
In the system tablespace, the first page would be written with
FIL_PAGE_FILE_FLUSH_LSN at shutdown, and if the LSN exceeds
@@ -344,7 +348,8 @@ static bool page_is_corrupted(const byte *page, ulint page_no,
}
if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
- return buf_page_is_corrupted(true, tmp_page, 0, space);
+ return buf_page_is_corrupted(true, tmp_page,
+ space->flags);
}
}
@@ -363,10 +368,10 @@ static bool page_is_corrupted(const byte *page, ulint page_no,
|| page_type == FIL_PAGE_PAGE_COMPRESSED
|| page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
|| buf_page_is_corrupted(true, tmp_page,
- space->zip_size(), space));
+ space->flags));
}
- return buf_page_is_corrupted(true, page, space->zip_size(), space);
+ return buf_page_is_corrupted(true, page, space->flags);
}
/************************************************************************
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc
index 60df902e098..0a547f21eff 100644
--- a/extra/mariabackup/xtrabackup.cc
+++ b/extra/mariabackup/xtrabackup.cc
@@ -1689,7 +1689,7 @@ xb_get_one_option(int optid,
case OPT_INNODB_CHECKSUM_ALGORITHM:
- ut_a(srv_checksum_algorithm <= SRV_CHECKSUM_ALGORITHM_STRICT_NONE);
+ ut_a(srv_checksum_algorithm <= SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32);
ADD_PRINT_PARAM_OPT(innodb_checksum_algorithm_names[srv_checksum_algorithm]);
break;
@@ -1864,7 +1864,15 @@ static bool innodb_init_param()
srv_sys_space.set_space_id(TRX_SYS_SPACE);
srv_sys_space.set_name("innodb_system");
srv_sys_space.set_path(srv_data_home);
- srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
+ switch (srv_checksum_algorithm) {
+ case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
+ srv_sys_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER
+ | FSP_FLAGS_FCRC32_PAGE_SSIZE());
+ break;
+ default:
+ srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE());
+ }
if (!srv_sys_space.parse_params(innobase_data_file_path, true)) {
goto error;
@@ -3278,7 +3286,8 @@ static dberr_t xb_assign_undo_space_start()
bool ret;
dberr_t error = DB_SUCCESS;
ulint space, page_no __attribute__((unused));
- int n_retries = 5;
+ int n_retries = 5;
+ ulint fsp_flags;
if (srv_undo_tablespaces == 0) {
return error;
@@ -3295,6 +3304,15 @@ static dberr_t xb_assign_undo_space_start()
buf = static_cast<byte*>(ut_malloc_nokey(2U << srv_page_size_shift));
page = static_cast<byte*>(ut_align(buf, srv_page_size));
+ if (!os_file_read(IORequestRead, file, page,
+ 0, srv_page_size)) {
+ msg("Reading first page failed.\n");
+ error = DB_ERROR;
+ goto func_exit;
+ }
+
+ fsp_flags = mach_read_from_4(
+ page + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS);
retry:
if (!os_file_read(IORequestRead, file, page,
TRX_SYS_PAGE_NO << srv_page_size_shift,
@@ -3305,7 +3323,7 @@ retry:
}
/* TRX_SYS page can't be compressed or encrypted. */
- if (buf_page_is_corrupted(false, page, 0)) {
+ if (buf_page_is_corrupted(false, page, fsp_flags)) {
if (n_retries--) {
os_thread_sleep(1000);
goto retry;
@@ -4586,7 +4604,9 @@ xb_space_create_file(
const ulint zip_size = fil_space_t::zip_size(flags);
if (!zip_size) {
- buf_flush_init_for_writing(NULL, page, NULL, 0);
+ buf_flush_init_for_writing(
+ NULL, page, NULL, 0,
+ fil_space_t::full_crc32(flags));
ret = os_file_write(IORequestWrite, path, *file, page, 0,
srv_page_size);
@@ -4602,7 +4622,7 @@ xb_space_create_file(
page_zip.m_end = page_zip.m_nonempty =
page_zip.n_blobs = 0;
- buf_flush_init_for_writing(NULL, page, &page_zip, 0);
+ buf_flush_init_for_writing(NULL, page, &page_zip, 0, false);
ret = os_file_write(IORequestWrite, path, *file,
page_zip.data, 0, zip_size);