summaryrefslogtreecommitdiff
path: root/storage/innobase/include
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/include')
-rw-r--r--storage/innobase/include/buf0buf.h27
-rw-r--r--storage/innobase/include/buf0buf.inl3
-rw-r--r--storage/innobase/include/buf0flu.h29
-rw-r--r--storage/innobase/include/data0data.h10
-rw-r--r--storage/innobase/include/dict0dict.h20
-rw-r--r--storage/innobase/include/dict0mem.h32
-rw-r--r--storage/innobase/include/fil0fil.h92
-rw-r--r--storage/innobase/include/fsp0file.h5
-rw-r--r--storage/innobase/include/fsp0sysspace.h9
-rw-r--r--storage/innobase/include/ibuf0ibuf.inl18
-rw-r--r--storage/innobase/include/log0crypt.h68
-rw-r--r--storage/innobase/include/log0log.h814
-rw-r--r--storage/innobase/include/log0log.inl311
-rw-r--r--storage/innobase/include/log0recv.h120
-rw-r--r--storage/innobase/include/mtr0log.h3
-rw-r--r--storage/innobase/include/mtr0mtr.h37
-rw-r--r--storage/innobase/include/mtr0mtr.inl19
-rw-r--r--storage/innobase/include/mtr0types.h29
-rw-r--r--storage/innobase/include/os0file.h10
-rw-r--r--storage/innobase/include/page0cur.inl3
-rw-r--r--storage/innobase/include/page0page.h2
-rw-r--r--storage/innobase/include/rem0cmp.h185
-rw-r--r--storage/innobase/include/rem0cmp.inl107
-rw-r--r--storage/innobase/include/row0merge.h1
-rw-r--r--storage/innobase/include/srv0mon.h9
-rw-r--r--storage/innobase/include/srv0srv.h45
-rw-r--r--storage/innobase/include/univ.i3
-rw-r--r--storage/innobase/include/ut0crc32.h37
-rw-r--r--storage/innobase/include/ut0ut.h10
29 files changed, 616 insertions, 1442 deletions
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 0c61888e5cc..24a092ed5b5 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -808,7 +808,7 @@ public:
{
ut_ad(fsp_is_system_temporary(id().space()));
ut_ad(in_file());
- ut_ad(!oldest_modification());
+ ut_ad(!oldest_modification() || oldest_modification() == 2);
oldest_modification_= 2;
}
@@ -1743,6 +1743,12 @@ public:
FlushHp flush_hp;
/** modified blocks (a subset of LRU) */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
+ /** number of blocks ever added to flush_list;
+ sometimes protected by flush_list_mutex */
+ size_t flush_list_requests;
+
+ TPOOL_SUPPRESS_TSAN void add_flush_list_requests(size_t size)
+ { ut_ad(size); flush_list_requests+= size; }
private:
/** whether the page cleaner needs wakeup from indefinite sleep */
bool page_cleaner_is_idle;
@@ -1753,7 +1759,7 @@ public:
pthread_cond_t do_flush_list;
/** @return whether the page cleaner must sleep due to being idle */
- bool page_cleaner_idle() const
+ bool page_cleaner_idle() const noexcept
{
mysql_mutex_assert_owner(&flush_list_mutex);
return page_cleaner_is_idle;
@@ -1878,24 +1884,31 @@ public:
private:
/** Remove a block from the flush list. */
- inline void delete_from_flush_list_low(buf_page_t *bpage);
+ inline void delete_from_flush_list_low(buf_page_t *bpage) noexcept;
/** Remove a block from flush_list.
@param bpage buffer pool page
@param clear whether to invoke buf_page_t::clear_oldest_modification() */
- void delete_from_flush_list(buf_page_t *bpage, bool clear);
+ void delete_from_flush_list(buf_page_t *bpage, bool clear) noexcept;
public:
/** Remove a block from flush_list.
@param bpage buffer pool page */
- void delete_from_flush_list(buf_page_t *bpage)
+ void delete_from_flush_list(buf_page_t *bpage) noexcept
{ delete_from_flush_list(bpage, true); }
+ /** Prepare to insert a modified block into flush_list.
+ @param lsn start LSN of the mini-transaction
+ @return insert position for insert_into_flush_list() */
+ inline buf_page_t *prepare_insert_into_flush_list(lsn_t lsn) noexcept;
+
/** Insert a modified block into the flush list.
+ @param prev insert position (from prepare_insert_into_flush_list())
@param block modified block
@param lsn start LSN of the mini-transaction that modified the block */
- void insert_into_flush_list(buf_block_t *block, lsn_t lsn);
+ inline void insert_into_flush_list(buf_page_t *prev, buf_block_t *block,
+ lsn_t lsn) noexcept;
/** Free a page whose underlying file page has been freed. */
- inline void release_freed_page(buf_page_t *bpage);
+ inline void release_freed_page(buf_page_t *bpage) noexcept;
private:
/** Temporary memory for page_compressed and encrypted I/O */
diff --git a/storage/innobase/include/buf0buf.inl b/storage/innobase/include/buf0buf.inl
index 3c4da98f83b..4516a24803c 100644
--- a/storage/innobase/include/buf0buf.inl
+++ b/storage/innobase/include/buf0buf.inl
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2014, 2021, MariaDB Corporation.
+Copyright (c) 2014, 2022, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -141,4 +141,3 @@ buf_block_get_modify_clock(
ut_ad(block->page.lock.have_any());
return(block->modify_clock);
}
-
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 665fd1115e7..af38f61b13b 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2021, MariaDB Corporation.
+Copyright (c) 2014, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -103,33 +103,6 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn);
@param furious true=furious flushing, false=limit to innodb_io_capacity */
ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious);
-/********************************************************************//**
-This function should be called at a mini-transaction commit, if a page was
-modified in it. Puts the block to the list of modified blocks, if it not
-already in it. */
-inline void buf_flush_note_modification(buf_block_t *b, lsn_t start, lsn_t end)
-{
- ut_ad(!srv_read_only_mode);
- ut_d(const auto s= b->page.state());
- ut_ad(s > buf_page_t::FREED);
- ut_ad(s < buf_page_t::READ_FIX);
- ut_ad(mach_read_from_8(b->page.frame + FIL_PAGE_LSN) <= end);
- mach_write_to_8(b->page.frame + FIL_PAGE_LSN, end);
- if (UNIV_LIKELY_NULL(b->page.zip.data))
- memcpy_aligned<8>(FIL_PAGE_LSN + b->page.zip.data,
- FIL_PAGE_LSN + b->page.frame, 8);
-
- const lsn_t oldest_modification= b->page.oldest_modification();
-
- if (oldest_modification > 1)
- ut_ad(oldest_modification <= start);
- else if (fsp_is_system_temporary(b->page.id().space()))
- b->page.set_temp_modified();
- else
- buf_pool.insert_into_flush_list(b, start);
- srv_stats.buf_pool_write_requests.inc();
-}
-
/** Initialize page_cleaner. */
ATTRIBUTE_COLD void buf_flush_page_cleaner_init();
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index c2b8c3e00b6..a9b4aeb1ea3 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -316,16 +316,6 @@ dtuple_get_n_ext(
/*=============*/
const dtuple_t* tuple) /*!< in: tuple */
MY_ATTRIBUTE((nonnull));
-/** Compare two data tuples.
-@param[in] tuple1 first data tuple
-@param[in] tuple2 second data tuple
-@return positive, 0, negative if tuple1 is greater, equal, less, than tuple2,
-respectively */
-int
-dtuple_coll_cmp(
- const dtuple_t* tuple1,
- const dtuple_t* tuple2)
- MY_ATTRIBUTE((warn_unused_result));
/** Fold a prefix given as the number of fields of a tuple.
@param[in] tuple index record
@param[in] n_fields number of complete fields to fold
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 57af86bcf85..f580a0f49b5 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1092,16 +1092,16 @@ dict_table_get_nth_col_pos(
ulint n, /*!< in: column number */
ulint* prefix_col_pos) /*!< out: col num if prefix */
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-/*******************************************************************//**
-Adds a column to index. */
-void
-dict_index_add_col(
-/*===============*/
- dict_index_t* index, /*!< in/out: index */
- const dict_table_t* table, /*!< in: table */
- dict_col_t* col, /*!< in: column */
- ulint prefix_len) /*!< in: column prefix length */
- MY_ATTRIBUTE((nonnull));
+/** Add a column to an index.
+@param index index
+@param table table
+@param col column
+@param prefix_len column prefix length
+@param descending whether to use descending order */
+void dict_index_add_col(dict_index_t *index, const dict_table_t *table,
+ dict_col_t *col, ulint prefix_len,
+ bool descending= false)
+ MY_ATTRIBUTE((nonnull));
/*******************************************************************//**
Copies types of fields contained in index to tuple. */
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index aaf232366e3..5a316f8c734 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -396,18 +396,7 @@ dict_mem_index_create(
ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields); /*!< in: number of fields */
-/**********************************************************************//**
-Adds a field definition to an index. NOTE: does not take a copy
-of the column name if the field is a column. The memory occupied
-by the column name may be released only after publishing the index. */
-void
-dict_mem_index_add_field(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- const char* name, /*!< in: column name */
- ulint prefix_len); /*!< in: 0 or the column prefix length
- in a MySQL index like
- INDEX (textcol(25)) */
+
/**********************************************************************//**
Frees an index memory object. */
void
@@ -886,9 +875,11 @@ struct dict_field_t{
unsigned fixed_len:10; /*!< 0 or the fixed length of the
column if smaller than
DICT_ANTELOPE_MAX_INDEX_COL_LEN */
+ /** 1=DESC, 0=ASC */
+ unsigned descending:1;
/** Zero-initialize all fields */
- dict_field_t() : col(NULL), name(NULL), prefix_len(0), fixed_len(0) {}
+ dict_field_t() { memset((void*) this, 0, sizeof *this); }
/** Check whether two index fields are equivalent.
@param[in] old the other index field
@@ -1434,6 +1425,21 @@ inline void dict_col_t::detach(const dict_index_t &index)
reinterpret_cast<dict_v_col_t*>(this)->detach(index);
}
+/** Add a field definition to an index.
+@param index index
+@param name pointer to column name
+@param prefix_len column prefix length, or 0
+@param descending whether to use descending order */
+inline void dict_mem_index_add_field(dict_index_t *index, const char *name,
+ ulint prefix_len, bool descending= false)
+{
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ dict_field_t &field= index->fields[index->n_def++];
+ field.name= name;
+ field.prefix_len= prefix_len & ((1U << 12) - 1);
+ field.descending= descending;
+}
+
/** The status of online index creation */
enum online_index_status {
/** the index is complete and ready for access */
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 79583000173..d6c7e07eaf5 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -353,13 +353,10 @@ struct fil_space_t final
/** fil_system.spaces chain node */
fil_space_t *hash;
- lsn_t max_lsn;
- /*!< LSN of the most recent
- fil_names_write_if_was_clean().
- Reset to 0 by fil_names_clear().
- Protected by log_sys.mutex.
- If and only if this is nonzero, the
- tablespace will be in named_spaces. */
+ /** LSN of the most recent fil_names_write_if_was_clean().
+ Reset to 0 by fil_names_clear(). Protected by exclusive log_sys.latch.
+ If and only if max_lsn is nonzero, this is in fil_system.named_spaces. */
+ lsn_t max_lsn;
/** tablespace identifier */
uint32_t id;
/** whether undo tablespace truncation is in progress */
@@ -427,9 +424,10 @@ private:
/** Whether any corrupton of this tablespace has been reported */
mutable std::atomic_flag is_corrupted;
+public:
/** mutex to protect freed_ranges and last_freed_lsn */
std::mutex freed_range_mutex;
-
+private:
/** Ranges of freed page numbers; protected by freed_range_mutex */
range_set freed_ranges;
@@ -649,11 +647,7 @@ public:
/** @return last_freed_lsn */
lsn_t get_last_freed_lsn() { return last_freed_lsn; }
/** Update last_freed_lsn */
- void update_last_freed_lsn(lsn_t lsn)
- {
- std::lock_guard<std::mutex> freed_lock(freed_range_mutex);
- last_freed_lsn= lsn;
- }
+ void update_last_freed_lsn(lsn_t lsn) { last_freed_lsn= lsn; }
/** Note that the file will need fsync().
@return whether this needs to be added to fil_system.unflushed_spaces */
@@ -674,11 +668,7 @@ public:
/** Clear all freed ranges for undo tablespace when InnoDB
encounters TRIM redo log record */
- void clear_freed_ranges()
- {
- std::lock_guard<std::mutex> freed_lock(freed_range_mutex);
- freed_ranges.clear();
- }
+ void clear_freed_ranges() { freed_ranges.clear(); }
#endif /* !UNIV_INNOCHECKSUM */
/** FSP_SPACE_FLAGS and FSP_FLAGS_MEM_ flags;
check fsp0types.h to more info about flags. */
@@ -951,7 +941,6 @@ public:
/** Add the set of freed page ranges */
void add_free_range(const range_t range)
{
- std::lock_guard<std::mutex> freed_lock(freed_range_mutex);
freed_ranges.add_range(range);
}
@@ -1052,7 +1041,7 @@ struct fil_node_t final
{
/** tablespace containing this file */
fil_space_t *space;
- /** file name; protected by fil_system.mutex and log_sys.mutex */
+ /** file name; protected by fil_system.mutex and exclusive log_sys.latch */
char *name;
/** file handle */
pfs_os_file_t handle;
@@ -1444,14 +1433,12 @@ public:
/** nonzero if fil_node_open_file_low() should avoid moving the tablespace
to the end of space_list, for FIFO policy of try_to_close() */
ulint freeze_space_list;
+ /** list of all tablespaces */
ilist<fil_space_t, space_list_tag_t> space_list;
- /*!< list of all file spaces */
+ /** list of all tablespaces for which a FILE_MODIFY record has been written
+ since the latest redo log checkpoint.
+ Protected only by exclusive log_sys.latch. */
ilist<fil_space_t, named_spaces_tag_t> named_spaces;
- /*!< list of all file spaces
- for which a FILE_MODIFY
- record has been written since
- the latest redo log checkpoint.
- Protected only by log_sys.mutex. */
/** list of all ENCRYPTED=DEFAULT tablespaces that need
to be converted to the current value of innodb_encrypt_tables */
@@ -1603,15 +1590,6 @@ Sets the max tablespace id counter if the given number is bigger than the
previous value. */
void fil_set_max_space_id_if_bigger(uint32_t max_id);
-/** Write the flushed LSN to the page header of the first page in the
-system tablespace.
-@param[in] lsn flushed LSN
-@return DB_SUCCESS or error number */
-dberr_t
-fil_write_flushed_lsn(
- lsn_t lsn)
-MY_ATTRIBUTE((warn_unused_result));
-
MY_ATTRIBUTE((warn_unused_result))
/** Delete a tablespace and associated .ibd file.
@param id tablespace identifier
@@ -1780,50 +1758,14 @@ void
fil_names_dirty(
fil_space_t* space);
-/** Write FILE_MODIFY records when a non-predefined persistent
-tablespace was modified for the first time since the latest
-fil_names_clear().
-@param[in,out] space tablespace */
-void fil_names_dirty_and_write(fil_space_t* space);
-
-/** Write FILE_MODIFY records if a persistent tablespace was modified
-for the first time since the latest fil_names_clear().
-@param[in,out] space tablespace
-@param[in,out] mtr mini-transaction
-@return whether any FILE_MODIFY record was written */
-inline bool fil_names_write_if_was_clean(fil_space_t* space)
-{
- mysql_mutex_assert_owner(&log_sys.mutex);
-
- if (space == NULL) {
- return(false);
- }
-
- const bool was_clean = space->max_lsn == 0;
- ut_ad(space->max_lsn <= log_sys.get_lsn());
- space->max_lsn = log_sys.get_lsn();
-
- if (was_clean) {
- fil_names_dirty_and_write(space);
- }
-
- return(was_clean);
-}
-
bool fil_comp_algo_loaded(ulint comp_algo);
/** On a log checkpoint, reset fil_names_dirty_and_write() flags
-and write out FILE_MODIFY and FILE_CHECKPOINT if needed.
-@param[in] lsn checkpoint LSN
-@param[in] do_write whether to always write FILE_CHECKPOINT
-@return whether anything was written to the redo log
-@retval false if no flags were set and nothing written
-@retval true if anything was written to the redo log */
-bool
-fil_names_clear(
- lsn_t lsn,
- bool do_write);
+and write out FILE_MODIFY if needed, and write FILE_CHECKPOINT.
+@param lsn checkpoint LSN
+@return current LSN */
+lsn_t fil_names_clear(lsn_t lsn);
#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
void test_make_filepath();
diff --git a/storage/innobase/include/fsp0file.h b/storage/innobase/include/fsp0file.h
index 9dfb3cc7561..ce11b868bd1 100644
--- a/storage/innobase/include/fsp0file.h
+++ b/storage/innobase/include/fsp0file.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2021, MariaDB Corporation.
+Copyright (c) 2018, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -216,11 +216,10 @@ public:
tablespace is opened. This occurs before the fil_space_t is created
so the Space ID found here must not already be open.
m_is_valid is set true on success, else false.
- @param[out] flush_lsn contents of FIL_PAGE_FILE_FLUSH_LSN
@retval DB_SUCCESS on if the datafile is valid
@retval DB_CORRUPTION if the datafile is not readable
@retval DB_TABLESPACE_EXISTS if there is a duplicate space_id */
- dberr_t validate_first_page(lsn_t* flush_lsn)
+ dberr_t validate_first_page()
MY_ATTRIBUTE((warn_unused_result));
/** Get Datafile::m_filepath.
diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h
index b6bdadd3501..514f3fdbf25 100644
--- a/storage/innobase/include/fsp0sysspace.h
+++ b/storage/innobase/include/fsp0sysspace.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2021, MariaDB Corporation.
+Copyright (c) 2016, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -151,20 +151,17 @@ public:
@param[in] is_temp whether this is a temporary tablespace
@param[in] create_new_db whether we are creating a new database
@param[out] sum_new_sizes sum of sizes of the new files added
- @param[out] flush_lsn FIL_PAGE_FILE_FLUSH_LSN of first file
@return DB_SUCCESS or error code */
dberr_t open_or_create(
bool is_temp,
bool create_new_db,
- ulint* sum_new_sizes,
- lsn_t* flush_lsn)
+ ulint* sum_new_sizes)
MY_ATTRIBUTE((warn_unused_result));
private:
/** Check the tablespace header for this tablespace.
- @param[out] flushed_lsn the value of FIL_PAGE_FILE_FLUSH_LSN
@return DB_SUCCESS or error code */
- dberr_t read_lsn_and_check_flags(lsn_t* flushed_lsn);
+ inline dberr_t read_lsn_and_check_flags();
/**
@return true if the last file size is valid. */
diff --git a/storage/innobase/include/ibuf0ibuf.inl b/storage/innobase/include/ibuf0ibuf.inl
index 2c2620511c7..2d8265d2206 100644
--- a/storage/innobase/include/ibuf0ibuf.inl
+++ b/storage/innobase/include/ibuf0ibuf.inl
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -130,12 +131,17 @@ ibuf_should_try(
a secondary index when we
decide */
{
- return(innodb_change_buffering
- && ibuf.max_size != 0
- && !dict_index_is_clust(index)
- && !dict_index_is_spatial(index)
- && index->table->quiesce == QUIESCE_NONE
- && (ignore_sec_unique || !dict_index_is_unique(index)));
+ if (!innodb_change_buffering || !ibuf.max_size || index->is_clust() ||
+ index->is_spatial())
+ return false;
+ if (!ignore_sec_unique && index->is_unique())
+ return false;
+ if (index->table->quiesce != QUIESCE_NONE)
+ return false;
+ for (unsigned i= 0; i < index->n_fields; i++)
+ if (index->fields[i].descending)
+ return false;
+ return true;
}
/******************************************************************//**
diff --git a/storage/innobase/include/log0crypt.h b/storage/innobase/include/log0crypt.h
index b9390927ece..22c0c9636bf 100644
--- a/storage/innobase/include/log0crypt.h
+++ b/storage/innobase/include/log0crypt.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (C) 2013, 2015, Google Inc. All Rights Reserved.
-Copyright (C) 2014, 2021, MariaDB Corporation.
+Copyright (C) 2014, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,27 +24,26 @@ Created 11/25/2013 Minli Zhu
Modified Jan Lindström jan.lindstrom@mariadb.com
MDEV-11782: Rewritten for MariaDB 10.2 by Marko Mäkelä, MariaDB Corporation.
*******************************************************/
-#ifndef log0crypt_h
-#define log0crypt_h
+#pragma once
#include "log0log.h"
-/** innodb_encrypt_log: whether to encrypt the redo log */
-extern my_bool srv_encrypt_log;
-
/** Initialize the redo log encryption key and random parameters
when creating a new redo log.
-The random parameters will be persisted in the log checkpoint pages.
-@see log_crypt_write_checkpoint_buf()
-@see log_crypt_read_checkpoint_buf()
+The random parameters will be persisted in the log header.
+@see log_crypt_write_header()
+@see log_crypt_read_header()
@return whether the operation succeeded */
bool log_crypt_init();
-/*********************************************************************//**
-Writes the crypto (version, msg and iv) info, which has been used for
-log blocks with lsn <= this checkpoint's lsn, to a log header's
-checkpoint buf. */
-void log_crypt_write_checkpoint_buf(byte *buf);
+/** Add the encryption information to the log header buffer.
+@param buf part of log header buffer */
+void log_crypt_write_header(byte *buf);
+
+/** Read the encryption information from a log header buffer.
+@param buf part of log header buffer
+@return whether the operation was successful */
+bool log_crypt_read_header(const byte *buf);
/** Read the MariaDB 10.1 checkpoint crypto (version, msg and iv) info.
@param[in] buf checkpoint buffer
@@ -60,25 +59,28 @@ ATTRIBUTE_COLD bool log_crypt_101_read_block(byte* buf, lsn_t start_lsn);
/** Read the checkpoint crypto (version, msg and iv) info.
@param[in] buf checkpoint buffer
@return whether the operation was successful */
-bool log_crypt_read_checkpoint_buf(const byte* buf);
-
-/** log_crypt() operation code */
-enum log_crypt_t {
- /** encrypt a log block without rotating key */
- LOG_ENCRYPT,
- /** decrypt a log block */
- LOG_DECRYPT,
- /** attempt to rotate the key, and encrypt a log block */
- LOG_ENCRYPT_ROTATE_KEY
-};
-
-/** Encrypt or decrypt log blocks.
-@param[in,out] buf log blocks to encrypt or decrypt
+ATTRIBUTE_COLD bool log_crypt_read_checkpoint_buf(const byte* buf);
+
+/** Decrypt log blocks.
+@param[in,out] buf log blocks to decrypt
@param[in] lsn log sequence number of the start of the buffer
@param[in] size size of the buffer, in bytes
-@param[in] op whether to decrypt, encrypt, or rotate key and encrypt
-@return whether the operation succeeded (encrypt always does) */
-bool log_crypt(byte* buf, lsn_t lsn, ulint size, log_crypt_t op = LOG_ENCRYPT);
+@return whether the operation succeeded */
+ATTRIBUTE_COLD bool log_decrypt(byte* buf, lsn_t lsn, ulint size);
+
+/** Decrypt part of a log record.
+@param iv initialization vector
+@param buf buffer for the decrypted data
+@param data the encrypted data
+@param len length of the data, in bytes
+@return buf */
+byte *log_decrypt_buf(const byte *iv, byte *buf, const byte *data, uint len);
+
+/** Decrypt a log snippet.
+@param iv initialization vector
+@param buf buffer holding encrypted contents, to be decrypted in place
+@param end pointer past the end of buf */
+void log_decrypt_buf(const byte *iv, byte *buf, const byte *const end);
/** Encrypt or decrypt a temporary file block.
@param[in] src block to encrypt or decrypt
@@ -111,7 +113,3 @@ log_tmp_block_decrypt(
{
return(log_tmp_block_encrypt(src, size, dst, offs, false));
}
-
-/** @return whether temporary files are encrypted */
-inline bool log_tmp_is_encrypted() { return srv_encrypt_log; }
-#endif // log0crypt.h
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 629ddacdf1b..d1c6e40d946 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2009, Google Inc.
-Copyright (c) 2017, 2021, MariaDB Corporation.
+Copyright (c) 2017, 2022, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -31,14 +31,13 @@ Database log
Created 12/9/1995 Heikki Tuuri
*******************************************************/
-#ifndef log0log_h
-#define log0log_h
+#pragma once
#include "log0types.h"
#include "os0file.h"
#include "span.h"
#include "my_atomic_wrapper.h"
-#include <vector>
+#include "srw_lock.h"
#include <string>
using st_::span;
@@ -51,77 +50,34 @@ static const char LOG_FILE_NAME[] = "ib_logfile0";
@return path with log file name*/
std::string get_log_file_path(const char *filename= LOG_FILE_NAME);
-/** Returns paths for all existing log files */
-std::vector<std::string> get_existing_log_files_paths();
-
/** Delete log file.
@param[in] suffix suffix of the file name */
static inline void delete_log_file(const char* suffix)
{
auto path = get_log_file_path(LOG_FILE_NAME_PREFIX).append(suffix);
- os_file_delete_if_exists(innodb_log_file_key, path.c_str(), nullptr);
+ os_file_delete_if_exists_func(path.c_str(), nullptr);
}
-/** Append a string to the log.
-@param[in] str string
-@param[in] len string length
-@param[out] start_lsn start LSN of the log record
-@return end lsn of the log record, zero if did not succeed */
-UNIV_INLINE
-lsn_t
-log_reserve_and_write_fast(
- const void* str,
- ulint len,
- lsn_t* start_lsn);
-/***********************************************************************//**
-Checks if there is need for a log buffer flush or a new checkpoint, and does
-this if yes. Any database operation should call this when it has modified
-more than about 4 pages. NOTE that this function may only be called when the
-OS thread owns no synchronization objects except dict_sys.latch. */
-UNIV_INLINE
-void
-log_free_check(void);
-/*================*/
-
-/** Extends the log buffer.
-@param[in] len requested minimum size in bytes */
-void log_buffer_extend(ulong len);
-
-/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
-and lsn - buf_pool.get_oldest_modification().
-@param[in] file_size requested innodb_log_file_size
-@retval true on success
-@retval false if the smallest log is too small to
-accommodate the number of OS threads in the database server */
-bool
-log_set_capacity(ulonglong file_size)
- MY_ATTRIBUTE((warn_unused_result));
+struct completion_callback;
-/**
-Ensure that the log has been written to the log file up to a given
+/** Ensure that the log has been written to the log file up to a given
log entry (such as that of a transaction commit). Start a new write, or
wait and check if an already running write is covering the request.
-@param[in] lsn log sequence number that should be
-included in the redo log file write
-@param[in] flush_to_disk whether the written log should also
-be flushed to the file system
-@param[in] rotate_key whether to rotate the encryption key
-@param[in] cb completion callback. If not NULL, the callback will be called
- whenever lsn is written or flushed.
-*/
-struct completion_callback;
-void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key = false,
- const completion_callback* cb=nullptr);
+@param lsn log sequence number that should be included in the file write
+@param durable whether the write needs to be durable
+@param callback log write completion callback */
+void log_write_up_to(lsn_t lsn, bool durable,
+ const completion_callback *callback= nullptr);
/** Write to the log file up to the last log entry.
-@param sync whether to wait for a durable write to complete */
-void log_buffer_flush_to_disk(bool sync= true);
+@param durable whether to wait for a durable write to complete */
+void log_buffer_flush_to_disk(bool durable= true);
-/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */
+/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.latch. */
ATTRIBUTE_COLD void log_write_and_flush_prepare();
-/** Durably write the log up to log_sys.lsn() and release log_sys.mutex. */
+/** Durably write the log up to log_sys.get_lsn(). */
ATTRIBUTE_COLD void log_write_and_flush();
/** Make a checkpoint */
@@ -130,10 +86,6 @@ ATTRIBUTE_COLD void log_make_checkpoint();
/** Make a checkpoint at the latest lsn on shutdown. */
ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown();
-/** Write checkpoint info to the log header and release log_sys.mutex.
-@param[in] end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */
-ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn);
-
/**
Checks that there is enough free space in the log to start a new query step.
Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
@@ -141,175 +93,12 @@ function may only be called if the calling thread owns no synchronization
objects! */
ATTRIBUTE_COLD void log_check_margins();
-/************************************************************//**
-Gets a log block flush bit.
-@return TRUE if this block was the first to be written in a log flush */
-UNIV_INLINE
-ibool
-log_block_get_flush_bit(
-/*====================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Gets a log block number stored in the header.
-@return log block number stored in the block header */
-UNIV_INLINE
-ulint
-log_block_get_hdr_no(
-/*=================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Gets a log block data length.
-@return log block data length measured as a byte offset from the block start */
-UNIV_INLINE
-ulint
-log_block_get_data_len(
-/*===================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Sets the log block data length. */
-UNIV_INLINE
-void
-log_block_set_data_len(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint len); /*!< in: data length */
-/** Calculate the CRC-32C checksum of a log block.
-@param[in] block log block
-@return checksum */
-inline ulint log_block_calc_checksum_crc32(const byte* block);
-
-/************************************************************//**
-Gets a log block checksum field value.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_get_checksum(
-/*===================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Sets a log block checksum field value. */
-UNIV_INLINE
-void
-log_block_set_checksum(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint checksum); /*!< in: checksum */
-/************************************************************//**
-Gets a log block first mtr log record group offset.
-@return first mtr log record group byte offset from the block start, 0
-if none */
-UNIV_INLINE
-ulint
-log_block_get_first_rec_group(
-/*==========================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Sets the log block first mtr log record group offset. */
-UNIV_INLINE
-void
-log_block_set_first_rec_group(
-/*==========================*/
- byte* log_block, /*!< in/out: log block */
- ulint offset); /*!< in: offset, 0 if none */
-/************************************************************//**
-Gets a log block checkpoint number field (4 lowest bytes).
-@return checkpoint no (4 lowest bytes) */
-UNIV_INLINE
-ulint
-log_block_get_checkpoint_no(
-/*========================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Initializes a log block in the log buffer. */
-UNIV_INLINE
-void
-log_block_init(
-/*===========*/
- byte* log_block, /*!< in: pointer to the log buffer */
- lsn_t lsn); /*!< in: lsn within the log block */
-/************************************************************//**
-Converts a lsn to a log block number.
-@return log block number, it is > 0 and <= 1G */
-UNIV_INLINE
-ulint
-log_block_convert_lsn_to_no(
-/*========================*/
- lsn_t lsn); /*!< in: lsn of a byte within the block */
/******************************************************//**
Prints info of the log. */
void
log_print(
/*======*/
FILE* file); /*!< in: file where to print */
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-void
-log_refresh_stats(void);
-/*===================*/
-
-/* The counting of lsn's starts from this value: this must be non-zero */
-#define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
-
-/* Offsets of a log block header */
-#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
- is allowed to wrap around at 2G; the
- highest bit is set to 1 if this is the
- first log block in a log flush write
- segment */
-#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL
- /* mask used to get the highest bit in
- the preceding field */
-#define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to
- this block */
-#define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an
- mtr log record group in this log block,
- 0 if none; if the value is the same
- as LOG_BLOCK_HDR_DATA_LEN, it means
- that the first rec group has not yet
- been catenated to this log block, but
- if it will, it will start at this
- offset; an archive recovery can
- start parsing the log records starting
- from this offset in this log block,
- if value not 0 */
-#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of
- log_sys.next_checkpoint_no when the
- log block was last written to: if the
- block has not yet been written full,
- this value is only updated before a
- log buffer flush */
-#define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in
- bytes */
-
-#define LOG_BLOCK_KEY 4 /* encryption key version
- before LOG_BLOCK_CHECKSUM;
- after log_t::FORMAT_ENC_10_4 only */
-#define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block
- contents; in InnoDB versions
- < 3.23.52 this did not contain the
- checksum but the same value as
- LOG_BLOCK_HDR_NO */
-
-/** Offsets inside the checkpoint pages (redo log format version 1) @{ */
-/** Checkpoint number */
-#define LOG_CHECKPOINT_NO 0
-/** Log sequence number up to which all changes have been flushed */
-#define LOG_CHECKPOINT_LSN 8
-/** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */
-#define LOG_CHECKPOINT_OFFSET 16
-/** srv_log_buffer_size at the time of the checkpoint (not used) */
-#define LOG_CHECKPOINT_LOG_BUF_SIZE 24
-/** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits)*/
-#define LOG_CHECKPOINT_CRYPT_KEY 32
-/** MariaDB 10.2.5 encrypted redo log random nonce (32 bits) */
-#define LOG_CHECKPOINT_CRYPT_NONCE 36
-/** MariaDB 10.2.5 encrypted redo log random message (MY_AES_BLOCK_SIZE) */
-#define LOG_CHECKPOINT_CRYPT_MESSAGE 40
-/** start LSN of the MLOG_CHECKPOINT mini-transaction corresponding
-to this checkpoint, or 0 if the information has not been written */
-#define LOG_CHECKPOINT_END_LSN OS_FILE_LOG_BLOCK_SIZE - 16
-
-/* @} */
/** Offsets of a log file header */
/* @{ */
@@ -317,12 +106,6 @@ to this checkpoint, or 0 if the information has not been written */
This used to be called LOG_GROUP_ID and always written as 0,
because InnoDB never supported more than one copy of the redo log. */
#define LOG_HEADER_FORMAT 0
-/** Redo log subformat (originally 0). In format version 0, the
-LOG_FILE_START_LSN started here, 4 bytes earlier than LOG_HEADER_START_LSN,
-which the LOG_FILE_START_LSN was renamed to.
-Subformat 1 is for the fully redo-logged TRUNCATE
-(no MLOG_TRUNCATE records or extra log checkpoints or log file) */
-#define LOG_HEADER_SUBFORMAT 4
/** LSN of the start of data in this log file (with format version 1;
in format version 0, it was called LOG_FILE_START_LSN and at offset 4). */
#define LOG_HEADER_START_LSN 8
@@ -331,123 +114,69 @@ and the creation time if the log file was created by mysqlbackup --restore,
or the MySQL version that created the redo log file. */
#define LOG_HEADER_CREATOR 16
/** End of the log file creator field. */
-#define LOG_HEADER_CREATOR_END (LOG_HEADER_CREATOR + 32)
-/** Contents of the LOG_HEADER_CREATOR field */
-#define LOG_HEADER_CREATOR_CURRENT \
- "MariaDB " \
- IB_TO_STR(MYSQL_VERSION_MAJOR) "." \
- IB_TO_STR(MYSQL_VERSION_MINOR) "." \
- IB_TO_STR(MYSQL_VERSION_PATCH)
-
+#define LOG_HEADER_CREATOR_END 48
/* @} */
-#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
- /* first checkpoint field in the log
- header; we write alternately to the
- checkpoint fields when we make new
- checkpoints; this field is only defined
- in the first log file of a log */
-#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE)
- /* second checkpoint field in the log
- header */
-#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
-
-/** Abstraction for reading, writing and flushing file cache to disk */
-class file_io
-{
-public:
- file_io(bool durable_writes= false) : m_durable_writes(durable_writes) {}
- virtual ~file_io() noexcept {};
- virtual dberr_t open(const char *path, bool read_only) noexcept= 0;
- virtual dberr_t rename(const char *old_path,
- const char *new_path) noexcept= 0;
- virtual dberr_t close() noexcept= 0;
- virtual dberr_t read(os_offset_t offset, span<byte> buf) noexcept= 0;
- virtual dberr_t write(const char *path, os_offset_t offset,
- span<const byte> buf) noexcept= 0;
- virtual dberr_t flush() noexcept= 0;
-
- /** Durable writes doesn't require calling flush() */
- bool writes_are_durable() const noexcept { return m_durable_writes; }
-
-protected:
- bool m_durable_writes;
-};
+struct log_t;
-class file_os_io final: public file_io
-{
-public:
- file_os_io()= default;
- file_os_io(const file_os_io &)= delete;
- file_os_io &operator=(const file_os_io &)= delete;
- file_os_io(file_os_io &&rhs);
- file_os_io &operator=(file_os_io &&rhs);
- ~file_os_io() noexcept;
-
- dberr_t open(const char *path, bool read_only) noexcept final;
- bool is_opened() const noexcept { return m_fd != OS_FILE_CLOSED; }
- dberr_t rename(const char *old_path, const char *new_path) noexcept final;
- dberr_t close() noexcept final;
- dberr_t read(os_offset_t offset, span<byte> buf) noexcept final;
- dberr_t write(const char *path, os_offset_t offset,
- span<const byte> buf) noexcept final;
- dberr_t flush() noexcept final;
-
-private:
- pfs_os_file_t m_fd{OS_FILE_CLOSED};
-};
-
-/** File abstraction + path */
+/** File abstraction */
class log_file_t
{
+ friend log_t;
+ os_file_t m_file{OS_FILE_CLOSED};
public:
- log_file_t(std::string path= "") noexcept : m_path{std::move(path)} {}
-
- dberr_t open(bool read_only) noexcept;
- bool is_opened() const noexcept;
+ log_file_t()= default;
+ log_file_t(os_file_t file) noexcept : m_file(file) {}
- const std::string &get_path() const noexcept { return m_path; }
+ /** Open a file
+ @return file size in bytes
+ @retval 0 if not readable */
+ os_offset_t open(bool read_only) noexcept;
+ bool is_opened() const noexcept { return m_file != OS_FILE_CLOSED; }
- dberr_t rename(std::string new_path) noexcept;
dberr_t close() noexcept;
dberr_t read(os_offset_t offset, span<byte> buf) noexcept;
- bool writes_are_durable() const noexcept;
- dberr_t write(os_offset_t offset, span<const byte> buf) noexcept;
- dberr_t flush() noexcept;
- void free()
- {
- m_path.clear();
- m_path.shrink_to_fit();
- }
-
-private:
- std::unique_ptr<file_io> m_file;
- std::string m_path;
+ void write(os_offset_t offset, span<const byte> buf) noexcept;
+ bool flush() const noexcept { return os_file_flush(m_file); }
+#ifdef HAVE_PMEM
+ byte *mmap(bool read_only, const struct stat &st) noexcept;
+#endif
};
/** Redo log buffer */
-struct log_t{
+struct log_t
+{
/** The original (not version-tagged) InnoDB redo log format */
- static constexpr uint32_t FORMAT_3_23 = 0;
+ static constexpr uint32_t FORMAT_3_23= 0;
/** The MySQL 5.7.9/MariaDB 10.2.2 log format */
- static constexpr uint32_t FORMAT_10_2 = 1;
- /** The MariaDB 10.3.2 log format.
- To prevent crash-downgrade to earlier 10.2 due to the inability to
- roll back a retroactively introduced TRX_UNDO_RENAME_TABLE undo log record,
- MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
- 1 instead of 0. MariaDB 10.3 will use subformat 0 (5.7-style TRUNCATE) or 2
- (MDEV-13564 backup-friendly TRUNCATE). */
- static constexpr uint32_t FORMAT_10_3 = 103;
+ static constexpr uint32_t FORMAT_10_2= 1;
+ /** The MariaDB 10.3.2 log format. */
+ static constexpr uint32_t FORMAT_10_3= 103;
/** The MariaDB 10.4.0 log format. */
- static constexpr uint32_t FORMAT_10_4 = 104;
+ static constexpr uint32_t FORMAT_10_4= 104;
/** Encrypted MariaDB redo log */
- static constexpr uint32_t FORMAT_ENCRYPTED = 1U << 31;
+ static constexpr uint32_t FORMAT_ENCRYPTED= 1U << 31;
/** The MariaDB 10.4.0 log format (only with innodb_encrypt_log=ON) */
- static constexpr uint32_t FORMAT_ENC_10_4 = FORMAT_10_4 | FORMAT_ENCRYPTED;
- /** The MariaDB 10.5 physical redo log format */
- static constexpr uint32_t FORMAT_10_5 = 0x50485953;
- /** The MariaDB 10.5 physical format (only with innodb_encrypt_log=ON) */
- static constexpr uint32_t FORMAT_ENC_10_5 = FORMAT_10_5 | FORMAT_ENCRYPTED;
+ static constexpr uint32_t FORMAT_ENC_10_4= FORMAT_10_4 | FORMAT_ENCRYPTED;
+ /** The MariaDB 10.5.1 physical redo log format */
+ static constexpr uint32_t FORMAT_10_5= 0x50485953;
+ /** The MariaDB 10.5.1 physical format (only with innodb_encrypt_log=ON) */
+ static constexpr uint32_t FORMAT_ENC_10_5= FORMAT_10_5 | FORMAT_ENCRYPTED;
+ /** The MariaDB 10.8.0 variable-block-size redo log format */
+ static constexpr uint32_t FORMAT_10_8= 0x50687973;
+ /** The MariaDB 10.8.0 format with innodb_encrypt_log=ON */
+ static constexpr uint32_t FORMAT_ENC_10_8= FORMAT_10_8 | FORMAT_ENCRYPTED;
+
+ /** Location of the first checkpoint block */
+ static constexpr size_t CHECKPOINT_1= 4096;
+ /** Location of the second checkpoint block */
+ static constexpr size_t CHECKPOINT_2= 8192;
+ /** Start of record payload */
+ static constexpr lsn_t START_OFFSET= 12288;
+
+ /** smallest possible log sequence number in the current format
+ (used to be 2048 before FORMAT_10_8). */
+ static constexpr lsn_t FIRST_LSN= START_OFFSET;
private:
/** The log sequence number of the last change of durable InnoDB files */
@@ -459,127 +188,77 @@ private:
preflush buffer pool pages, or initiate a log checkpoint.
This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */
std::atomic<bool> check_flush_or_checkpoint_;
+
+
+#if defined(__aarch64__)
+/* On ARM, we do more spinning */
+typedef srw_spin_lock log_rwlock_t;
+#define LSN_LOCK_ATTR MY_MUTEX_INIT_FAST
+#else
+typedef srw_lock log_rwlock_t;
+#define LSN_LOCK_ATTR nullptr
+#endif
+
public:
- /** mutex protecting the log */
- alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex;
- /** first free offset within the log buffer in use */
- size_t buf_free;
- /** recommended maximum size of buf, after which the buffer is flushed */
- size_t max_buf_free;
- /** mutex to serialize access to the flush list when we are putting
- dirty blocks in the list. The idea behind this mutex is to be able
- to release log_sys.mutex during mtr_commit and still ensure that
- insertions in the flush_list happen in the LSN order. */
- alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_order_mutex;
- /** log_buffer, append data here */
+ /** rw-lock protecting buf */
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) log_rwlock_t latch;
+private:
+ /** Last written LSN */
+ lsn_t write_lsn;
+public:
+ /** log record buffer, written to by mtr_t::commit() */
byte *buf;
- /** log_buffer, writing data to file from this buffer.
- Before flushing write_buf is swapped with flush_buf */
+ /** buffer for writing data to ib_logfile0, or nullptr if is_pmem()
+ In write_buf(), buf and flush_buf are swapped */
byte *flush_buf;
- /** Log file stuff. Protected by mutex. */
- struct file {
- /** format of the redo log: e.g., FORMAT_10_5 */
- uint32_t format;
- /** redo log subformat: 0 with separately logged TRUNCATE,
- 2 with fully redo-logged TRUNCATE (1 in MariaDB 10.2) */
- uint32_t subformat;
- /** individual log file size in bytes, including the header */
- lsn_t file_size;
- private:
- /** lsn used to fix coordinates within the log group */
- lsn_t lsn;
- /** the byte offset of the above lsn */
- lsn_t lsn_offset;
- /** log file */
- log_file_t fd;
-
- public:
- /** used only in recovery: recovery scan succeeded up to this
- lsn in this log group */
- lsn_t scanned_lsn;
-
- /** opens log file which must be closed prior this call */
- void open_file(std::string path);
- /** writes header */
- void write_header_durable(lsn_t lsn);
- /** opens log file which must be closed prior this call */
- dberr_t rename(std::string path) { return fd.rename(path); }
- /** reads buffer from log file
- @param[in] offset offset in log file
- @param[in] buf buffer where to read */
- void read(os_offset_t offset, span<byte> buf);
- /** Tells whether writes require calling flush() */
- bool writes_are_durable() const noexcept;
- /** writes buffer to log file
- @param[in] offset offset in log file
- @param[in] buf buffer from which to write */
- void write(os_offset_t offset, span<byte> buf);
- /** flushes OS page cache (excluding metadata!) for log file */
- void flush();
- /** closes log file */
- void close_file();
-
- /** @return whether the redo log is encrypted */
- bool is_encrypted() const { return format & FORMAT_ENCRYPTED; }
- /** @return whether the redo log is in the physical format */
- bool is_physical() const
- { return (format & ~FORMAT_ENCRYPTED) == FORMAT_10_5; }
- /** @return capacity in bytes */
- lsn_t capacity() const{ return file_size - LOG_FILE_HDR_SIZE; }
- /** Calculate the offset of a log sequence number.
- @param[in] lsn log sequence number
- @return offset within the log */
- inline lsn_t calc_lsn_offset(lsn_t lsn) const;
- inline lsn_t calc_lsn_offset_old(lsn_t lsn) const;
-
- /** Set the field values to correspond to a given lsn. */
- void set_fields(lsn_t lsn)
- {
- lsn_t c_lsn_offset = calc_lsn_offset(lsn);
- set_lsn(lsn);
- set_lsn_offset(c_lsn_offset);
- }
-
- /** Read a log segment to log_sys.buf.
- @param[in,out] start_lsn in: read area start,
- out: the last read valid lsn
- @param[in] end_lsn read area end
- @return whether no invalid blocks (e.g checksum mismatch) were found */
- bool read_log_seg(lsn_t* start_lsn, lsn_t end_lsn);
-
- /** Initialize the redo log buffer. */
- void create();
-
- /** Close the redo log buffer. */
- void close() { close_file(); }
- void set_lsn(lsn_t a_lsn);
- lsn_t get_lsn() const { return lsn; }
- void set_lsn_offset(lsn_t a_lsn);
- lsn_t get_lsn_offset() const { return lsn_offset; }
- } log;
-
- /** The fields involved in the log buffer flush @{ */
-
- size_t buf_next_to_write;/*!< first offset in the log buffer
- where the byte content may not exist
- written to file, e.g., the start
- offset of a log record catenated
- later; this is advanced when a flush
- operation is completed to all the log
- groups */
- lsn_t write_lsn; /*!< last written lsn */
- lsn_t current_flush_lsn;/*!< end lsn for the current running
- write + flush operation */
- std::atomic<size_t> pending_flushes; /*!< system calls in progress */
- std::atomic<size_t> flushes; /*!< system calls counter */
-
- ulint n_log_ios; /*!< number of log i/os initiated thus
- far */
- ulint n_log_ios_old; /*!< number of log i/o's at the
- previous printout */
- time_t last_printout_time;/*!< when log_print was last time
- called */
- /* @} */
+ /** number of std::swap(buf, flush_buf) and writes from buf to log;
+ protected by latch.wr_lock() */
+ ulint write_to_log;
+ /** innodb_log_buffer_size (size of buf and flush_buf, in bytes) */
+ size_t buf_size;
+
+private:
+ /** spin lock protecting lsn, buf_free in append_prepare() */
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) pthread_mutex_t lsn_lock;
+ void init_lsn_lock() { pthread_mutex_init(&lsn_lock, LSN_LOCK_ATTR); }
+ void lock_lsn() { pthread_mutex_lock(&lsn_lock); }
+ void unlock_lsn() { pthread_mutex_unlock(&lsn_lock); }
+ void destroy_lsn_lock() { pthread_mutex_destroy(&lsn_lock); }
+
+public:
+ /** first free offset within the buf in use; protected by lsn_lock */
+ Atomic_relaxed<size_t> buf_free;
+ /** number of write requests (to buf); protected by exclusive lsn_lock */
+ ulint write_to_buf;
+ /** number of waits in append_prepare(); protected by lsn_lock */
+ ulint waits;
+ /** recommended maximum size of buf, after which the buffer is flushed */
+ size_t max_buf_free;
+
+ /** log file size in bytes, including the header */
+ lsn_t file_size;
+private:
+ /** the log sequence number at the start of the log file */
+ lsn_t first_lsn;
+#if defined __linux__ || defined _WIN32
+ /** The physical block size of the storage */
+ uint32_t block_size;
+#endif
+public:
+ /** format of the redo log: e.g., FORMAT_10_8 */
+ uint32_t format;
+ /** Log file */
+ log_file_t log;
+#if defined __linux__ || defined _WIN32
+ /** whether file system caching is enabled for the log */
+ my_bool log_buffered;
+# ifdef _WIN32
+ static constexpr bool log_maybe_unbuffered= true;
+# else
+ /** whether file system caching may be disabled */
+ bool log_maybe_unbuffered;
+# endif
+#endif
/** Fields involved in checkpoints @{ */
lsn_t log_capacity; /*!< capacity of the log; if
@@ -597,12 +276,12 @@ public:
/*!< this is the maximum allowed value
for lsn - last_checkpoint_lsn when a
new query step is started */
- ib_uint64_t next_checkpoint_no;
- /*!< next checkpoint number */
- /** latest completed checkpoint (protected by log_sys.mutex) */
+ /** latest completed checkpoint (protected by latch.wr_lock()) */
Atomic_relaxed<lsn_t> last_checkpoint_lsn;
/** next checkpoint LSN (protected by log_sys.mutex) */
lsn_t next_checkpoint_lsn;
+ /** next checkpoint number (protected by latch.wr_lock()) */
+ ulint next_checkpoint_no;
/** whether a checkpoint is pending */
Atomic_relaxed<bool> checkpoint_pending;
@@ -610,32 +289,68 @@ public:
byte *checkpoint_buf;
/* @} */
-private:
- bool m_initialised;
-public:
- /**
- Constructor.
+ bool is_initialised() const noexcept { return max_buf_free != 0; }
- Some members may require late initialisation, thus we just mark object as
- uninitialised. Real initialisation happens in create().
- */
- log_t(): m_initialised(false) {}
+#ifdef HAVE_PMEM
+ bool is_pmem() const noexcept { return !flush_buf; }
+#else
+ static constexpr bool is_pmem() { return false; }
+#endif
- /** @return whether the redo log is encrypted */
- bool is_encrypted() const { return(log.is_encrypted()); }
- /** @return whether the redo log is in the physical format */
- bool is_physical() const { return log.is_physical(); }
+ bool is_opened() const noexcept { return log.is_opened(); }
+
+ static constexpr bool resize_in_progress() { return false; }
+
+ /** Rename a log file after resizing.
+ @return whether an error occurred */
+ static bool rename_resized() noexcept;
+
+#if defined __linux__ || defined _WIN32
+ /** Try to enable or disable file system caching (update log_buffered) */
+ void set_buffered(bool buffered);
+#endif
+
+ void attach(log_file_t file, os_offset_t size);
+
+ void close_file();
- bool is_initialised() const { return m_initialised; }
+ /** Calculate the checkpoint safety margins. */
+ static void set_capacity();
lsn_t get_lsn(std::memory_order order= std::memory_order_relaxed) const
{ return lsn.load(order); }
void set_lsn(lsn_t lsn) { this->lsn.store(lsn, std::memory_order_release); }
- lsn_t get_flushed_lsn() const
- { return flushed_to_disk_lsn.load(std::memory_order_acquire); }
- void set_flushed_lsn(lsn_t lsn)
- { flushed_to_disk_lsn.store(lsn, std::memory_order_release); }
+ lsn_t get_flushed_lsn(std::memory_order order= std::memory_order_acquire)
+ const noexcept
+ { return flushed_to_disk_lsn.load(order); }
+
+ /** Initialize the LSN on initial log file creation. */
+ lsn_t init_lsn() noexcept
+ {
+ latch.wr_lock(SRW_LOCK_CALL);
+ const lsn_t lsn{get_lsn()};
+ flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed);
+ write_lsn= lsn;
+ latch.wr_unlock();
+ return lsn;
+ }
+
+ void set_recovered_lsn(lsn_t lsn) noexcept
+ {
+#ifndef SUX_LOCK_GENERIC
+ ut_ad(latch.is_write_locked());
+#endif /* SUX_LOCK_GENERIC */
+ write_lsn= lsn;
+ this->lsn.store(lsn, std::memory_order_relaxed);
+ flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed);
+ }
+
+#ifdef HAVE_PMEM
+ /** Persist the log.
+ @param lsn desired new value of flushed_to_disk_lsn */
+ inline void persist(lsn_t lsn) noexcept;
+#endif
bool check_flush_or_checkpoint() const
{
@@ -645,96 +360,105 @@ public:
void set_check_flush_or_checkpoint(bool flag= true)
{ check_flush_or_checkpoint_.store(flag, std::memory_order_relaxed); }
- bool has_encryption_key_rotation() const {
- return log.format == FORMAT_ENC_10_4 || log.format == FORMAT_ENC_10_5;
- }
+ /** Make previous write_buf() durable and update flushed_to_disk_lsn. */
+ inline bool flush(lsn_t lsn) noexcept;
- /** @return the log block header + trailer size */
- unsigned framing_size() const
- {
- return has_encryption_key_rotation()
- ? LOG_BLOCK_HDR_SIZE + LOG_BLOCK_KEY + LOG_BLOCK_CHECKSUM
- : LOG_BLOCK_HDR_SIZE + LOG_BLOCK_CHECKSUM;
- }
- /** @return the log block payload size */
- unsigned payload_size() const
- {
- return has_encryption_key_rotation()
- ? OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - LOG_BLOCK_CHECKSUM -
- LOG_BLOCK_KEY
- : OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - LOG_BLOCK_CHECKSUM;
- }
- /** @return the log block trailer offset */
- unsigned trailer_offset() const
+ /** Initialise the redo log subsystem. */
+ void create();
+
+ /** Shut down the redo log subsystem. */
+ void close();
+
+#if defined __linux__ || defined _WIN32
+ /** @return the physical block size of the storage */
+ size_t get_block_size() const noexcept
+ { ut_ad(block_size); return block_size; }
+ /** Set the log block size for file I/O. */
+ void set_block_size(uint32_t size) noexcept { block_size= size; }
+#else
+ /** @return the physical block size of the storage */
+ static size_t get_block_size() { return 512; }
+#endif
+
+private:
+ /** Wait in append_prepare() for buffer to become available
+ @param ex whether log_sys.latch is exclusively locked */
+ ATTRIBUTE_COLD static void append_prepare_wait(bool ex) noexcept;
+public:
+ /** Reserve space in the log buffer for appending data.
+ @tparam pmem log_sys.is_pmem()
+ @param size total length of the data to append(), in bytes
+ @param ex whether log_sys.latch is exclusively locked
+ @return the start LSN and the buffer position for append() */
+ template<bool pmem>
+ inline std::pair<lsn_t,byte*> append_prepare(size_t size, bool ex) noexcept;
+
+ /** Append a string of bytes to the redo log.
+ @param d destination
+ @param s string of bytes
+ @param size length of s, in bytes */
+ void append(byte *&d, const void *s, size_t size) noexcept
{
- return has_encryption_key_rotation()
- ? OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM - LOG_BLOCK_KEY
- : OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM;
+#ifndef SUX_LOCK_GENERIC
+ ut_ad(latch.is_locked());
+#endif
+ ut_ad(d + size <= buf + (is_pmem() ? file_size : buf_size));
+ memcpy(d, s, size);
+ d+= size;
}
- size_t get_pending_flushes() const
+ /** Set the log file format. */
+ void set_latest_format(bool encrypted) noexcept
+ { format= encrypted ? FORMAT_ENC_10_8 : FORMAT_10_8; }
+ /** @return whether the redo log is encrypted */
+ bool is_encrypted() const noexcept { return format & FORMAT_ENCRYPTED; }
+ /** @return whether the redo log is in the latest format */
+ bool is_latest() const noexcept
+ { return (~FORMAT_ENCRYPTED & format) == FORMAT_10_8; }
+
+ /** @return capacity in bytes */
+ lsn_t capacity() const noexcept { return file_size - START_OFFSET; }
+
+ /** Set the LSN of the log file at file creation. */
+ void set_first_lsn(lsn_t lsn) noexcept { write_lsn= first_lsn= lsn; }
+ /** @return the first LSN of the log file */
+ lsn_t get_first_lsn() const noexcept { return first_lsn; }
+
+ /** Determine the sequence bit at a log sequence number */
+ byte get_sequence_bit(lsn_t lsn) const noexcept
{
- return pending_flushes.load(std::memory_order_relaxed);
+ ut_ad(lsn >= first_lsn);
+ return !(((lsn - first_lsn) / capacity()) & 1);
}
- size_t get_flushes() const
+ /** Calculate the offset of a log sequence number.
+ @param lsn log sequence number
+ @return byte offset within ib_logfile0 */
+ lsn_t calc_lsn_offset(lsn_t lsn) const noexcept
{
- return flushes.load(std::memory_order_relaxed);
+ ut_ad(lsn >= first_lsn);
+ return START_OFFSET + (lsn - first_lsn) % capacity();
}
- /** Initialise the redo log subsystem. */
- void create();
+ /** Write checkpoint information and invoke latch.wr_unlock().
+ @param end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */
+ inline void write_checkpoint(lsn_t end_lsn) noexcept;
- /** Shut down the redo log subsystem. */
- void close();
+ /** Write buf to ib_logfile0.
+ @tparam release_latch whether to invoke latch.wr_unlock()
+ @return new write target
+ @retval 0 if everything was written */
+ template<bool release_latch> inline lsn_t write_buf() noexcept;
+
+ /** Create the log. */
+ void create(lsn_t lsn) noexcept;
};
/** Redo log system */
extern log_t log_sys;
-#ifdef UNIV_DEBUG
-extern bool log_write_lock_own();
-#endif
-
-/** Calculate the offset of a log sequence number.
-@param[in] lsn log sequence number
-@return offset within the log */
-inline lsn_t log_t::file::calc_lsn_offset(lsn_t lsn) const
-{
- ut_ad(this == &log_sys.log);
- /* The lsn parameters are updated while holding both the mutexes
- and it is ok to have either of them while reading */
-#ifdef SAFE_MUTEX
- ut_ad(mysql_mutex_is_owner(&log_sys.mutex) || log_write_lock_own());
-#endif /* SAFE_MUTEX */
- const lsn_t size = capacity();
- lsn_t l= lsn - this->lsn;
- if (longlong(l) < 0) {
- l = lsn_t(-longlong(l)) % size;
- l = size - l;
- }
-
- l+= lsn_offset - LOG_FILE_HDR_SIZE * (1 + lsn_offset / file_size);
- l %= size;
- return l + LOG_FILE_HDR_SIZE * (1 + l / (file_size - LOG_FILE_HDR_SIZE));
-}
-inline void log_t::file::set_lsn(lsn_t a_lsn)
+inline void log_free_check()
{
-#ifdef SAFE_MUTEX
- ut_ad(mysql_mutex_is_owner(&log_sys.mutex) || log_write_lock_own());
-#endif /* SAFE_MUTEX */
- lsn= a_lsn;
+ if (log_sys.check_flush_or_checkpoint())
+ log_check_margins();
}
-
-inline void log_t::file::set_lsn_offset(lsn_t a_lsn)
-{
-#ifdef SAFE_MUTEX
- ut_ad(mysql_mutex_is_owner(&log_sys.mutex) || log_write_lock_own());
-#endif /* SAFE_MUTEX */
- ut_ad((lsn % OS_FILE_LOG_BLOCK_SIZE) == (a_lsn % OS_FILE_LOG_BLOCK_SIZE));
- lsn_offset= a_lsn;
-}
-
-#include "log0log.inl"
-
-#endif
diff --git a/storage/innobase/include/log0log.inl b/storage/innobase/include/log0log.inl
deleted file mode 100644
index 73434737925..00000000000
--- a/storage/innobase/include/log0log.inl
+++ /dev/null
@@ -1,311 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2020, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0log.ic
-Database log
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#include "assume_aligned.h"
-#include "ut0crc32.h"
-
-extern ulong srv_log_buffer_size;
-
-/************************************************************//**
-Gets a log block flush bit.
-@return TRUE if this block was the first to be written in a log flush */
-UNIV_INLINE
-ibool
-log_block_get_flush_bit(
-/*====================*/
- const byte* log_block) /*!< in: log block */
-{
- static_assert(LOG_BLOCK_HDR_NO == 0, "compatibility");
- static_assert(LOG_BLOCK_FLUSH_BIT_MASK == 0x80000000, "compatibility");
-
- return *log_block & 0x80;
-}
-
-/************************************************************//**
-Sets the log block flush bit. */
-UNIV_INLINE
-void
-log_block_set_flush_bit(
-/*====================*/
- byte* log_block, /*!< in/out: log block */
- ibool val) /*!< in: value to set */
-{
- static_assert(LOG_BLOCK_HDR_NO == 0, "compatibility");
- static_assert(LOG_BLOCK_FLUSH_BIT_MASK == 0x80000000, "compatibility");
-
- if (val)
- *log_block|= 0x80;
- else
- *log_block&= 0x7f;
-}
-
-/************************************************************//**
-Gets a log block number stored in the header.
-@return log block number stored in the block header */
-UNIV_INLINE
-ulint
-log_block_get_hdr_no(
-/*=================*/
- const byte* log_block) /*!< in: log block */
-{
- static_assert(LOG_BLOCK_HDR_NO == 0, "compatibility");
- return mach_read_from_4(my_assume_aligned<4>(log_block)) &
- ~LOG_BLOCK_FLUSH_BIT_MASK;
-}
-
-/************************************************************//**
-Sets the log block number stored in the header; NOTE that this must be set
-before the flush bit! */
-UNIV_INLINE
-void
-log_block_set_hdr_no(
-/*=================*/
- byte* log_block, /*!< in/out: log block */
- ulint n) /*!< in: log block number: must be > 0 and
- < LOG_BLOCK_FLUSH_BIT_MASK */
-{
- static_assert(LOG_BLOCK_HDR_NO == 0, "compatibility");
- ut_ad(n > 0);
- ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK);
-
- mach_write_to_4(my_assume_aligned<4>(log_block), n);
-}
-
-/************************************************************//**
-Gets a log block data length.
-@return log block data length measured as a byte offset from the block start */
-UNIV_INLINE
-ulint
-log_block_get_data_len(
-/*===================*/
- const byte* log_block) /*!< in: log block */
-{
- return mach_read_from_2(my_assume_aligned<2>
- (log_block + LOG_BLOCK_HDR_DATA_LEN));
-}
-
-/************************************************************//**
-Sets the log block data length. */
-UNIV_INLINE
-void
-log_block_set_data_len(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint len) /*!< in: data length */
-{
- mach_write_to_2(my_assume_aligned<2>(log_block + LOG_BLOCK_HDR_DATA_LEN),
- len);
-}
-
-/************************************************************//**
-Gets a log block first mtr log record group offset.
-@return first mtr log record group byte offset from the block start, 0
-if none */
-UNIV_INLINE
-ulint
-log_block_get_first_rec_group(
-/*==========================*/
- const byte* log_block) /*!< in: log block */
-{
- return mach_read_from_2(my_assume_aligned<2>
- (log_block + LOG_BLOCK_FIRST_REC_GROUP));
-}
-
-/************************************************************//**
-Sets the log block first mtr log record group offset. */
-UNIV_INLINE
-void
-log_block_set_first_rec_group(
-/*==========================*/
- byte* log_block, /*!< in/out: log block */
- ulint offset) /*!< in: offset, 0 if none */
-{
- mach_write_to_2(my_assume_aligned<2>
- (log_block + LOG_BLOCK_FIRST_REC_GROUP), offset);
-}
-
-/************************************************************//**
-Gets a log block checkpoint number field (4 lowest bytes).
-@return checkpoint no (4 lowest bytes) */
-UNIV_INLINE
-ulint
-log_block_get_checkpoint_no(
-/*========================*/
- const byte* log_block) /*!< in: log block */
-{
- return mach_read_from_4(my_assume_aligned<4>
- (log_block + LOG_BLOCK_CHECKPOINT_NO));
-}
-
-/************************************************************//**
-Sets a log block checkpoint number field (4 lowest bytes). */
-UNIV_INLINE
-void
-log_block_set_checkpoint_no(
-/*========================*/
- byte* log_block, /*!< in/out: log block */
- ib_uint64_t no) /*!< in: checkpoint no */
-{
- mach_write_to_4(my_assume_aligned<4>(log_block + LOG_BLOCK_CHECKPOINT_NO),
- static_cast<uint32_t>(no));
-}
-
-/************************************************************//**
-Converts a lsn to a log block number.
-@return log block number, it is > 0 and <= 1G */
-UNIV_INLINE
-ulint
-log_block_convert_lsn_to_no(
-/*========================*/
- lsn_t lsn) /*!< in: lsn of a byte within the block */
-{
- return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) &
- (DBUG_IF("innodb_small_log_block_no_limit")
- ? 0xFUL : 0x3FFFFFFFUL)) + 1);
-}
-
-/** Calculate the CRC-32C checksum of a log block.
-@param[in] block log block
-@return checksum */
-inline ulint log_block_calc_checksum_crc32(const byte* block)
-{
- return ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM);
-}
-
-/************************************************************//**
-Gets a log block checksum field value.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_get_checksum(
-/*===================*/
- const byte* log_block) /*!< in: log block */
-{
- return mach_read_from_4(my_assume_aligned<4>
- (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM +
- log_block));
-}
-
-/************************************************************//**
-Sets a log block checksum field value. */
-UNIV_INLINE
-void
-log_block_set_checksum(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint checksum) /*!< in: checksum */
-{
- mach_write_to_4(my_assume_aligned<4>
- (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM +
- log_block), checksum);
-}
-
-/************************************************************//**
-Initializes a log block in the log buffer. */
-UNIV_INLINE
-void
-log_block_init(
-/*===========*/
- byte* log_block, /*!< in: pointer to the log buffer */
- lsn_t lsn) /*!< in: lsn within the log block */
-{
- ulint no;
-
- no = log_block_convert_lsn_to_no(lsn);
-
- log_block_set_hdr_no(log_block, no);
-
- log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
- log_block_set_first_rec_group(log_block, 0);
-}
-
-/** Append a string to the log.
-@param[in] str string
-@param[in] len string length
-@param[out] start_lsn start LSN of the log record
-@return end lsn of the log record, zero if did not succeed */
-UNIV_INLINE
-lsn_t
-log_reserve_and_write_fast(
- const void* str,
- ulint len,
- lsn_t* start_lsn)
-{
- mysql_mutex_assert_owner(&log_sys.mutex);
- ut_ad(len > 0);
-
- const ulint data_len = len
- + log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE;
-
- if (data_len >= log_sys.trailer_offset()) {
-
- /* The string does not fit within the current log block
- or the log block would become full */
-
- return(0);
- }
-
- lsn_t lsn = log_sys.get_lsn();
- *start_lsn = lsn;
-
- memcpy(log_sys.buf + log_sys.buf_free, str, len);
-
- log_block_set_data_len(
- reinterpret_cast<byte*>(ut_align_down(
- log_sys.buf + log_sys.buf_free,
- OS_FILE_LOG_BLOCK_SIZE)),
- data_len);
-
- log_sys.buf_free += len;
-
- ut_ad(log_sys.buf_free <= size_t{srv_log_buffer_size});
-
- lsn += len;
- log_sys.set_lsn(lsn);
-
- return lsn;
-}
-
-/***********************************************************************//**
-Checks if there is need for a log buffer flush or a new checkpoint, and does
-this if yes. Any database operation should call this when it has modified
-more than about 4 pages. NOTE that this function may only be called when the
-OS thread owns no synchronization objects except dict_sys.latch. */
-UNIV_INLINE
-void
-log_free_check(void)
-/*================*/
-{
- /* During row_log_table_apply(), this function will be called while we
- are holding some latches. This is OK, as long as we are not holding
- any latches on buffer blocks. */
-
- if (log_sys.check_flush_or_checkpoint()) {
-
- log_check_margins();
- }
-}
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 6c47c135526..c661c52905b 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -37,13 +37,6 @@ Created 9/20/1997 Heikki Tuuri
/** @return whether recovery is currently running. */
#define recv_recovery_is_on() UNIV_UNLIKELY(recv_sys.recovery_on)
-/** Find the latest checkpoint in the log header.
-@param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
-@return error code or DB_SUCCESS */
-dberr_t
-recv_find_max_checkpoint(ulint* max_field)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result))
/** Apply any buffered redo log to a page that was just read from a data file.
@param[in,out] space tablespace
@@ -52,12 +45,9 @@ ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result))
bool recv_recover_page(fil_space_t* space, buf_page_t* bpage);
/** Start recovering from a redo log checkpoint.
-@param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN
of first system tablespace page
@return error code or DB_SUCCESS */
-dberr_t
-recv_recovery_from_checkpoint_start(
- lsn_t flush_lsn);
+dberr_t recv_recovery_from_checkpoint_start();
/** Whether to store redo log records in recv_sys.pages */
enum store_t {
@@ -70,17 +60,6 @@ enum store_t {
};
-/** Adds data from a new log block to the parsing buffer of recv_sys if
-recv_sys.parse_start_lsn is non-zero.
-@param[in] log_block log block to add
-@param[in] scanned_lsn lsn of how far we were able to find
- data in this log block
-@return true if more data added */
-bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn);
-
-/** Moves the parsing buffer data left to the buffer start */
-void recv_sys_justify_left_parsing_buf();
-
/** Report an operation to create, delete, or rename a file during backup.
@param[in] space_id tablespace identifier
@param[in] type file operation redo log type
@@ -228,35 +207,24 @@ private:
during log scan or apply */
bool found_corrupt_fs;
public:
+ /** maximum guaranteed size of a mini-transaction on recovery */
+ static constexpr size_t MTR_SIZE_MAX{1U << 20};
+
/** whether we are applying redo log records during crash recovery */
bool recovery_on;
/** whether recv_recover_page(), invoked from buf_page_t::read_complete(),
should apply log records*/
bool apply_log_recs;
- byte* buf; /*!< buffer for parsing log records */
- ulint len; /*!< amount of data in buf */
- lsn_t parse_start_lsn;
- /*!< this is the lsn from which we were able to
- start parsing log records and adding them to
- pages; zero if a suitable
- start point not found yet */
- lsn_t scanned_lsn;
- /*!< the log data has been scanned up to this
- lsn */
- ulint scanned_checkpoint_no;
- /*!< the log data has been scanned up to this
- checkpoint number (lowest 4 bytes) */
- ulint recovered_offset;
- /*!< start offset of non-parsed log records in
- buf */
- lsn_t recovered_lsn;
- /*!< the log records have been parsed up to
- this lsn */
- lsn_t mlog_checkpoint_lsn;
- /*!< the LSN of a FILE_CHECKPOINT
- record, or 0 if none was parsed */
- /** the time when progress was last reported */
- time_t progress_time;
+ /** number of bytes in log_sys.buf */
+ size_t len;
+ /** start offset of non-parsed log records in log_sys.buf */
+ size_t offset;
+ /** log sequence number of the first non-parsed record */
+ lsn_t lsn;
+ /** log sequence number at the end of the FILE_CHECKPOINT record, or 0 */
+ lsn_t file_checkpoint;
+ /** the time when progress was last reported */
+ time_t progress_time;
using map = std::map<const page_id_t, page_recv_t,
std::less<const page_id_t>,
@@ -284,10 +252,10 @@ public:
/** The contents of the doublewrite buffer */
recv_dblwr_t dblwr;
- /** Last added LSN to pages. */
+ /** Last added LSN to pages, before switching to STORE_NO */
lsn_t last_stored_lsn= 0;
- void read(os_offset_t offset, span<byte> buf);
+ inline void read(os_offset_t offset, span<byte> buf);
inline size_t files_size();
void close_files() { files.clear(); files.shrink_to_fit(); }
@@ -313,17 +281,13 @@ private:
from before MariaDB Server 10.5.1) */
std::vector<log_file_t> files;
- void open_log_files_if_needed();
-
/** Base node of the redo block list.
List elements are linked via buf_block_t::unzip_LRU. */
UT_LIST_BASE_NODE_T(buf_block_t) blocks;
public:
/** Check whether the number of read redo log blocks exceeds the maximum.
- Store last_stored_lsn if the recovery is not in the last phase.
- @param[in,out] store whether to store page operations
@return whether the memory is exhausted */
- inline bool is_memory_exhausted(store_t *store);
+ inline bool is_memory_exhausted();
/** Apply buffered log to persistent data pages.
@param last_batch whether it is possible to write more redo log */
void apply(bool last_batch);
@@ -343,22 +307,42 @@ public:
bool is_initialised() const { return last_stored_lsn != 0; }
+ /** Find the latest checkpoint.
+ @return error code or DB_SUCCESS */
+ dberr_t find_checkpoint();
+
/** Register a redo log snippet for a page.
@param it page iterator
@param start_lsn start LSN of the mini-transaction
@param lsn @see mtr_t::commit_lsn()
- @param l redo log snippet @see log_t::FORMAT_10_5
+ @param l redo log snippet
@param len length of l, in bytes */
inline void add(map::iterator it, lsn_t start_lsn, lsn_t lsn,
const byte *l, size_t len);
- /** Parse and register one mini-transaction in log_t::FORMAT_10_5.
- @param checkpoint_lsn the log sequence number of the latest checkpoint
- @param store whether to store the records
- @param apply whether to apply file-level log records
- @return whether FILE_CHECKPOINT record was seen the first time,
- or corruption was noticed */
- bool parse(lsn_t checkpoint_lsn, store_t *store, bool apply);
+ enum parse_mtr_result { OK, PREMATURE_EOF, GOT_EOF };
+
+private:
+ /** Parse and register one log_t::FORMAT_10_8 mini-transaction.
+ @param store whether to store the records
+ @param l log data source */
+ template<typename source>
+ inline parse_mtr_result parse(store_t store, source& l) noexcept;
+public:
+ /** Parse and register one log_t::FORMAT_10_8 mini-transaction.
+ This is also what parse_pmem() invokes when HAVE_PMEM is not defined.
+ @param store whether to store the records */
+ static parse_mtr_result parse_mtr(store_t store) noexcept;
+
+ /** Parse and register one log_t::FORMAT_10_8 mini-transaction,
+ handling log_sys.is_pmem() buffer wrap-around.
+ @param store whether to store the records */
+ static parse_mtr_result parse_pmem(store_t store) noexcept
+#ifdef HAVE_PMEM
+ ;
+#else
+ { return parse_mtr(store); }
+#endif
/** Clear a fully processed set of stored redo log records. */
inline void clear();
@@ -441,20 +425,12 @@ extern bool recv_no_ibuf_operations;
/** TRUE when recv_init_crash_recovery() has been called. */
extern bool recv_needed_recovery;
#ifdef UNIV_DEBUG
-/** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys.mutex. */
-extern bool recv_no_log_write;
+/** whether writing to the redo log is forbidden;
+protected by exclusive log_sys.latch. */
+extern bool recv_no_log_write;
#endif /* UNIV_DEBUG */
/** TRUE if buf_page_is_corrupted() should check if the log sequence
number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
recv_recovery_from_checkpoint_start(). */
extern bool recv_lsn_checks_on;
-
-/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
-times! */
-#define RECV_PARSING_BUF_SIZE (2U << 20)
-
-/** Size of block reads when the log groups are scanned forward to do a
-roll-forward */
-#define RECV_SCAN_SIZE (4U << srv_page_size_shift)
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
index 093b706c1de..ca194f905b5 100644
--- a/storage/innobase/include/mtr0log.h
+++ b/storage/innobase/include/mtr0log.h
@@ -53,7 +53,8 @@ inline uint8_t mlog_decode_varint_length(byte first)
@param log redo log record buffer
@return the decoded integer
@retval MLOG_DECODE_ERROR on error */
-inline uint32_t mlog_decode_varint(const byte* log)
+template<typename byte_pointer>
+inline uint32_t mlog_decode_varint(const byte_pointer log)
{
uint32_t i= *log;
if (i < MIN_2BYTE)
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index 84f62334c5c..f068467f70c 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -105,10 +105,11 @@ struct mtr_t {
/** Commit a mini-transaction that did not modify any pages,
but generated some redo log on a higher level, such as
FILE_MODIFY records and an optional FILE_CHECKPOINT marker.
- The caller must hold log_sys.mutex.
+ The caller must hold exclusive log_sys.latch.
This is to be used at log_checkpoint().
- @param checkpoint_lsn the log sequence number of a checkpoint, or 0 */
- void commit_files(lsn_t checkpoint_lsn= 0);
+ @param checkpoint_lsn the log sequence number of a checkpoint, or 0
+ @return current LSN */
+ lsn_t commit_files(lsn_t checkpoint_lsn= 0);
/** @return mini-transaction savepoint (current size of m_memo) */
ulint get_savepoint() const { ut_ad(is_active()); return m_memo.size(); }
@@ -319,12 +320,9 @@ public:
/** @return true if we are inside the change buffer code */
bool is_inside_ibuf() const { return m_inside_ibuf; }
- /** Note that pages has been trimed */
+ /** Note that some pages have been freed */
void set_trim_pages() { m_trim_pages= true; }
- /** @return true if pages has been trimed */
- bool is_trim_pages() { return m_trim_pages; }
-
/** Latch a buffer pool block.
@param block block to be latched
@param rw_latch RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH */
@@ -592,6 +590,9 @@ public:
@return number of buffer count added by this mtr */
uint32_t get_fix_count(const buf_block_t *block) const;
+ /** Note that log_sys.latch is no longer being held exclusively. */
+ void flag_wr_unlock() noexcept { ut_ad(m_latch_ex); m_latch_ex= false; }
+
/** type of page flushing is needed during commit() */
enum page_flush_ahead
{
@@ -610,6 +611,11 @@ public:
#endif
private:
+ /** Handle any pages that were freed during the mini-transaction. */
+ void process_freed_pages();
+ /** Release modified pages when no log was written. */
+ void release_unlogged();
+
/** Log a write of a byte string to a page.
@param block buffer page
@param offset byte offset within page
@@ -635,6 +641,13 @@ private:
@param type extended record subtype; @see mrec_ext_t */
inline void log_write_extended(const buf_block_t &block, byte type);
+ /** Write a FILE_MODIFY record when a non-predefined persistent
+ tablespace was modified for the first time since fil_names_clear(). */
+ ATTRIBUTE_NOINLINE ATTRIBUTE_COLD void name_write();
+
+ /** Encrypt the log */
+ ATTRIBUTE_NOINLINE void encrypt();
+
/** Append the redo log records to the redo log buffer.
@return {start_lsn,flush_ahead} */
std::pair<lsn_t,page_flush_ahead> do_write();
@@ -642,7 +655,7 @@ private:
/** Append the redo log records to the redo log buffer.
@param len number of bytes to write
@return {start_lsn,flush_ahead} */
- inline std::pair<lsn_t,page_flush_ahead> finish_write(ulint len);
+ std::pair<lsn_t,page_flush_ahead> finish_write(size_t len);
/** Release the resources */
inline void release_resources();
@@ -666,7 +679,7 @@ private:
/** whether freeing_tree() has been called */
bool m_freeing_tree= false;
#endif
-
+private:
/** The page of the most recent m_log record written, or NULL */
const buf_page_t* m_last;
/** The current byte offset in m_last, or 0 */
@@ -681,6 +694,9 @@ private:
/** whether at least one previously clean buffer pool page was written to */
uint16_t m_made_dirty:1;
+ /** whether log_sys.latch is locked exclusively */
+ uint16_t m_latch_ex:1;
+
/** whether change buffer is latched; only needed in non-debug builds
to suppress some read-ahead operations, @see ibuf_inside() */
uint16_t m_inside_ibuf:1;
@@ -688,6 +704,9 @@ private:
/** whether the pages has been trimmed */
uint16_t m_trim_pages:1;
+ /** CRC-32C of m_log */
+ uint32_t m_crc;
+
#ifdef UNIV_DEBUG
/** Persistent user tablespace associated with the
mini-transaction, or 0 (TRX_SYS_SPACE) if none yet */
diff --git a/storage/innobase/include/mtr0mtr.inl b/storage/innobase/include/mtr0mtr.inl
index 71b476a2f5d..0b45bc1d695 100644
--- a/storage/innobase/include/mtr0mtr.inl
+++ b/storage/innobase/include/mtr0mtr.inl
@@ -49,9 +49,8 @@ mtr_t::memo_push(void* object, mtr_memo_type_t type)
ut_ad(type == MTR_MEMO_PAGE_X_MODIFY || ut_is_2pow(type));
/* If this mtr has x-fixed a clean page then we set
- the made_dirty flag. This tells us if we need to
- grab log_sys.flush_order_mutex at mtr_t::commit() so that we
- can insert the dirtied page into the flush list. */
+ the made_dirty flag. This tells mtr_t::commit()
+ to hold log_sys.latch longer. */
if (!m_made_dirty
&& (type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX)) {
@@ -107,9 +106,8 @@ mtr_t::sx_latch_at_savepoint(
mtr_memo_slot_t* slot = m_memo.at<mtr_memo_slot_t*>(savepoint);
ut_ad(slot->object == block);
-
- /* == RW_NO_LATCH */
- ut_a(slot->type == MTR_MEMO_BUF_FIX);
+ ut_ad(slot->type == MTR_MEMO_BUF_FIX); /* == RW_NO_LATCH */
+ slot->type = MTR_MEMO_PAGE_SX_FIX;
block->page.lock.u_lock();
ut_ad(!block->page.is_io_fixed());
@@ -117,8 +115,6 @@ mtr_t::sx_latch_at_savepoint(
if (!m_made_dirty) {
m_made_dirty = is_block_dirtied(block);
}
-
- slot->type = MTR_MEMO_PAGE_SX_FIX;
}
/**
@@ -141,9 +137,8 @@ mtr_t::x_latch_at_savepoint(
mtr_memo_slot_t* slot = m_memo.at<mtr_memo_slot_t*>(savepoint);
ut_ad(slot->object == block);
-
- /* == RW_NO_LATCH */
- ut_a(slot->type == MTR_MEMO_BUF_FIX);
+ ut_ad(slot->type == MTR_MEMO_BUF_FIX); /* == RW_NO_LATCH */
+ slot->type = MTR_MEMO_PAGE_X_FIX;
block->page.lock.x_lock();
ut_ad(!block->page.is_io_fixed());
@@ -151,8 +146,6 @@ mtr_t::x_latch_at_savepoint(
if (!m_made_dirty) {
m_made_dirty = is_block_dirtied(block);
}
-
- slot->type = MTR_MEMO_PAGE_X_FIX;
}
/**
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index 7acc255da36..1de31126a88 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -52,17 +52,17 @@ enum mtr_log_t {
/*
A mini-transaction is a stream of records that is always terminated by
-a NUL byte. The first byte of a mini-transaction record is never NUL,
-but NUL bytes can occur within mini-transaction records. The first
-bytes of each record will explicitly encode the length of the record.
-NUL bytes also acts as padding in log blocks, that is, there can be
-multiple sucessive NUL bytes between mini-transactions in a redo log
-block.
+a byte 0x00 or 0x01. The first byte of a mini-transaction record is
+never one of these bytes, but these bytes can occur within mini-transaction
+records.
The first byte of the record would contain a record type, flags, and a
part of length. The optional second byte of the record will contain
more length. (Not needed for short records.)
+For example, because the length of an INIT_PAGE record is 3 to 11 bytes,
+the first byte will be 0x02 to 0x0a, indicating the number of subsequent bytes.
+
Bit 7 of the first byte of a redo log record is the same_page flag.
If same_page=1, the record is referring to the same page as the
previous record. Records that do not refer to data pages but to file
@@ -187,8 +187,11 @@ A subsequent WRITE to the same page could be logged 0xb5 0x7f 0x23
0x34 0x56 0x78, meaning "same page, type code 3 (WRITE), 5 bytes to
follow", "byte offset 0x7f"+0x60+2, bytes 0x23,0x34,0x56,0x78.
-The end of the mini-transaction would be indicated by a NUL byte.
-*/
+The end of the mini-transaction would be indicated by the end byte
+0x00 or 0x01; @see log_sys.get_sequence_bit().
+If log_sys.is_encrypted(), that is followed by 8 bytes of nonce
+(part of initialization vector). That will be followed by 4 bytes
+of CRC-32C of the entire mini-transaction, excluding the end byte. */
/** Redo log record types. These bit patterns (3 bits) will be written
to the redo log file, so the existing codes or their interpretation on
@@ -305,14 +308,16 @@ enum mfile_type_t
FILE_RENAME = 0xa0,
/** Modify a file. Followed by tablespace ID and the file name. */
FILE_MODIFY = 0xb0,
- /** End-of-checkpoint marker. Followed by 2 dummy bytes of page identifier,
- 8 bytes of LSN, and padded with a NUL; @see SIZE_OF_FILE_CHECKPOINT. */
+ /** End-of-checkpoint marker, at the end of a mini-transaction.
+ Followed by 2 NUL bytes of page identifier and 8 bytes of LSN;
+ @see SIZE_OF_FILE_CHECKPOINT.
+ When all bytes are NUL, this is a dummy padding record. */
FILE_CHECKPOINT = 0xf0
};
/** Size of a FILE_CHECKPOINT record, including the trailing byte to
-terminate the mini-transaction. */
-constexpr byte SIZE_OF_FILE_CHECKPOINT= 3/*type,page_id*/ + 8/*LSN*/ + 1;
+terminate the mini-transaction and the CRC-32C. */
+constexpr byte SIZE_OF_FILE_CHECKPOINT= 3/*type,page_id*/ + 8/*LSN*/ + 1 + 4;
#ifndef UNIV_INNOCHECKSUM
/** Types for the mlock objects to store in the mtr_t::m_memo */
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index fe977c10633..727e9e49ead 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -107,15 +107,6 @@ struct pfs_os_file_t
#endif
};
-/** The next value should be smaller or equal to the smallest sector size used
-on any disk. A log block is required to be a portion of disk which is written
-so that if the start and the end of a block get written to disk, then the
-whole block gets written. This should be true even in most cases of a crash:
-if this fails for a log block, then it is equivalent to a media failure in the
-log. */
-
-#define OS_FILE_LOG_BLOCK_SIZE 512U
-
/** Options for os_file_create_func @{ */
enum os_file_create_t {
OS_FILE_OPEN = 51, /*!< to open an existing file (if
@@ -456,7 +447,6 @@ bool os_file_close_func(os_file_t file);
/* Keys to register InnoDB I/O with performance schema */
extern mysql_pfs_key_t innodb_data_file_key;
-extern mysql_pfs_key_t innodb_log_file_key;
extern mysql_pfs_key_t innodb_temp_file_key;
/* Following four macros are instumentations to register
diff --git a/storage/innobase/include/page0cur.inl b/storage/innobase/include/page0cur.inl
index 6f7c633561f..48ac428f09c 100644
--- a/storage/innobase/include/page0cur.inl
+++ b/storage/innobase/include/page0cur.inl
@@ -272,6 +272,7 @@ page_cur_tuple_insert(
index, rec, *offsets, mtr);
}
- ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, *offsets));
+ ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, index, *offsets));
return(rec);
}
+
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index 4787ce36c7a..ae06d06ad5e 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -1130,7 +1130,7 @@ page_find_rec_with_heap_no(
@return the last record, not delete-marked
@retval infimum record if all records are delete-marked */
const rec_t*
-page_find_rec_max_not_deleted(
+page_find_rec_last_not_deleted(
const page_t* page);
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
index 6f2201971d1..3a30f5a92f3 100644
--- a/storage/innobase/include/rem0cmp.h
+++ b/storage/innobase/include/rem0cmp.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2020, MariaDB Corporation.
+Copyright (c) 2017, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,8 +24,7 @@ Comparison services for records
Created 7/1/1994 Heikki Tuuri
************************************************************************/
-#ifndef rem0cmp_h
-#define rem0cmp_h
+#pragma once
#include "data0data.h"
#include "data0type.h"
@@ -43,39 +42,40 @@ cmp_cols_are_equal(
ibool check_charsets);
/*!< in: whether to check charsets */
/** Compare two data fields.
-@param[in] mtype main type
-@param[in] prtype precise type
-@param[in] data1 data field
-@param[in] len1 length of data1 in bytes, or UNIV_SQL_NULL
-@param[in] data2 data field
-@param[in] len2 length of data2 in bytes, or UNIV_SQL_NULL
+@param mtype main type
+@param prtype precise type
+@param descending whether to use descending order
+@param data1 data field
+@param len1 length of data1 in bytes, or UNIV_SQL_NULL
+@param data2 data field
+@param len2 length of data2 in bytes, or UNIV_SQL_NULL
@return the comparison result of data1 and data2
@retval 0 if data1 is equal to data2
@retval negative if data1 is less than data2
@retval positive if data1 is greater than data2 */
-int
-cmp_data_data(
- ulint mtype,
- ulint prtype,
- const byte* data1,
- ulint len1,
- const byte* data2,
- ulint len2)
- MY_ATTRIBUTE((warn_unused_result));
+int cmp_data(ulint mtype, ulint prtype, bool descending,
+ const byte *data1, size_t len1, const byte *data2, size_t len2)
+ MY_ATTRIBUTE((warn_unused_result));
/** Compare two data fields.
-@param[in] dfield1 data field; must have type field set
-@param[in] dfield2 data field
+@param dfield1 data field; must have type field set
+@param dfield2 data field
+@param descending whether to use descending order
@return the comparison result of dfield1 and dfield2
@retval 0 if dfield1 is equal to dfield2
@retval negative if dfield1 is less than dfield2
@retval positive if dfield1 is greater than dfield2 */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
-/*==============*/
- const dfield_t* dfield1,/*!< in: data field; must have type field set */
- const dfield_t* dfield2);/*!< in: data field */
+inline int cmp_dfield_dfield(const dfield_t *dfield1, const dfield_t *dfield2,
+ bool descending= false)
+{
+ ut_ad(dfield_check_typed(dfield1));
+ const dtype_t *type= dfield_get_type(dfield1);
+ return cmp_data(type->mtype, type->prtype, descending,
+ static_cast<const byte*>(dfield_get_data(dfield1)),
+ dfield_get_len(dfield1),
+ static_cast<const byte*>(dfield_get_data(dfield2)),
+ dfield_get_len(dfield2));
+}
#ifdef UNIV_DEBUG
/** Compare a GIS data tuple to a physical record.
@@ -103,15 +103,15 @@ inline int cmp_geometry_field(const void *a, const void *b)
double x2= mach_double_read(mbr2);
if (x1 > x2)
return 1;
- if (x2 > x1)
+ if (x1 < x2)
return -1;
- double y1= mach_double_read(mbr1 + sizeof(double) * SPDIMS);
- double y2= mach_double_read(mbr2 + sizeof(double) * SPDIMS);
+ x1= mach_double_read(mbr1 + sizeof(double) * SPDIMS);
+ x2= mach_double_read(mbr2 + sizeof(double) * SPDIMS);
- if (y1 > y2)
+ if (x1 > x2)
return 1;
- if (y2 > y1)
+ if (x1 < x2)
return -1;
/* left lower corner (xmin, ymin) overlaps, now right upper corner */
@@ -120,41 +120,39 @@ inline int cmp_geometry_field(const void *a, const void *b)
if (x1 > x2)
return 1;
- if (x2 > x1)
+ if (x1 < x2)
return -1;
- y1= mach_double_read(mbr1 + sizeof(double) * 2 + sizeof(double));
- y2= mach_double_read(mbr2 + sizeof(double) * 2 + sizeof(double));
+ x1= mach_double_read(mbr1 + sizeof(double) * 2 + sizeof(double));
+ x2= mach_double_read(mbr2 + sizeof(double) * 2 + sizeof(double));
- if (y1 > y2)
+ if (x1 > x2)
return 1;
- if (y2 > y1)
+ if (x1 < x2)
return -1;
return 0;
}
/** Compare a data tuple to a physical record.
-@param[in] dtuple data tuple
-@param[in] rec B-tree record
-@param[in] offsets rec_get_offsets(rec)
-@param[in] n_cmp number of fields to compare
-@param[in,out] matched_fields number of completely matched fields
+@param dtuple data tuple
+@param rec B-tree index record
+@param index B-tree index
+@param offsets rec_get_offsets(rec,index)
+@param n_cmp number of fields to compare
+@param matched_fields number of completely matched fields
@return the comparison result of dtuple and rec
@retval 0 if dtuple is equal to rec
@retval negative if dtuple is less than rec
@retval positive if dtuple is greater than rec */
-int
-cmp_dtuple_rec_with_match_low(
- const dtuple_t* dtuple,
- const rec_t* rec,
- const rec_offs* offsets,
- ulint n_cmp,
- ulint* matched_fields)
- MY_ATTRIBUTE((nonnull));
-#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields) \
+int cmp_dtuple_rec_with_match_low(const dtuple_t *dtuple, const rec_t *rec,
+ const dict_index_t *index,
+ const rec_offs *offsets,
+ ulint n_cmp, ulint *matched_fields)
+ MY_ATTRIBUTE((nonnull));
+#define cmp_dtuple_rec_with_match(tuple,rec,index,offsets,fields) \
cmp_dtuple_rec_with_match_low( \
- tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields)
+ tuple,rec,index,offsets,dtuple_get_n_fields_cmp(tuple),fields)
/** Compare a data tuple to a physical record.
@param[in] dtuple data tuple
@param[in] rec B-tree or R-tree index record
@@ -178,28 +176,32 @@ cmp_dtuple_rec_with_match_bytes(
MY_ATTRIBUTE((warn_unused_result));
/** Compare a data tuple to a physical record.
@see cmp_dtuple_rec_with_match
-@param[in] dtuple data tuple
-@param[in] rec B-tree record
-@param[in] offsets rec_get_offsets(rec)
+@param dtuple data tuple
+@param rec index record
+@param index index
+@param offsets rec_get_offsets(rec, index)
@return the comparison result of dtuple and rec
@retval 0 if dtuple is equal to rec
@retval negative if dtuple is less than rec
@retval positive if dtuple is greater than rec */
-int
-cmp_dtuple_rec(
- const dtuple_t* dtuple,
- const rec_t* rec,
- const rec_offs* offsets);
-/**************************************************************//**
-Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record.
-@return TRUE if prefix */
-ibool
-cmp_dtuple_is_prefix_of_rec(
-/*========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const rec_offs* offsets);/*!< in: array returned by rec_get_offsets() */
+inline int cmp_dtuple_rec(const dtuple_t *dtuple, const rec_t *rec,
+ const dict_index_t *index, const rec_offs *offsets)
+{
+ ulint matched= 0; /* scratch out-parameter; its final value is not used */
+ return cmp_dtuple_rec_with_match(dtuple, rec, index, offsets, &matched); /* macro: supplies dtuple_get_n_fields_cmp(dtuple) as n_cmp */
+}
+
+/** Check if a dtuple is a prefix of a record.
+@param dtuple data tuple
+@param rec index record
+@param index index
+@param offsets rec_get_offsets(rec, index)
+@return whether dtuple is a prefix of rec */
+bool cmp_dtuple_is_prefix_of_rec(const dtuple_t *dtuple, const rec_t *rec,
+ const dict_index_t *index,
+ const rec_offs *offsets)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/** Compare two physical records that contain the same number of columns,
none of which are stored externally.
@retval positive if rec1 (including non-ordering columns) is greater than rec2
@@ -246,18 +248,39 @@ cmp_rec_rec(
MY_ATTRIBUTE((nonnull(1,2,3,4,5)));
/** Compare two data fields.
-@param[in] dfield1 data field
-@param[in] dfield2 data field
+@param dfield1 data field
+@param dfield2 data field
@return the comparison result of dfield1 and dfield2
-@retval 0 if dfield1 is equal to dfield2, or a prefix of dfield1
-@retval negative if dfield1 is less than dfield2
-@retval positive if dfield1 is greater than dfield2 */
-UNIV_INLINE
-int
-cmp_dfield_dfield_like_prefix(
- const dfield_t* dfield1,
- const dfield_t* dfield2);
+@retval true if dfield1 is equal to dfield2, or dfield2 is a prefix of dfield1
+@retval false otherwise */
+inline bool cmp_dfield_dfield_eq_prefix(const dfield_t *dfield1,
+ const dfield_t *dfield2)
+{
+ ut_ad(dfield_check_typed(dfield1));
+ ut_ad(dfield_check_typed(dfield2));
+ const dtype_t *type= dfield_get_type(dfield1); /* only dfield1's type is inspected below */
-#include "rem0cmp.inl"
+#ifdef UNIV_DEBUG
+ switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
+ case MYSQL_TYPE_BIT:
+ case MYSQL_TYPE_STRING:
+ case MYSQL_TYPE_VAR_STRING:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_VARCHAR:
+ break; /* the MySQL types listed above are the accepted ones */
+ default:
+ ut_error; /* any other MySQL type reaches ut_error when UNIV_DEBUG is defined */
+ }
+#endif /* UNIV_DEBUG */
-#endif
+ uint cs_num= dtype_get_charset_coll(type->prtype); /* charset-collation number taken from dfield1's prtype */
+ CHARSET_INFO *cs= get_charset(cs_num, MYF(MY_WME));
+ ut_a(cs); /* takes the place of the removed ib::fatal() "Unable to find charset-collation" path */
+ return !cs->strnncoll(static_cast<const uchar*>(dfield_get_data(dfield1)), /* negated: a zero strnncoll result yields true */
+ dfield_get_len(dfield1),
+ static_cast<const uchar*>(dfield_get_data(dfield2)),
+ dfield_get_len(dfield2), 1); /* NOTE(review): final argument 1 presumably enables prefix matching -- confirm against the strnncoll declaration */
+}
diff --git a/storage/innobase/include/rem0cmp.inl b/storage/innobase/include/rem0cmp.inl
deleted file mode 100644
index 6e21382d187..00000000000
--- a/storage/innobase/include/rem0cmp.inl
+++ /dev/null
@@ -1,107 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2020, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/rem0cmp.ic
-Comparison services for records
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-#include <mysql_com.h>
-#include <my_sys.h>
-
-/** Compare two data fields.
-@param[in] dfield1 data field; must have type field set
-@param[in] dfield2 data field
-@return the comparison result of dfield1 and dfield2
-@retval 0 if dfield1 is equal to dfield2
-@retval negative if dfield1 is less than dfield2
-@retval positive if dfield1 is greater than dfield2 */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
- const dfield_t* dfield1,
- const dfield_t* dfield2)
-{
- const dtype_t* type;
-
- ut_ad(dfield_check_typed(dfield1));
-
- type = dfield_get_type(dfield1);
-
- return(cmp_data_data(type->mtype, type->prtype,
- (const byte*) dfield_get_data(dfield1),
- dfield_get_len(dfield1),
- (const byte*) dfield_get_data(dfield2),
- dfield_get_len(dfield2)));
-}
-
-/** Compare two data fields.
-@param[in] dfield1 data field
-@param[in] dfield2 data field
-@return the comparison result of dfield1 and dfield2
-@retval 0 if dfield1 is equal to dfield2, or a prefix of dfield1
-@retval negative if dfield1 is less than dfield2
-@retval positive if dfield1 is greater than dfield2 */
-UNIV_INLINE
-int
-cmp_dfield_dfield_like_prefix(
- const dfield_t* dfield1,
- const dfield_t* dfield2)
-{
- const dtype_t* type;
-
- ut_ad(dfield_check_typed(dfield1));
- ut_ad(dfield_check_typed(dfield2));
-
- type = dfield_get_type(dfield1);
-
-#ifdef UNIV_DEBUG
- switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
- case MYSQL_TYPE_BIT:
- case MYSQL_TYPE_STRING:
- case MYSQL_TYPE_VAR_STRING:
- case MYSQL_TYPE_TINY_BLOB:
- case MYSQL_TYPE_MEDIUM_BLOB:
- case MYSQL_TYPE_BLOB:
- case MYSQL_TYPE_LONG_BLOB:
- case MYSQL_TYPE_VARCHAR:
- break;
- default:
- ut_error;
- }
-#endif /* UNIV_DEBUG */
-
- uint cs_num = (uint) dtype_get_charset_coll(type->prtype);
-
- if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
- return(cs->strnncoll(
- static_cast<const uchar*>(
- dfield_get_data(dfield1)),
- dfield_get_len(dfield1),
- static_cast<const uchar*>(
- dfield_get_data(dfield2)),
- dfield_get_len(dfield2),
- 1));
- }
-
- ib::fatal() << "Unable to find charset-collation " << cs_num;
- return(0);
-}
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index f29fd98ad30..ec435df17d8 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -109,6 +109,7 @@ struct index_field_t {
ulint prefix_len; /*!< column prefix length, or 0
if indexing the whole column */
bool is_v_col; /*!< whether this is a virtual column */
+ bool descending; /*!< whether to use DESC order */
};
/** Definition of an index being created */
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index 971f6363bdb..1ca2d7a429f 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -269,9 +269,6 @@ enum monitor_id_t {
MONITOR_OS_PENDING_READS,
MONITOR_OS_PENDING_WRITES,
MONITOR_OVLD_OS_LOG_WRITTEN,
- MONITOR_OVLD_OS_LOG_FSYNC,
- MONITOR_OVLD_OS_LOG_PENDING_FSYNC,
- MONITOR_OVLD_OS_LOG_PENDING_WRITES,
/* Transaction related counters */
MONITOR_MODULE_TRX,
@@ -298,20 +295,16 @@ enum monitor_id_t {
/* Recovery related counters */
MONITOR_MODULE_RECOVERY,
- MONITOR_NUM_CHECKPOINT,
+ MONITOR_OVLD_CHECKPOINTS,
MONITOR_OVLD_LSN_FLUSHDISK,
MONITOR_OVLD_LSN_CHECKPOINT,
MONITOR_OVLD_LSN_CURRENT,
MONITOR_LSN_CHECKPOINT_AGE,
MONITOR_OVLD_BUF_OLDEST_LSN,
MONITOR_OVLD_MAX_AGE_ASYNC,
- MONITOR_PENDING_LOG_FLUSH,
- MONITOR_PENDING_CHECKPOINT_WRITE,
- MONITOR_LOG_IO,
MONITOR_OVLD_LOG_WAITS,
MONITOR_OVLD_LOG_WRITE_REQUEST,
MONITOR_OVLD_LOG_WRITES,
- MONITOR_OVLD_LOG_PADDED,
/* Page Manager related counters */
MONITOR_MODULE_PAGE,
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 4cd77b08a60..41f61567b53 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -86,28 +86,6 @@ struct srv_stats_t
/** Count the amount of data written in total (in bytes) */
ulint_ctr_1_t data_written;
- /** Number of the log write requests done */
- ulint_ctr_1_t log_write_requests;
-
- /** Number of physical writes to the log performed */
- ulint_ctr_1_t log_writes;
-
- /** Amount of data padded for log write ahead */
- ulint_ctr_1_t log_padded;
-
- /** Amount of data written to the log files in bytes */
- lsn_ctr_1_t os_log_written;
-
- /** Number of writes being done to the log files */
- ulint_ctr_1_t os_log_pending_writes;
-
- /** We increase this counter, when we don't have enough
- space in the log buffer and have to flush it */
- ulint_ctr_1_t log_waits;
-
- /** Store the number of write requests issued */
- ulint_ctr_1_t buf_pool_write_requests;
-
/** Number of buffer pool reads that led to the reading of
a disk page */
ulint_ctr_1_t buf_pool_reads;
@@ -286,18 +264,15 @@ extern char* srv_log_group_home_dir;
/** The InnoDB redo log file size, or 0 when changing the redo log format
at startup (while disallowing writes to the redo log). */
extern ulonglong srv_log_file_size;
-extern ulong srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit;
extern uint srv_flush_log_at_timeout;
-extern ulong srv_log_write_ahead_size;
extern my_bool srv_adaptive_flushing;
extern my_bool srv_flush_sync;
/** Requested size in bytes */
extern ulint srv_buf_pool_size;
-/** Requested buffer pool chunk size. Each buffer pool instance consists
-of one or more chunks. */
-extern ulong srv_buf_pool_chunk_unit;
+/** Requested buffer pool chunk size */
+extern size_t srv_buf_pool_chunk_unit;
/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
extern ulong srv_LRU_scan_depth;
/** Whether or not to flush neighbors of a block */
@@ -459,9 +434,13 @@ extern my_bool srv_print_all_deadlocks;
extern my_bool srv_cmp_per_index_enabled;
+/** innodb_encrypt_log */
+extern my_bool srv_encrypt_log;
+
/* is encryption enabled */
extern ulong srv_encrypt_tables;
+
/** Status variables to be passed to MySQL */
extern struct export_var_t export_vars;
@@ -688,7 +667,6 @@ struct export_var_t{
ulint innodb_buffer_pool_pages_old;
ulint innodb_buffer_pool_read_requests; /*!< buf_pool.stat.n_page_gets */
ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
- ulint innodb_buffer_pool_write_requests;/*!< srv_stats.buf_pool_write_requests */
ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
@@ -696,8 +674,6 @@ struct export_var_t{
ulint innodb_checkpoint_max_age;
ulint innodb_data_pending_reads; /*!< Pending reads */
ulint innodb_data_pending_writes; /*!< Pending writes */
- ulint innodb_data_pending_fsyncs; /*!< Pending fsyncs */
- ulint innodb_data_fsyncs; /*!< Number of fsyncs so far */
ulint innodb_data_read; /*!< Data bytes read */
ulint innodb_data_writes; /*!< I/O write requests */
ulint innodb_data_written; /*!< Data bytes written */
@@ -706,9 +682,6 @@ struct export_var_t{
ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */
ulint innodb_deadlocks;
ulint innodb_history_list_length;
- ulint innodb_log_waits; /*!< srv_log_waits */
- ulint innodb_log_write_requests; /*!< srv_log_write_requests */
- ulint innodb_log_writes; /*!< srv_log_writes */
lsn_t innodb_lsn_current;
lsn_t innodb_lsn_flushed;
lsn_t innodb_lsn_last_checkpoint;
@@ -717,10 +690,8 @@ struct export_var_t{
ulint innodb_mem_adaptive_hash;
#endif
ulint innodb_mem_dictionary;
- lsn_t innodb_os_log_written; /*!< srv_os_log_written */
- ulint innodb_os_log_fsyncs; /*!< n_log_flushes */
- ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
- ulint innodb_os_log_pending_fsyncs; /*!< n_pending_log_flushes */
+ /** log_sys.get_lsn() - recv_sys.lsn */
+ lsn_t innodb_os_log_written;
ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */
ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */
int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 567554f34c3..6dc1d8b7341 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -510,8 +510,6 @@ extern mysql_pfs_key_t fts_doc_id_mutex_key;
extern mysql_pfs_key_t ibuf_bitmap_mutex_key;
extern mysql_pfs_key_t ibuf_mutex_key;
extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
-extern mysql_pfs_key_t log_sys_mutex_key;
-extern mysql_pfs_key_t log_flush_order_mutex_key;
extern mysql_pfs_key_t recalc_pool_mutex_key;
extern mysql_pfs_key_t purge_sys_pq_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
@@ -538,6 +536,7 @@ extern mysql_pfs_key_t index_tree_rw_lock_key;
extern mysql_pfs_key_t index_online_log_key;
extern mysql_pfs_key_t trx_sys_rw_lock_key;
extern mysql_pfs_key_t lock_latch_key;
+extern mysql_pfs_key_t log_latch_key;
extern mysql_pfs_key_t trx_rseg_latch_key;
# endif /* UNIV_PFS_RWLOCK */
#endif /* HAVE_PSI_INTERFACE */
diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h
deleted file mode 100644
index 0cbccb976e2..00000000000
--- a/storage/innobase/include/ut0crc32.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2020, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ut0crc32.h
-CRC32 implementation
-
-Created Aug 10, 2011 Vasil Dimov
-*******************************************************/
-
-#ifndef ut0crc32_h
-#define ut0crc32_h
-
-#include "univ.i"
-#include <my_sys.h>
-static inline uint32_t ut_crc32(const byte *s, size_t size)
-{
- return my_crc32c(0, s, size);
-}
-
-#endif /* ut0crc32_h */
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 89ff0ca709f..95541ea574e 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -305,6 +305,16 @@ operator<<(
return(lhs);
}
+/** This is a wrapper class, used to print any number in IEC style */
+struct bytes_iec {
+ explicit bytes_iec(unsigned long long t): m_val(t) {} /* explicit: a plain integer is not implicitly converted to bytes_iec */
+ double get_double() const { return static_cast<double>(m_val); } /* the wrapped value converted to double */
+ const unsigned long long m_val; /* the wrapped value; const, so set only by the constructor */
+};
+
+/** Like hex operator above, except for bytes_iec */
+std::ostream &operator<<(std::ostream &lhs, const bytes_iec &rhs);
+
/** The class logger is the base class of all the error log related classes.
It contains a std::ostringstream object. The main purpose of this class is
to forward operator<< to the underlying std::ostringstream object. Do not