summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marko Mäkelä <marko.makela@mariadb.com> 2020-10-26 15:59:30 +0200
committer Marko Mäkelä <marko.makela@mariadb.com> 2020-10-26 17:09:01 +0200
commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95 (patch)
tree ffc0c6988ce5edaf1f6fb60b4964c57e48f9c08a
parent 3a9a3be1c64b14c05648e87ebe0f1dd96457de41 (diff)
download mariadb-git-45ed9dd957eebc7fc84feb2509f4aa6baa908a95.tar.gz
MDEV-23855: Remove fil_system.LRU and reduce fil_system.mutex contention
Also fixes MDEV-23929: innodb_flush_neighbors is not being ignored for system tablespace on SSD.

When the maximum configured number of files is exceeded, InnoDB will close data files. We used to maintain a fil_system.LRU list and a counter fil_node_t::n_pending to achieve this, at the huge cost of multiple fil_system.mutex operations per I/O operation.

fil_node_open_file_low(): Implement a FIFO replacement policy: The last opened file will be moved to the end of fil_system.space_list, and files will be closed from the start of the list. However, we will not move tablespaces in fil_system.space_list while i_s_tablespaces_encryption_fill_table() is executing (producing output for INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION) because it may cause information of some tablespaces to go missing. We also avoid this in mariabackup --backup because datafiles_iter_next() assumes that the ordering is not changed.

IORequest: Fold more parameters to IORequest::type.

fil_space_t::io(): Replaces fil_io().

fil_space_t::flush(): Replaces fil_flush().

OS_AIO_IBUF: Remove. We will always issue synchronous reads of the change buffer pages in buf_read_page_low(). We will always ignore some errors for background reads. This should reduce fil_system.mutex contention a little.

fil_node_t::complete_write(): Replaces fil_node_t::complete_io(). On both read and write completion, fil_space_t::release_for_io() will have to be called.

fil_space_t::io(): Do not acquire fil_system.mutex in the normal code path.

xb_delta_open_matching_space(): Do not try to open the system tablespace which was already opened. This fixes a file sharing violation in mariabackup --prepare --incremental.

Reviewed by: Vladislav Vaintroub
-rw-r--r--extra/mariabackup/fil_cur.cc25
-rw-r--r--extra/mariabackup/xtrabackup.cc63
-rw-r--r--mysql-test/suite/encryption/t/innodb-remove-encryption.test3
-rw-r--r--mysql-test/suite/encryption/t/innodb_encrypt_key_rotation_age.test5
-rw-r--r--mysql-test/suite/innodb/r/table_definition_cache_debug.result2
-rw-r--r--mysql-test/suite/innodb/t/innodb-trim.test12
-rw-r--r--mysql-test/suite/innodb/t/table_definition_cache_debug.test2
-rw-r--r--storage/innobase/CMakeLists.txt2
-rw-r--r--storage/innobase/btr/btr0cur.cc37
-rw-r--r--storage/innobase/buf/buf0buf.cc27
-rw-r--r--storage/innobase/buf/buf0dblwr.cc88
-rw-r--r--storage/innobase/buf/buf0dump.cc45
-rw-r--r--storage/innobase/buf/buf0flu.cc238
-rw-r--r--storage/innobase/buf/buf0rea.cc175
-rw-r--r--storage/innobase/dict/dict0crea.cc2
-rw-r--r--storage/innobase/dict/dict0load.cc8
-rw-r--r--storage/innobase/fil/fil0crypt.cc15
-rw-r--r--storage/innobase/fil/fil0fil.cc1448
-rw-r--r--storage/innobase/fsp/fsp0file.cc5
-rw-r--r--storage/innobase/fsp/fsp0space.cc2
-rw-r--r--storage/innobase/fsp/fsp0sysspace.cc13
-rw-r--r--storage/innobase/handler/i_s.cc2
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.cc26
-rw-r--r--storage/innobase/include/buf0buf.h9
-rw-r--r--storage/innobase/include/buf0dblwr.h15
-rw-r--r--storage/innobase/include/buf0rea.h11
-rw-r--r--storage/innobase/include/fil0fil.h427
-rw-r--r--storage/innobase/include/fsp0types.h10
-rw-r--r--storage/innobase/include/os0api.h48
-rw-r--r--storage/innobase/include/os0file.h201
-rw-r--r--storage/innobase/include/os0file.ic10
-rw-r--r--storage/innobase/include/trx0sys.h5
-rw-r--r--storage/innobase/log/log0recv.cc13
-rw-r--r--storage/innobase/mtr/mtr0mtr.cc4
-rw-r--r--storage/innobase/os/os0file.cc136
-rw-r--r--storage/innobase/row/row0import.cc13
-rw-r--r--storage/innobase/row/row0quiesce.cc2
-rw-r--r--storage/innobase/srv/srv0start.cc47
-rw-r--r--storage/innobase/trx/trx0purge.cc7
39 files changed, 1303 insertions, 1900 deletions
diff --git a/extra/mariabackup/fil_cur.cc b/extra/mariabackup/fil_cur.cc
index 8f06005a9e4..7aa20b8700e 100644
--- a/extra/mariabackup/fil_cur.cc
+++ b/extra/mariabackup/fil_cur.cc
@@ -93,7 +93,6 @@ xb_fil_node_close_file(
mutex_enter(&fil_system.mutex);
ut_ad(node);
- ut_a(node->n_pending == 0);
ut_a(node->n_pending_flushes == 0);
ut_a(!node->being_extended);
@@ -108,20 +107,10 @@ xb_fil_node_close_file(
ut_a(ret);
node->handle = OS_FILE_CLOSED;
+ mutex_exit(&fil_system.mutex);
ut_a(fil_system.n_open > 0);
fil_system.n_open--;
-
- if (node->space->purpose == FIL_TYPE_TABLESPACE &&
- fil_is_user_tablespace_id(node->space->id)) {
-
- ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
-
- /* The node is in the LRU list, remove it */
- UT_LIST_REMOVE(fil_system.LRU, node);
- }
-
- mutex_exit(&fil_system.mutex);
}
/************************************************************************
@@ -180,18 +169,8 @@ xb_fil_cur_open(
return(XB_FIL_CUR_SKIP);
}
- mutex_enter(&fil_system.mutex);
fil_system.n_open++;
-
- if (node->space->purpose == FIL_TYPE_TABLESPACE &&
- fil_is_user_tablespace_id(node->space->id)) {
-
- /* Put the node to the LRU list */
- UT_LIST_ADD_FIRST(fil_system.LRU, node);
- }
-
- mutex_exit(&fil_system.mutex);
}
ut_ad(node->is_open());
@@ -427,7 +406,7 @@ xb_fil_cur_read(
retry_count = 10;
ret = XB_FIL_CUR_SUCCESS;
- fil_space_t *space = fil_space_acquire_for_io(cursor->space_id);
+ fil_space_t *space = fil_space_t::get_for_io(cursor->space_id);
if (!space) {
return XB_FIL_CUR_ERROR;
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc
index 3189bcd14cb..39025862276 100644
--- a/extra/mariabackup/xtrabackup.cc
+++ b/extra/mariabackup/xtrabackup.cc
@@ -3011,6 +3011,7 @@ void
xb_fil_io_init()
{
fil_system.create(srv_file_per_table ? 50000 : 5000);
+ fil_system.freeze_space_list = 1;
fil_system.space_id_reuse_warned = true;
}
@@ -3087,24 +3088,16 @@ xb_load_single_table_tablespace(
bool is_empty_file = file->exists() && file->is_empty_file();
if (err == DB_SUCCESS && file->space_id() != SRV_TMP_SPACE_ID) {
- os_offset_t node_size = os_file_get_size(file->handle());
- os_offset_t n_pages;
-
- ut_a(node_size != (os_offset_t) -1);
-
- n_pages = node_size / fil_space_t::physical_size(file->flags());
-
- space = fil_space_create(
+ space = fil_space_t::create(
name, file->space_id(), file->flags(),
FIL_TYPE_TABLESPACE, NULL/* TODO: crypt_data */);
ut_a(space != NULL);
- space->add(file->filepath(), OS_FILE_CLOSED, uint32_t(n_pages),
- false, false);
+ space->add(file->filepath(), OS_FILE_CLOSED, 0, false, false);
/* by opening the tablespace we forcing node and space objects
in the cache to be populated with fields from space header */
- space->open();
+ space->get_size();
if (srv_operation == SRV_OPERATION_RESTORE_DELTA
|| xb_close_files) {
@@ -3406,19 +3399,6 @@ xb_load_tablespaces()
return(DB_SUCCESS);
}
-/************************************************************************
-Initialize the tablespace memory cache and populate it by scanning for and
-opening data files.
-@returns DB_SUCCESS or error code.*/
-static
-dberr_t
-xb_data_files_init()
-{
- xb_fil_io_init();
-
- return(xb_load_tablespaces());
-}
-
/** Destroy the tablespace memory cache. */
static void xb_data_files_close()
{
@@ -4607,6 +4587,22 @@ xb_delta_open_matching_space(
return file;
}
+ if (!info.space_id && fil_system.sys_space) {
+ fil_node_t *node
+ = UT_LIST_GET_FIRST(fil_system.sys_space->chain);
+ for (; node; node = UT_LIST_GET_NEXT(chain, node)) {
+ if (!strcmp(node->name, real_name)) {
+ break;
+ }
+ }
+ if (node && node->handle != OS_FILE_CLOSED) {
+ *success = true;
+ return node->handle;
+ }
+ msg("mariabackup: Cannot find file %s\n", real_name);
+ return OS_FILE_CLOSED;
+ }
+
log_mutex_enter();
if (!fil_is_user_tablespace_id(info.space_id)) {
found:
@@ -4704,8 +4700,8 @@ exit:
ut_ad(fil_space_t::zip_size(flags) == info.zip_size);
ut_ad(fil_space_t::physical_size(flags) == info.page_size);
- if (fil_space_create(dest_space_name, info.space_id, flags,
- FIL_TYPE_TABLESPACE, 0)) {
+ if (fil_space_t::create(dest_space_name, info.space_id, flags,
+ FIL_TYPE_TABLESPACE, 0)) {
*success = xb_space_create_file(real_name, info.space_id,
flags, &file);
} else {
@@ -4925,7 +4921,7 @@ xtrabackup_apply_delta(
os_file_close(src_file);
os_file_delete(0,src_path);
}
- if (dst_file != OS_FILE_CLOSED)
+ if (dst_file != OS_FILE_CLOSED && info.space_id)
os_file_close(dst_file);
return TRUE;
@@ -4933,7 +4929,7 @@ error:
aligned_free(incremental_buffer);
if (src_file != OS_FILE_CLOSED)
os_file_close(src_file);
- if (dst_file != OS_FILE_CLOSED)
+ if (dst_file != OS_FILE_CLOSED && info.space_id)
os_file_close(dst_file);
msg("Error: xtrabackup_apply_delta(): "
"failed to apply %s to %s.\n", src_path, dst_path);
@@ -5387,8 +5383,8 @@ static bool xtrabackup_prepare_func(char** argv)
srv_allow_writes_event = os_event_create(0);
os_event_set(srv_allow_writes_event);
#endif
- dberr_t err = xb_data_files_init();
- if (err != DB_SUCCESS) {
+ xb_fil_io_init();
+ if (dberr_t err = xb_load_tablespaces()) {
msg("mariabackup: error: xb_data_files_init() failed "
"with error %s\n", ut_strerr(err));
goto error_cleanup;
@@ -5396,7 +5392,8 @@ static bool xtrabackup_prepare_func(char** argv)
inc_dir_tables_hash.create(1000);
- ok = xtrabackup_apply_deltas();
+ ok = fil_system.sys_space->open(false)
+ && xtrabackup_apply_deltas();
xb_data_files_close();
@@ -5426,6 +5423,8 @@ static bool xtrabackup_prepare_func(char** argv)
goto error_cleanup;
}
+ fil_system.freeze_space_list = 0;
+
/* increase IO threads */
if (srv_n_file_io_threads < 10) {
srv_n_read_io_threads = 4;
@@ -5447,6 +5446,8 @@ static bool xtrabackup_prepare_func(char** argv)
goto error_cleanup;
}
+ ut_ad(!fil_system.freeze_space_list);
+
if (ok) {
msg("Last binlog file %s, position %lld",
trx_sys.recovered_binlog_filename,
diff --git a/mysql-test/suite/encryption/t/innodb-remove-encryption.test b/mysql-test/suite/encryption/t/innodb-remove-encryption.test
index 90c6925d125..aeafd99325b 100644
--- a/mysql-test/suite/encryption/t/innodb-remove-encryption.test
+++ b/mysql-test/suite/encryption/t/innodb-remove-encryption.test
@@ -29,6 +29,7 @@ create table t1(a int not null primary key, b char(200)) engine=innodb;
--source include/wait_condition.inc
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
+--sorted_result
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
--echo # Success!
@@ -41,6 +42,7 @@ SET GLOBAL innodb_encrypt_tables = off;
--let $wait_condition=SELECT COUNT(*) = $tables_count FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0 AND ROTATING_OR_FLUSHING = 0;
--source include/wait_condition.inc
+--sorted_result
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
@@ -51,6 +53,7 @@ SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_
--let $restart_parameters=--skip-file-key-management --innodb-encrypt-tables=OFF --innodb-encryption-threads=0 --innodb-tablespaces-encryption
-- source include/restart_mysqld.inc
+--sorted_result
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_key_rotation_age.test b/mysql-test/suite/encryption/t/innodb_encrypt_key_rotation_age.test
index bc4c43e1ce8..ef38560c469 100644
--- a/mysql-test/suite/encryption/t/innodb_encrypt_key_rotation_age.test
+++ b/mysql-test/suite/encryption/t/innodb_encrypt_key_rotation_age.test
@@ -26,6 +26,7 @@ let $restart_parameters= --innodb_encryption_threads=5 --innodb_encryption_rotat
--source include/wait_condition.inc
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
+--sorted_result
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
--echo # Restart the server with innodb_encryption_rotate_key_age= 0
@@ -45,6 +46,7 @@ create table t4 (f1 int not null)engine=innodb encrypted=NO;
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
+--sorted_result
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
--echo # Disable encryption when innodb_encryption_rotate_key_age is 0
@@ -57,6 +59,7 @@ set global innodb_encrypt_tables = OFF;
--let $wait_condition=SELECT COUNT(*) >= $tables_count FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0 AND ROTATING_OR_FLUSHING = 0;
--source include/wait_condition.inc
+--sorted_result
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
--echo # Display only encrypted create tables (t3)
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
@@ -73,11 +76,13 @@ set global innodb_encrypt_tables = ON;
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
--echo # Display only unencrypted create tables (t4)
+--sorted_result
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
--let $restart_parameters=
-- source include/restart_mysqld.inc
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0;
+--sorted_result
SELECT NAME FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0;
DROP TABLE t4, t3, t2, t1;
diff --git a/mysql-test/suite/innodb/r/table_definition_cache_debug.result b/mysql-test/suite/innodb/r/table_definition_cache_debug.result
index 2c2c6de44ae..df171c89cd4 100644
--- a/mysql-test/suite/innodb/r/table_definition_cache_debug.result
+++ b/mysql-test/suite/innodb/r/table_definition_cache_debug.result
@@ -1,4 +1,4 @@
-call mtr.add_suppression("InnoDB: innodb_open_files=13 is exceeded");
+call mtr.add_suppression("InnoDB: innodb_open_files=.* is exceeded");
SET @save_tdc= @@GLOBAL.table_definition_cache;
SET @save_toc= @@GLOBAL.table_open_cache;
SET GLOBAL table_definition_cache= 400;
diff --git a/mysql-test/suite/innodb/t/innodb-trim.test b/mysql-test/suite/innodb/t/innodb-trim.test
index 0f38ea5ba84..3f8eb5f2c71 100644
--- a/mysql-test/suite/innodb/t/innodb-trim.test
+++ b/mysql-test/suite/innodb/t/innodb-trim.test
@@ -32,18 +32,6 @@ commit;
set autocommit=1;
-let $success= `SELECT variable_value FROM information_schema.global_status WHERE variable_name = 'innodb_num_page_compressed_trim_op'`;
-
-if (!$success) {
---disable_query_log
---disable_result_log
- DROP PROCEDURE innodb_insert_proc;
- DROP TABLE innodb_page_compressed;
---enable_query_log
---enable_result_log
- --skip "Test requires TRIM";
-}
-
DROP PROCEDURE innodb_insert_proc;
DROP TABLE innodb_page_compressed;
diff --git a/mysql-test/suite/innodb/t/table_definition_cache_debug.test b/mysql-test/suite/innodb/t/table_definition_cache_debug.test
index 70467b53435..6a466af4cc5 100644
--- a/mysql-test/suite/innodb/t/table_definition_cache_debug.test
+++ b/mysql-test/suite/innodb/t/table_definition_cache_debug.test
@@ -4,7 +4,7 @@
# This test is slow on buildbot.
--source include/big_test.inc
-call mtr.add_suppression("InnoDB: innodb_open_files=13 is exceeded");
+call mtr.add_suppression("InnoDB: innodb_open_files=.* is exceeded");
SET @save_tdc= @@GLOBAL.table_definition_cache;
SET @save_toc= @@GLOBAL.table_open_cache;
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index 4fac94d211e..10f183790a7 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -1,3 +1,4 @@
+
# Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2014, 2020, MariaDB Corporation.
#
@@ -186,7 +187,6 @@ SET(INNOBASE_SOURCES
include/mtr0mtr.h
include/mtr0mtr.ic
include/mtr0types.h
- include/os0api.h
include/os0event.h
include/os0file.h
include/os0file.ic
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 81a04701da1..c280ed555fe 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -3304,21 +3304,34 @@ upd_sys:
/**
Prefetch siblings of the leaf for the pessimistic operation.
-@param block leaf page */
-static void btr_cur_prefetch_siblings(const buf_block_t* block)
+@param block leaf page
+@param index index of the page */
+static void btr_cur_prefetch_siblings(const buf_block_t *block,
+ const dict_index_t *index)
{
- const page_t *page= block->frame;
- ut_ad(page_is_leaf(page));
+ ut_ad(page_is_leaf(block->frame));
+
+ if (index->is_ibuf())
+ return;
+ const page_t *page= block->frame;
uint32_t prev= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_PREV));
uint32_t next= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_NEXT));
if (prev != FIL_NULL)
- buf_read_page_background(page_id_t(block->page.id().space(), prev),
+ {
+ ut_a(index->table->space->acquire_for_io());
+ buf_read_page_background(index->table->space,
+ page_id_t(block->page.id().space(), prev),
block->zip_size(), false);
+ }
if (next != FIL_NULL)
- buf_read_page_background(page_id_t(block->page.id().space(), next),
+ {
+ ut_a(index->table->space->acquire_for_io());
+ buf_read_page_background(index->table->space,
+ page_id_t(block->page.id().space(), next),
block->zip_size(), false);
+ }
}
/*************************************************************//**
@@ -3436,8 +3449,8 @@ fail:
/* prefetch siblings of the leaf for the pessimistic
operation, if the page is leaf. */
- if (page_is_leaf(page) && !index->is_ibuf()) {
- btr_cur_prefetch_siblings(block);
+ if (page_is_leaf(page)) {
+ btr_cur_prefetch_siblings(block, index);
}
fail_err:
@@ -4575,7 +4588,7 @@ any_extern:
/* prefetch siblings of the leaf for the pessimistic
operation. */
- btr_cur_prefetch_siblings(block);
+ btr_cur_prefetch_siblings(block, index);
return(DB_OVERFLOW);
}
@@ -4766,10 +4779,10 @@ func_exit:
}
}
- if (err != DB_SUCCESS && !index->is_ibuf()) {
+ if (err != DB_SUCCESS) {
/* prefetch siblings of the leaf for the pessimistic
operation. */
- btr_cur_prefetch_siblings(block);
+ btr_cur_prefetch_siblings(block, index);
}
return(err);
@@ -5481,7 +5494,7 @@ btr_cur_optimistic_delete_func(
if (!no_compress_needed) {
/* prefetch siblings of the leaf for the pessimistic
operation. */
- btr_cur_prefetch_siblings(block);
+ btr_cur_prefetch_siblings(block, cursor->index);
goto func_exit;
}
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index daf5e1aa511..2046ffd4273 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -2768,7 +2768,7 @@ buf_zip_decompress(
ulint size = page_zip_get_size(&block->page.zip);
/* The tablespace will not be found if this function is called
during IMPORT. */
- fil_space_t* space= fil_space_acquire_for_io(block->page.id().space());
+ fil_space_t* space= fil_space_t::get_for_io(block->page.id().space());
const unsigned key_version = mach_read_from_4(
frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
fil_space_crypt_t* crypt_data = space ? space->crypt_data : NULL;
@@ -3034,10 +3034,9 @@ buf_page_get_low(
/* fall through */
case BUF_GET:
case BUF_GET_IF_IN_POOL_OR_WATCH:
- fil_space_t* s = fil_space_acquire_for_io(page_id.space());
+ fil_space_t* s = fil_space_get(page_id.space());
ut_ad(s);
ut_ad(s->zip_size() == zip_size);
- s->release_for_io();
}
#endif /* UNIV_DEBUG */
@@ -3107,7 +3106,7 @@ lookup:
}
/* The call path is buf_read_page() ->
- buf_read_page_low() (fil_io()) ->
+ buf_read_page_low() (fil_space_t::io()) ->
buf_page_read_complete() ->
buf_decrypt_after_read(). Here fil_space_t* is used
and we decrypt -> buf_page_check_corrupt() where page
@@ -3161,8 +3160,7 @@ lookup:
asserting. */
if (page_id.space() == TRX_SYS_SPACE) {
} else if (page_id.space() == SRV_TMP_SPACE_ID) {
- } else if (fil_space_t* space
- = fil_space_acquire_for_io(
+ } else if (fil_space_t* space= fil_space_t::get_for_io(
page_id.space())) {
bool set = dict_set_corrupted_by_space(space);
space->release_for_io();
@@ -3376,8 +3374,8 @@ re_evict:
if (mode != BUF_GET_IF_IN_POOL
&& mode != BUF_GET_IF_IN_POOL_OR_WATCH) {
} else if (!ibuf_debug) {
- } else if (fil_space_t* space =
- fil_space_acquire_for_io(page_id.space())) {
+ } else if (fil_space_t* space
+ = fil_space_t::get_for_io(page_id.space())) {
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
@@ -4869,17 +4867,4 @@ std::ostream& operator<<(std::ostream &out, const page_id_t page_id)
<< ", page number=" << page_id.page_no() << "]";
return out;
}
-
-/**
-Calculate the length of trim (punch_hole) operation.
-@param[in] bpage Page control block
-@param[in] write_length Write length
-@return length of the trim or zero. */
-ulint
-buf_page_get_trim_length(
- const buf_page_t* bpage,
- ulint write_length)
-{
- return bpage->physical_size() - write_length;
-}
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index ad515e4e194..6b1a32d8930 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -125,7 +125,8 @@ too_small:
byte *fseg_header= TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG +
trx_sys_block->frame;
- for (uint32_t prev_page_no= 0, i= 0; i < 2 * size + FSP_EXTENT_SIZE / 2; i++)
+ for (uint32_t prev_page_no= 0, i= 0, extent_size= FSP_EXTENT_SIZE;
+ i < 2 * size + extent_size / 2; i++)
{
buf_block_t *new_block= fseg_alloc_free_page(fseg_header, prev_page_no + 1,
FSP_UP, &mtr);
@@ -362,15 +363,13 @@ void buf_dblwr_t::recover()
continue;
}
- fil_space_t* space= fil_space_acquire_for_io(space_id);
+ fil_space_t *space= fil_space_t::get_for_io(space_id);
if (!space)
/* The tablespace that this page once belonged to does not exist */
continue;
- fil_space_open_if_needed(space);
-
- if (UNIV_UNLIKELY(page_no >= space->size))
+ if (UNIV_UNLIKELY(page_no >= space->get_size()))
{
/* Do not report the warning for undo tablespaces, because they
can be truncated in place. */
@@ -385,7 +384,6 @@ next_page:
}
const ulint physical_size= space->physical_size();
- const ulint zip_size= space->zip_size();
ut_ad(!buf_is_zeroes(span<const byte>(page, physical_size)));
/* We want to ensure that for partial reads the unread portion of
@@ -393,18 +391,15 @@ next_page:
memset(read_buf, 0x0, physical_size);
/* Read in the actual page from the file */
- fil_io_t fio= fil_io(IORequest(IORequest::READ | IORequest::DBLWR_RECOVER),
- true, page_id, zip_size,
- 0, physical_size, read_buf, nullptr);
+ fil_io_t fio= space->io(IORequest(IORequest::DBLWR_RECOVER),
+ os_offset_t{page_no} * physical_size,
+ physical_size, read_buf);
if (UNIV_UNLIKELY(fio.err != DB_SUCCESS))
ib::warn() << "Double write buffer recovery: " << page_id
<< " (tablespace '" << space->name
<< "') read failed with error: " << fio.err;
- if (fio.node)
- fio.node->space->release_for_io();
-
if (buf_is_zeroes(span<const byte>(read_buf, physical_size)))
{
/* We will check if the copy in the doublewrite buffer is
@@ -425,17 +420,15 @@ next_page:
/* Write the good page from the doublewrite buffer to the intended
position. */
- fio= fil_io(IORequestWrite, true, page_id, zip_size, 0, physical_size,
- page, nullptr);
+ space->reacquire_for_io();
+ fio= space->io(IORequestWrite,
+ os_offset_t{page_id.page_no()} * physical_size,
+ physical_size, page);
- if (fio.node)
- {
- ut_ad(fio.err == DB_SUCCESS);
+ if (fio.err == DB_SUCCESS)
ib::info() << "Recovered page " << page_id << " to '" << fio.node->name
<< "' from the doublewrite buffer.";
- fio.node->space->release_for_io();
- goto next_page;
- }
+ goto next_page;
}
recv_sys.dblwr.pages.clear();
@@ -513,7 +506,7 @@ static void buf_dblwr_check_page_lsn(const page_t* page, const fil_space_t& s)
static void buf_dblwr_check_page_lsn(const buf_page_t &b, const byte *page)
{
- if (fil_space_t *space= fil_space_acquire_for_io(b.id().space()))
+ if (fil_space_t *space= fil_space_t::get_for_io(b.id().space()))
{
buf_dblwr_check_page_lsn(page, *space);
space->release_for_io();
@@ -577,7 +570,7 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
#ifdef UNIV_DEBUG
for (ulint len2= 0, i= 0; i < old_first_free; len2 += srv_page_size, i++)
{
- buf_page_t *bpage= buf_block_arr[i].bpage;
+ buf_page_t *bpage= buf_block_arr[i].request.bpage;
if (bpage->zip.data)
/* No simple validate for ROW_FORMAT=COMPRESSED pages exists. */
@@ -590,18 +583,22 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
}
#endif /* UNIV_DEBUG */
/* Write out the first block of the doublewrite buffer */
- fil_io_t fio= fil_io(IORequestWrite, true, block1, 0, 0,
- std::min(size, old_first_free) << srv_page_size_shift,
- write_buf, nullptr);
- fio.node->space->release_for_io();
+ ut_a(fil_system.sys_space->acquire_for_io());
+ fil_system.sys_space->io(IORequestWrite,
+ os_offset_t{block1.page_no()} <<
+ srv_page_size_shift,
+ std::min(size, old_first_free) <<
+ srv_page_size_shift, write_buf);
if (old_first_free > size)
{
/* Write out the second block of the doublewrite buffer. */
- fio= fil_io(IORequestWrite, true, block2, 0, 0,
- (old_first_free - size) << srv_page_size_shift,
- write_buf + (size << srv_page_size_shift), nullptr);
- fio.node->space->release_for_io();
+ ut_a(fil_system.sys_space->acquire_for_io());
+ fil_system.sys_space->io(IORequestWrite,
+ os_offset_t{block2.page_no()} <<
+ srv_page_size_shift,
+ (old_first_free - size) << srv_page_size_shift,
+ write_buf + (size << srv_page_size_shift));
}
/* increment the doublewrite flushed pages counter */
@@ -609,7 +606,7 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
srv_stats.dblwr_writes.inc();
/* Now flush the doublewrite buffer data to disk */
- fil_flush(TRX_SYS_SPACE);
+ fil_system.sys_space->flush();
/* We know that the writes have been flushed to disk now
and in recovery we will find them in the doublewrite buffer
@@ -629,8 +626,8 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
for (ulint i= 0; i < old_first_free; i++)
{
auto e= buf_block_arr[i];
- buf_page_t* bpage= e.bpage;
- ut_a(bpage->in_file());
+ buf_page_t* bpage= e.request.bpage;
+ ut_ad(bpage->in_file());
/* We request frame here to get correct buffer in case of
encryption and/or page compression */
@@ -650,8 +647,7 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
ut_d(buf_dblwr_check_page_lsn(*bpage, static_cast<const byte*>(frame)));
}
- fil_io(IORequest(IORequest::WRITE, bpage, e.lru), false,
- bpage->id(), bpage->zip_size(), 0, e_size, frame, bpage);
+ e.space->io(e.request, bpage->physical_offset(), e_size, frame, bpage);
}
return true;
@@ -680,12 +676,20 @@ void buf_dblwr_t::flush_buffered_writes()
/** Schedule a page write. If the doublewrite memory buffer is full,
flush_buffered_writes() will be invoked to make space.
-@param bpage buffer pool page to be written
-@param lru true=buf_pool.LRU; false=buf_pool.flush_list
+@param space tablespace
+@param request asynchronous write request
@param size payload size in bytes */
-void buf_dblwr_t::add_to_batch(buf_page_t *bpage, bool lru, size_t size)
+void buf_dblwr_t::add_to_batch(fil_space_t *space, const IORequest &request,
+ size_t size)
{
- ut_ad(bpage->in_file());
+ ut_ad(request.is_async());
+ ut_ad(request.is_write());
+ ut_ad(request.bpage);
+ ut_ad(request.bpage->in_file());
+ ut_ad(space->id == request.bpage->id().space());
+ ut_ad(space->pending_io());
+ ut_ad(!srv_read_only_mode);
+
const ulint buf_size= 2 * block_size();
mysql_mutex_lock(&mutex);
@@ -707,13 +711,13 @@ void buf_dblwr_t::add_to_batch(buf_page_t *bpage, bool lru, size_t size)
/* We request frame here to get correct buffer in case of
encryption and/or page compression */
- void *frame= buf_page_get_frame(bpage);
+ void *frame= buf_page_get_frame(request.bpage);
memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(p, frame, size);
- ut_ad(!bpage->zip_size() || bpage->zip_size() == size);
+ ut_ad(!request.bpage->zip_size() || request.bpage->zip_size() == size);
ut_ad(reserved == first_free);
ut_ad(reserved < buf_size);
- buf_block_arr[first_free++]= { bpage, lru, size };
+ new (buf_block_arr + first_free++) element{space, request, size};
reserved= first_free;
if (first_free != buf_size || !flush_buffered_writes(buf_size / 2))
diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc
index b66f5e39744..19a9e09e4a1 100644
--- a/storage/innobase/buf/buf0dump.cc
+++ b/storage/innobase/buf/buf0dump.cc
@@ -626,6 +626,14 @@ buf_load()
so all pages from a given tablespace are consecutive. */
ulint cur_space_id = dump[0].space();
fil_space_t* space = fil_space_acquire_silent(cur_space_id);
+ if (space) {
+ bool ok = space->acquire_for_io();
+ space->release();
+ if (!ok) {
+ space = nullptr;
+ }
+ }
+
ulint zip_size = space ? space->zip_size() : 0;
PSI_stage_progress* pfs_stage_progress __attribute__((unused))
@@ -644,24 +652,34 @@ buf_load()
}
if (this_space_id != cur_space_id) {
- if (space != NULL) {
- space->release();
+ if (space) {
+ space->release_for_io();
}
cur_space_id = this_space_id;
space = fil_space_acquire_silent(cur_space_id);
- if (space != NULL) {
- zip_size = space->zip_size();
+ if (!space) {
+ continue;
}
+
+ bool ok = space->acquire_for_io();
+ space->release();
+
+ if (!ok) {
+ space = nullptr;
+ continue;
+ }
+
+ zip_size = space->zip_size();
}
/* JAN: TODO: As we use background page read below,
if tablespace is encrypted we cant use it. */
- if (space == NULL ||
- (space && space->crypt_data &&
- space->crypt_data->encryption != FIL_ENCRYPTION_OFF &&
- space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED)) {
+ if (!space || dump[i].page_no() >= space->get_size() ||
+ (space->crypt_data &&
+ space->crypt_data->encryption != FIL_ENCRYPTION_OFF &&
+ space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED)) {
continue;
}
@@ -671,11 +689,12 @@ buf_load()
continue;
}
- buf_read_page_background(dump[i], zip_size, true);
+ space->reacquire_for_io();
+ buf_read_page_background(space, dump[i], zip_size, true);
if (buf_load_abort_flag) {
- if (space != NULL) {
- space->release();
+ if (space) {
+ space->release_for_io();
}
buf_load_abort_flag = false;
ut_free(dump);
@@ -702,8 +721,8 @@ buf_load()
#endif
}
- if (space != NULL) {
- space->release();
+ if (space) {
+ space->release_for_io();
}
ut_free(dump);
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index b69026ef990..25523ab53f1 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -782,6 +782,11 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
{
ut_ad(bpage->in_file());
ut_ad(bpage->ready_for_flush());
+ ut_ad((space->purpose == FIL_TYPE_TEMPORARY) ==
+ (space == fil_system.temp_space));
+ ut_ad(space->purpose == FIL_TYPE_TABLESPACE ||
+ space->atomic_write_supported);
+ ut_ad(space->pending_io());
rw_lock_t *rw_lock;
@@ -807,11 +812,6 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
io_fix and oldest_modification()!=0. Thus, it cannot be relocated in
the buffer pool or removed from flush_list or LRU_list. */
- ut_ad((space->purpose == FIL_TYPE_TEMPORARY) ==
- (space == fil_system.temp_space));
- ut_ad(space->purpose == FIL_TYPE_TABLESPACE ||
- space->atomic_write_supported);
-
DBUG_PRINT("ib_buf", ("%s %u page %u:%u",
lru ? "LRU" : "flush_list",
bpage->id().space(), bpage->id().page_no()));
@@ -850,82 +850,66 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
}
}
- size_t size, orig_size;
- ulint type= IORequest::WRITE;
-
- if (UNIV_UNLIKELY(!rw_lock)) /* ROW_FORMAT=COMPRESSED */
- {
- ut_ad(!space->full_crc32());
- ut_ad(!space->is_compressed()); /* not page_compressed */
- orig_size= size= bpage->zip_size();
- if (status != buf_page_t::FREED)
- {
- buf_flush_update_zip_checksum(frame, orig_size);
- frame= buf_page_encrypt(space, bpage, frame, &size);
- }
- ut_ad(size == bpage->zip_size());
- }
+ if (status == buf_page_t::FREED)
+ buf_release_freed_page(&block->page);
else
{
- byte *page= block->frame;
- orig_size= size= block->physical_size();
+ space->reacquire_for_io();
+ ut_ad(status == buf_page_t::NORMAL || status == buf_page_t::INIT_ON_FLUSH);
+ size_t size, orig_size;
+ IORequest::Type type= lru ? IORequest::WRITE_LRU : IORequest::WRITE_ASYNC;
- if (status == buf_page_t::FREED);
- else if (space->full_crc32())
+ if (UNIV_UNLIKELY(!rw_lock)) /* ROW_FORMAT=COMPRESSED */
{
- /* innodb_checksum_algorithm=full_crc32 is not implemented for
- ROW_FORMAT=COMPRESSED pages. */
- ut_ad(!frame);
- page= buf_page_encrypt(space, bpage, page, &size);
- buf_flush_init_for_writing(block, page, nullptr, true);
+ ut_ad(!space->full_crc32());
+ ut_ad(!space->is_compressed()); /* not page_compressed */
+ orig_size= size= bpage->zip_size();
+ buf_flush_update_zip_checksum(frame, size);
+ frame= buf_page_encrypt(space, bpage, frame, &size);
+ ut_ad(size == bpage->zip_size());
}
else
{
- buf_flush_init_for_writing(block, page, frame ? &bpage->zip : nullptr,
- false);
- page= buf_page_encrypt(space, bpage, frame ? frame : page, &size);
- }
+ byte *page= block->frame;
+ orig_size= size= block->physical_size();
+
+ if (space->full_crc32())
+ {
+ /* innodb_checksum_algorithm=full_crc32 is not implemented for
+ ROW_FORMAT=COMPRESSED pages. */
+ ut_ad(!frame);
+ page= buf_page_encrypt(space, bpage, page, &size);
+ buf_flush_init_for_writing(block, page, nullptr, true);
+ }
+ else
+ {
+ buf_flush_init_for_writing(block, page, frame ? &bpage->zip : nullptr,
+ false);
+ page= buf_page_encrypt(space, bpage, frame ? frame : page, &size);
+ }
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
- if (size != orig_size && space->punch_hole)
- type|= IORequest::PUNCH_HOLE;
+ if (size != orig_size && space->punch_hole)
+ type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH;
#else
DBUG_EXECUTE_IF("ignore_punch_hole",
if (size != orig_size && space->punch_hole)
- type|= IORequest::PUNCH_HOLE;);
+ type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH;);
#endif
- frame= page;
- }
-
- IORequest request(type, bpage, lru);
+ frame=page;
+ }
- ut_ad(status == bpage->status);
+ ut_ad(status == bpage->status);
- switch (status) {
- default:
- ut_ad(status == buf_page_t::FREED);
- buf_release_freed_page(bpage);
- break;
- case buf_page_t::NORMAL:
- if (space->use_doublewrite())
- {
- ut_ad(!srv_read_only_mode);
- if (lru)
- buf_pool.n_flush_LRU++;
- else
- buf_pool.n_flush_list++;
- buf_dblwr.add_to_batch(bpage, lru, size);
- break;
- }
- /* fall through */
- case buf_page_t::INIT_ON_FLUSH:
if (lru)
buf_pool.n_flush_LRU++;
else
buf_pool.n_flush_list++;
- /* FIXME: pass space to fil_io() */
- fil_io(request, false, bpage->id(), bpage->zip_size(), 0,
- bpage->physical_size(), frame, bpage);
+ if (status != buf_page_t::NORMAL || !space->use_doublewrite())
+ space->io(IORequest(type, bpage),
+ bpage->physical_offset(), size, frame, bpage);
+ else
+ buf_dblwr.add_to_batch(space, IORequest(type, bpage), size);
}
/* Increment the I/O operation count used for selecting LRU policy. */
@@ -973,8 +957,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
? static_cast<uint32_t>(s) : read_ahead;
page_id_t low= id - (id.page_no() % buf_flush_area);
page_id_t high= low + buf_flush_area;
- high.set_page_no(std::min(high.page_no(),
- static_cast<uint32_t>(space.committed_size - 1)));
+ high.set_page_no(std::min(high.page_no(), space.last_page_number()));
if (!contiguous)
{
@@ -1018,13 +1001,12 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
return i;
}
+MY_ATTRIBUTE((nonnull))
/** Write punch-hole or zeroes of the freed ranges when
innodb_immediate_scrub_data_uncompressed from the freed ranges.
-@param[in] space tablespace which contains freed ranges
-@param[in] freed_ranges freed ranges of the page to be flushed */
+@param space tablespace which may contain ranges of freed pages */
static void buf_flush_freed_pages(fil_space_t *space)
{
- ut_ad(space != NULL);
const bool punch_hole= space->punch_hole;
if (!srv_immediate_scrub_data_uncompressed && !punch_hole)
return;
@@ -1043,27 +1025,24 @@ static void buf_flush_freed_pages(fil_space_t *space)
for (const auto &range : freed_ranges)
{
- ulint page_size= space->zip_size();
- if (!page_size)
- page_size= srv_page_size;
+ const ulint physical_size= space->physical_size();
if (punch_hole)
{
- const auto len= (range.last - range.first + 1) * page_size;
- const page_id_t page_id(space->id, range.first);
- fil_io_t fio= fil_io(IORequestWrite, true, page_id, space->zip_size(),
- 0, len, nullptr, nullptr, false, true);
- if (fio.node)
- fio.node->space->release_for_io();
+ space->reacquire_for_io();
+ space->io(IORequest(IORequest::PUNCH_RANGE),
+ os_offset_t{range.first} * physical_size,
+ (range.last - range.first + 1) * physical_size,
+ nullptr);
}
else if (srv_immediate_scrub_data_uncompressed)
{
- for (auto i= range.first; i <= range.last; i++)
+ for (os_offset_t i= range.first; i <= range.last; i++)
{
- const page_id_t page_id(space->id, i);
- fil_io(IORequestWrite, false, page_id, space->zip_size(), 0,
- space->zip_size() ? space->zip_size() : srv_page_size,
- const_cast<byte*>(field_ref_zero), nullptr, false, false);
+ space->reacquire_for_io();
+ space->io(IORequest(IORequest::WRITE_ASYNC),
+ i * physical_size, physical_size,
+ const_cast<byte*>(field_ref_zero));
}
}
buf_pool.stat.n_pages_written+= (range.last - range.first + 1);
@@ -1093,7 +1072,8 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
ut_ad(page_id >= id);
ut_ad(page_id < high);
- for (ulint id_fold= id.fold(); id < high; ++id, ++id_fold)
+ for (ulint id_fold= id.fold(); id < high && !space->is_stopping();
+ ++id, ++id_fold)
{
if (count + n_flushed >= n_to_flush)
{
@@ -1190,7 +1170,7 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
@retval nullptr if the pages for this tablespace should be discarded */
static fil_space_t *buf_flush_space(const uint32_t id)
{
- fil_space_t *space= fil_space_acquire_for_io(id);
+ fil_space_t *space= fil_space_t::get_for_io(id);
if (space)
buf_flush_freed_pages(space);
return space;
@@ -1204,6 +1184,37 @@ struct flush_counters_t
ulint evicted;
};
+/** Try to discard a dirty page; gives up if the block latch is unavailable.
+@param bpage dirty page whose tablespace is not accessible */
+static void buf_flush_discard_page(buf_page_t *bpage)
+{
+ mysql_mutex_assert_owner(&buf_pool.mutex); /* caller holds buf_pool.mutex */
+ mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); /* taken below */
+ ut_ad(bpage->in_file());
+ ut_ad(bpage->oldest_modification()); /* nonzero: the page is dirty */
+
+ rw_lock_t *rw_lock; /* block latch; nullptr unless BUF_BLOCK_FILE_PAGE */
+
+ if (bpage->state() != BUF_BLOCK_FILE_PAGE)
+ rw_lock= nullptr;
+ else
+ {
+ rw_lock= &reinterpret_cast<buf_block_t*>(bpage)->lock;
+ if (!rw_lock_sx_lock_nowait(rw_lock, 0))
+ return; /* latch not acquired at once: leave the page as it is */
+ }
+
+ bpage->status= buf_page_t::NORMAL;
+ mysql_mutex_lock(&buf_pool.flush_list_mutex); /* held only for the removal */
+ buf_flush_remove(bpage);
+ mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+
+ if (rw_lock)
+ rw_lock_sx_unlock(rw_lock);
+
+ buf_LRU_free_page(bpage, true); /* NOTE(review): 2nd argument presumably also frees the compressed copy - confirm */
+}
+
/** Flush dirty blocks from the end of the LRU list.
@param max maximum number of blocks to make available in buf_pool.free
@param n counts of flushed and evicted pages */
@@ -1219,6 +1230,9 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN
? 0 : srv_flush_neighbors;
fil_space_t *space= nullptr;
+ uint32_t last_space_id= FIL_NULL;
+ static_assert(FIL_NULL > SRV_TMP_SPACE_ID, "consistency");
+ static_assert(FIL_NULL > SRV_SPACE_ID_UPPER_BOUND, "consistency");
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU);
bpage && n->flushed + n->evicted < max &&
@@ -1244,13 +1258,25 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
const uint32_t space_id= page_id.space();
if (!space || space->id != space_id)
{
- if (space)
- space->release_for_io();
- space= buf_flush_space(space_id);
- if (!space)
- continue;
+ if (last_space_id != space_id)
+ {
+ if (space)
+ space->release_for_io();
+ space= buf_flush_space(space_id);
+ last_space_id= space_id;
+ }
+ else
+ ut_ad(!space);
+ }
+ else if (space->is_stopping())
+ {
+ space->release_for_io();
+ space= nullptr;
}
- if (neighbors && space->is_rotational())
+
+ if (!space)
+ buf_flush_discard_page(bpage);
+ else if (neighbors && space->is_rotational())
{
mysql_mutex_unlock(&buf_pool.mutex);
n->flushed+= buf_flush_try_neighbors(space, page_id, neighbors == 1,
@@ -1328,6 +1354,9 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN
? 0 : srv_flush_neighbors;
fil_space_t *space= nullptr;
+ uint32_t last_space_id= FIL_NULL;
+ static_assert(FIL_NULL > SRV_TMP_SPACE_ID, "consistency");
+ static_assert(FIL_NULL > SRV_SPACE_ID_UPPER_BOUND, "consistency");
/* Start from the end of the list looking for a suitable block to be
flushed. */
@@ -1361,17 +1390,29 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
const uint32_t space_id= page_id.space();
if (!space || space->id != space_id)
{
- if (space)
- space->release_for_io();
- space= buf_flush_space(space_id);
- if (!space)
- continue;
+ if (last_space_id != space_id)
+ {
+ if (space)
+ space->release_for_io();
+ space= buf_flush_space(space_id);
+ last_space_id= space_id;
+ }
+ else
+ ut_ad(!space);
}
- if (neighbors && space->is_rotational())
+ else if (space->is_stopping())
+ {
+ space->release_for_io();
+ space= nullptr;
+ }
+
+ if (!space)
+ buf_flush_discard_page(bpage);
+ else if (neighbors && space->is_rotational())
{
mysql_mutex_unlock(&buf_pool.mutex);
count+= buf_flush_try_neighbors(space, page_id, neighbors == 1,
- false, count, max_n);
+ false, count, max_n);
reacquire_mutex:
mysql_mutex_lock(&buf_pool.mutex);
}
@@ -1476,10 +1517,9 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn)
while not holding buf_pool.flush_list_mutex */
if (running || !UT_LIST_GET_LEN(buf_pool.flush_list))
{
+ if (!running)
+ mysql_cond_broadcast(cond);
mysql_mutex_unlock(&buf_pool.mutex);
- if (running)
- return 0;
- mysql_cond_broadcast(cond);
return 0;
}
n_flush++;
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index bc81a8e9b86..daea53ec130 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -261,26 +261,23 @@ flag is cleared and the x-lock released by an i/o-handler thread.
@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED
if we are trying
to read from a non-existent tablespace
+@param[in,out] space tablespace
@param[in] sync true if synchronous aio is desired
@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...,
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] unzip true=request uncompressed page
-@param[in] ignore whether to ignore out-of-bounds page_id
-@return 1 if a read request was queued, 0 if the page already resided
-in buf_pool, or if the page is in the doublewrite buffer blocks in
-which case it is never read into the pool, or if the tablespace does
-not exist or is being dropped */
+@return whether a read request was queued */
static
-ulint
+bool
buf_read_page_low(
dberr_t* err,
+ fil_space_t* space,
bool sync,
ulint mode,
const page_id_t page_id,
ulint zip_size,
- bool unzip,
- bool ignore = false)
+ bool unzip)
{
buf_page_t* bpage;
@@ -290,17 +287,22 @@ buf_read_page_low(
ib::error() << "Trying to read doublewrite buffer page "
<< page_id;
ut_ad(0);
- return(0);
+nothing_read:
+ space->release_for_io();
+ return false;
}
- if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id)) {
+ if (sync) {
+ } else if (trx_sys_hdr_page(page_id)
+ || ibuf_bitmap_page(page_id, zip_size)
+ || (!recv_no_ibuf_operations
+ && ibuf_page(page_id, zip_size, nullptr))) {
/* Trx sys header is so low in the latching order that we play
safe and do not leave the i/o-completion to an asynchronous
- i/o-thread. Ibuf bitmap pages must always be read with
+ i/o-thread. Change buffer pages must always be read with
syncronous i/o, to make sure they do not get involved in
thread deadlocks. */
-
sync = true;
}
@@ -311,20 +313,19 @@ buf_read_page_low(
bpage = buf_page_init_for_read(mode, page_id, zip_size, unzip);
if (bpage == NULL) {
-
- return(0);
+ goto nothing_read;
}
- DBUG_LOG("ib_buf",
- "read page " << page_id << " zip_size=" << zip_size
- << " unzip=" << unzip << ',' << (sync ? "sync" : "async"));
-
ut_ad(bpage->in_file());
if (sync) {
- thd_wait_begin(NULL, THD_WAIT_DISKIO);
+ thd_wait_begin(nullptr, THD_WAIT_DISKIO);
}
+ DBUG_LOG("ib_buf",
+ "read page " << page_id << " zip_size=" << zip_size
+ << " unzip=" << unzip << ',' << (sync ? "sync" : "async"));
+
void* dst;
if (zip_size) {
@@ -335,20 +336,18 @@ buf_read_page_low(
dst = ((buf_block_t*) bpage)->frame;
}
- fil_io_t fio = fil_io(
- IORequestRead, sync, page_id, zip_size, 0,
- zip_size ? zip_size : srv_page_size,
- dst, bpage, ignore);
+ const ulint len = zip_size ? zip_size : srv_page_size;
+ auto fio = space->io(IORequest(sync
+ ? IORequest::READ_SYNC
+ : IORequest::READ_ASYNC),
+ page_id.page_no() * len, len, dst, bpage);
*err= fio.err;
if (UNIV_UNLIKELY(fio.err != DB_SUCCESS)) {
- if (ignore || fio.err == DB_TABLESPACE_DELETED) {
+ if (!sync || fio.err == DB_TABLESPACE_DELETED) {
buf_pool.corrupted_evict(bpage);
- if (sync && fio.node) {
- fio.node->space->release_for_io();
- }
- return(0);
+ return false;
}
ut_error;
@@ -357,16 +356,16 @@ buf_read_page_low(
if (sync) {
thd_wait_end(NULL);
- /* The i/o was already completed in fil_io() */
+ /* The i/o was already completed in space->io() */
*err = buf_page_read_complete(bpage, *fio.node);
- fio.node->space->release_for_io();
+ space->release_for_io();
if (*err != DB_SUCCESS) {
- return(0);
+ return false;
}
}
- return(1);
+ return true;
}
/** Applies a random read-ahead in buf_pool if there are at least a threshold
@@ -411,7 +410,7 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
ulint count= 5 + buf_read_ahead_area / 8;
const page_id_t low= page_id - (page_id.page_no() % buf_read_ahead_area);
page_id_t high= low + buf_read_ahead_area;
- high.set_page_no(std::min(high.page_no(), space->committed_size - 1));
+ high.set_page_no(std::min(high.page_no(), space->last_page_number()));
/* Count how many blocks in the area have been recently accessed,
that is, reside near the start of the LRU list. */
@@ -427,10 +426,14 @@ buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
goto read_ahead;
}
+no_read_ahead:
space->release();
return 0;
read_ahead:
+ if (!space->acquire_for_io())
+ goto no_read_ahead;
+
/* Read all the suitable blocks within the area */
const ulint ibuf_mode= ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;
@@ -441,13 +444,16 @@ read_ahead:
if (space->is_stopping())
break;
dberr_t err;
- count+= buf_read_page_low(&err, false, ibuf_mode, i, zip_size, false);
+ space->reacquire_for_io();
+ if (buf_read_page_low(&err, space, false, ibuf_mode, i, zip_size, false))
+ count++;
}
if (count)
DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
count, space->chain.start->name,
low.page_no()));
+ space->release_for_io();
space->release();
/* Read ahead is considered one I/O operation for the purpose of
@@ -472,41 +478,49 @@ after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
{
- dberr_t err = DB_SUCCESS;
-
- ulint count = buf_read_page_low(
- &err, true, BUF_READ_ANY_PAGE, page_id, zip_size, false);
-
- srv_stats.buf_pool_reads.add(count);
+ fil_space_t *space= fil_space_acquire(page_id.space()); /* plain reference, released below */
+ if (!space)
+ {
+ ib::info() << "trying to read page " << page_id
+ << " in nonexisting or being-dropped tablespace";
+ return DB_TABLESPACE_DELETED;
+ }
+ else if (!space->acquire_for_io()) /* could not get an I/O reference */
+ {
+ ib::warn() << "unable to read " << page_id << " from tablespace "
+ << space->name;
+ space->release();
+ return DB_PAGE_CORRUPTED;
+ }
- if (err == DB_TABLESPACE_DELETED) {
- ib::info() << "trying to read page " << page_id
- << " in nonexisting or being-dropped tablespace";
- }
+ space->release(); /* keep only the I/O reference for the read */
- /* Increment number of I/O operations used for LRU policy. */
- buf_LRU_stat_inc_io();
+ dberr_t err; /* NOTE(review): relies on buf_read_page_low() assigning *err on every path - confirm */
+ if (buf_read_page_low(&err, space, true, BUF_READ_ANY_PAGE,
+ page_id, zip_size, false)) /* sync=true, unzip=false */
+ srv_stats.buf_pool_reads.add(1); /* a read request was issued */
- return(err);
+ buf_LRU_stat_inc_io(); /* increment number of I/O operations used for LRU policy */
+ return err;
}
/** High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
+@param[in,out] space tablespace
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] sync true if synchronous aio is desired */
-void
-buf_read_page_background(const page_id_t page_id, ulint zip_size, bool sync)
+void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
+ ulint zip_size, bool sync)
{
- ulint count;
dberr_t err;
- count = buf_read_page_low(
- &err, sync,
- BUF_READ_ANY_PAGE,
- page_id, zip_size, false, true);
+ if (buf_read_page_low(&err, space, sync, BUF_READ_ANY_PAGE,
+ page_id, zip_size, false)) {
+ srv_stats.buf_pool_reads.add(1);
+ }
switch (err) {
case DB_SUCCESS:
@@ -528,8 +542,6 @@ buf_read_page_background(const page_id_t page_id, ulint zip_size, bool sync)
<< page_id;
}
- srv_stats.buf_pool_reads.add(count);
-
/* We do not increment number of I/O operations used for LRU policy
here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
about evicting uncompressed version of compressed pages from the
@@ -598,10 +610,19 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
fil_space_t *space= fil_space_acquire(page_id.space());
if (!space)
return 0;
- if (high_1.page_no() >= space->committed_size)
+ else
{
- /* The area is not whole. */
+ bool ok= space->acquire_for_io();
space->release();
+ if (!ok)
+ return 0;
+ }
+
+ if (high_1.page_no() > space->last_page_number())
+ {
+ /* The area is not whole. */
+fail:
+ space->release_for_io();
return 0;
}
@@ -628,8 +649,7 @@ buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
{
hard_fail:
hash_lock->read_unlock();
- space->release();
- return 0;
+ goto fail;
}
const byte *f;
switch (UNIV_EXPECT(bpage->state(), BUF_BLOCK_FILE_PAGE)) {
@@ -661,7 +681,7 @@ hard_fail:
if (id != new_low && id != new_high_1)
/* This is not a border page of the area: return */
goto hard_fail;
- if (new_high_1.page_no() >= space->committed_size)
+ if (new_high_1.page_no() > space->last_page_number())
/* The area is not whole */
goto hard_fail;
}
@@ -671,8 +691,7 @@ failed:
hash_lock->read_unlock();
if (--count)
continue;
- space->release();
- return 0;
+ goto fail;
}
const unsigned accessed= bpage->is_accessed();
@@ -702,7 +721,8 @@ failed:
if (space->is_stopping())
break;
dberr_t err;
- count+= buf_read_page_low(&err, false, ibuf_mode, new_low, zip_size,
+ space->reacquire_for_io();
+ count+= buf_read_page_low(&err, space, false, ibuf_mode, new_low, zip_size,
false);
}
@@ -710,7 +730,7 @@ failed:
DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
count, space->chain.start->name,
new_low.page_no()));
- space->release();
+ space->release_for_io();
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
@@ -721,24 +741,19 @@ failed:
}
/** Issues read requests for pages which recovery wants to read in.
-@param[in] sync true if the caller wants this function to wait
-for the highest address page to get read in, before this function returns
@param[in] space_id tablespace id
@param[in] page_nos array of page numbers to read, with the
highest page number the last in the array
@param[in] n number of page numbers in the array */
-void buf_read_recv_pages(bool sync, ulint space_id, const uint32_t *page_nos,
- ulint n)
+void buf_read_recv_pages(ulint space_id, const uint32_t* page_nos, ulint n)
{
- fil_space_t* space = fil_space_get(space_id);
+ fil_space_t* space = fil_space_t::get_for_io(space_id);
- if (space == NULL) {
- /* The tablespace is missing: do nothing */
+ if (!space) {
+ /* The tablespace is missing or unreadable: do nothing */
return;
}
- fil_space_open_if_needed(space);
-
const ulint zip_size = space->zip_size();
for (ulint i = 0; i < n; i++) {
@@ -769,9 +784,10 @@ void buf_read_recv_pages(bool sync, ulint space_id, const uint32_t *page_nos,
}
dberr_t err;
- buf_read_page_low(
- &err, sync && i + 1 == n,
- BUF_READ_ANY_PAGE, cur_page_id, zip_size, true);
+ space->reacquire_for_io();
+ buf_read_page_low(&err, space, false,
+ BUF_READ_ANY_PAGE, cur_page_id, zip_size,
+ true);
if (err == DB_DECRYPTION_FAILED || err == DB_PAGE_CORRUPTED) {
ib::error() << "Recovery failed to read or decrypt "
@@ -779,5 +795,8 @@ void buf_read_recv_pages(bool sync, ulint space_id, const uint32_t *page_nos,
}
}
- DBUG_PRINT("ib_buf", ("recovery read-ahead (%u pages)", n));
+
+ DBUG_PRINT("ib_buf", ("recovery read (%u pages) for %s", n,
+ space->chain.start->name));
+ space->release_for_io();
}
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index 7a27160ccd5..fb3247ecdcf 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -951,7 +951,7 @@ void dict_drop_index_tree(btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
if (fil_space_t* s = fil_space_acquire_silent(space_id)) {
/* Ensure that the tablespace file exists
in order to avoid a crash in buf_page_get_gen(). */
- if (s->size || fil_space_get_size(space_id)) {
+ if (root_page_no < s->get_size()) {
btr_free_if_exists(page_id_t(space_id, root_page_no),
s->zip_size(),
mach_read_from_8(ptr), mtr);
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index 489f4d491d1..753bcf74967 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -2975,15 +2975,15 @@ err_exit:
}
if (err == DB_SUCCESS && table->is_readable()) {
- if (table->space && !fil_space_get_size(table->space_id)) {
+ const auto root = dict_table_get_first_index(table)->page;
+
+ if (root >= table->space->get_size()) {
corrupted:
table->corrupted = true;
table->file_unreadable = true;
err = DB_CORRUPTION;
} else {
- const page_id_t page_id(
- table->space->id,
- dict_table_get_first_index(table)->page);
+ const page_id_t page_id(table->space->id, root);
mtr.start();
buf_block_t* block = buf_page_get(
page_id, table->space->zip_size(),
diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc
index 5587355f23a..e73337a3bdd 100644
--- a/storage/innobase/fil/fil0crypt.cc
+++ b/storage/innobase/fil/fil0crypt.cc
@@ -975,8 +975,7 @@ static inline
void
fil_crypt_read_crypt_data(fil_space_t* space)
{
- if (space->crypt_data || space->size
- || !fil_space_get_size(space->id)) {
+ if (space->crypt_data || space->size || !space->get_size()) {
/* The encryption metadata has already been read, or
the tablespace is not encrypted and the file has been
opened already, or the file cannot be accessed,
@@ -2246,15 +2245,9 @@ static void fil_crypt_rotation_list_fill()
}
/* Ensure that crypt_data has been initialized. */
- if (!space->size) {
- ut_d(const fil_space_t* s=)
- fil_system.read_page0(space->id);
- ut_ad(!s || s == space);
- if (!space->size) {
- /* Page 0 was not loaded.
- Skip this tablespace. */
- goto next;
- }
+ if (!space->get_size()) {
+ /* Page 0 was not loaded. Skip this tablespace. */
+ goto next;
}
/* Skip ENCRYPTION!=DEFAULT tablespaces. */
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 2da60b079f7..ad9d2828467 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -49,25 +49,81 @@ Created 10/25/1995 Heikki Tuuri
#include "os0event.h"
#include "sync0sync.h"
#include "buf0flu.h"
-#include "os0api.h"
#ifdef UNIV_LINUX
# include <sys/types.h>
# include <sys/sysmacros.h>
# include <dirent.h>
#endif
-/** Tries to close a file in the LRU list. The caller must hold the fil_sys
-mutex.
-@return true if success, false if should retry later; since i/o's
-generally complete in < 100 ms, and as InnoDB writes at most 128 pages
-from the buffer pool in a batch, and then immediately flushes the
-files, there is a good chance that the next time we find a suitable
-node from the LRU list.
-@param[in] print_info if true, prints information why it
- cannot close a file */
-static
-bool
-fil_try_to_close_file_in_LRU(bool print_info);
+/** Tell whether a tablespace identifier denotes a user tablespace.
+@param space_id tablespace identifier
+@return whether space_id is none of TRX_SYS_SPACE, SRV_TMP_SPACE_ID, undo ID */
+inline bool fil_is_user_tablespace_id(ulint space_id)
+{
+  return !(space_id == TRX_SYS_SPACE || space_id == SRV_TMP_SPACE_ID ||
+           srv_is_undo_tablespace(space_id));
+}
+
+/** Try to close one open data file, scanning fil_system.space_list in order.
+@param print_info whether to log why each open candidate cannot be closed
+@return true if a file was closed, false if none could be (retry later) */
+static bool fil_try_to_close_file(bool print_info)
+{
+  ut_ad(mutex_own(&fil_system.mutex)); /* caller holds fil_system.mutex */
+  for (fil_space_t *space= UT_LIST_GET_FIRST(fil_system.space_list); space;
+       space= UT_LIST_GET_NEXT(space_list, space))
+  {
+    switch (space->purpose) {
+    case FIL_TYPE_TEMPORARY:
+      continue; /* never a candidate for closing */
+    case FIL_TYPE_IMPORT:
+      break;
+    case FIL_TYPE_TABLESPACE:
+      if (!fil_is_user_tablespace_id(space->id))
+        continue; /* only user tablespaces are candidates */
+    }
+
+    /* We are using an approximation of LRU replacement policy. In
+    fil_node_open_file_low(), newly opened files are moved to the end
+    of fil_system.space_list, so that they would be less likely to be
+    closed here. */
+    fil_node_t *node= UT_LIST_GET_FIRST(space->chain);
+    ut_ad(node);
+    ut_ad(!UT_LIST_GET_NEXT(chain, node)); /* candidates have a single file */
+
+    if (!node->is_open())
+      continue;
+
+    if (auto n= space->set_closing()) /* NOTE(review): presumably the pending operation count - confirm */
+    {
+      if (print_info)
+        ib::info() << "Cannot close file " << node->name
+                   << " because of " << n << " pending operations";
+      continue;
+    }
+
+    if (auto n= node->n_pending_flushes)
+    {
+      if (print_info)
+        ib::info() << "Cannot close file " << node->name
+                   << ", because n_pending_flushes " << n;
+      continue;
+    }
+
+    if (node->needs_flush)
+    {
+      if (print_info)
+        ib::info() << "Cannot close file " << node->name
+                   << ", because it should be flushed first";
+      continue;
+    }
+
+    node->close(); /* close the first eligible file only */
+    return true;
+  }
+
+  return false;
+}
/** Test if a tablespace file can be renamed to a new filepath by checking
if that the old filepath exists and the new filepath does not exist.
@@ -143,16 +199,7 @@ from a file, versus reading from a raw disk.
To have fast access to a tablespace or a log file, we put the data structures
to a hash table. Each tablespace and log file is given an unique 32-bit
-identifier.
-
-Some operating systems do not support many open files at the same time,
-though NT seems to tolerate at least 900 open files. Therefore, we put the
-open files in an LRU-list. If we need to open another file, we may close the
-file at the end of the LRU-list. When an i/o-operation is pending on a file,
-the file cannot be closed. We take the file nodes with pending i/o-operations
-out of the LRU-list and keep a count of pending operations. When an operation
-completes, we decrement the count and return the file node to the LRU-list if
-the count drops to zero. */
+identifier. */
/** Reference to the server data directory. Usually it is the
current working directory ".", but in the MySQL Embedded Server Library
@@ -172,18 +219,6 @@ fil_system_t fil_system;
/** At this age or older a space/page will be rotated */
UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age;
-/** Determine if the space id is a user tablespace id or not.
-@param[in] space_id Space ID to check
-@return true if it is a user tablespace ID */
-inline
-bool
-fil_is_user_tablespace_id(ulint space_id)
-{
- return(space_id != TRX_SYS_SPACE
- && space_id != SRV_TMP_SPACE_ID
- && !srv_is_undo_tablespace(space_id));
-}
-
#ifdef UNIV_DEBUG
/** Try fil_validate() every this many times */
# define FIL_VALIDATE_SKIP 17
@@ -205,43 +240,6 @@ fil_validate_skip(void)
}
#endif /* UNIV_DEBUG */
-/********************************************************************//**
-Determines if a file node belongs to the least-recently-used list.
-@return true if the file belongs to fil_system.LRU mutex. */
-UNIV_INLINE
-bool
-fil_space_belongs_in_lru(
-/*=====================*/
- const fil_space_t* space) /*!< in: file space */
-{
- switch (space->purpose) {
- case FIL_TYPE_TEMPORARY:
- return(false);
- case FIL_TYPE_TABLESPACE:
- return(fil_is_user_tablespace_id(space->id));
- case FIL_TYPE_IMPORT:
- return(true);
- }
-
- ut_ad(0);
- return(false);
-}
-
-/********************************************************************//**
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Prepares a file node for i/o. Opens the file if it is closed. Updates the
-pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex.
-@return false if the file can't be opened, otherwise true */
-static
-bool
-fil_node_prepare_for_io(
-/*====================*/
- fil_node_t* node, /*!< in: file node */
- fil_space_t* space); /*!< in: space */
-
/*******************************************************************//**
Returns the table space by a given id, NULL if not found.
It is unsafe to dereference the returned pointer. It is fine to check
@@ -351,7 +349,7 @@ static bool fil_comp_algo_validate(const fil_space_t* space)
@param[in] is_raw whether this is a raw device
@param[in] atomic_write true if atomic write could be enabled
@param[in] max_pages maximum number of pages in file,
-or ULINT_MAX for unlimited
+or UINT32_MAX for unlimited
@return file object */
fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
uint32_t size, bool is_raw, bool atomic_write,
@@ -387,114 +385,108 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
this->size += size;
UT_LIST_ADD_LAST(chain, node);
if (node->is_open()) {
- fil_system.n_open++;
+ ++fil_system.n_open;
}
mutex_exit(&fil_system.mutex);
return node;
}
-/** Open a file node of a tablespace.
-@param[in,out] node File node
-@return false if the file can't be opened, otherwise true */
-static bool fil_node_open_file(fil_node_t* node)
+/** Open a tablespace file; on EMFILE, try to close another file and retry.
+@param node closed data file; the caller must hold fil_system.mutex
+@return whether the file was successfully opened */
+static bool fil_node_open_file_low(fil_node_t *node)
{
- bool success;
- bool read_only_mode;
- fil_space_t* space = node->space;
-
- ut_ad(mutex_own(&fil_system.mutex));
- ut_a(node->n_pending == 0);
- ut_a(!node->is_open());
-
- read_only_mode = space->purpose != FIL_TYPE_TEMPORARY
- && srv_read_only_mode;
-
- const bool first_time_open = node->size == 0;
-
- bool o_direct_possible = !FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
- if (const ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(space->flags)) {
- compile_time_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096);
- if (ssize < 3) {
- o_direct_possible = false;
- }
- }
-
- if (first_time_open
- || (space->purpose == FIL_TYPE_TABLESPACE
- && node == UT_LIST_GET_FIRST(space->chain)
- && srv_startup_is_before_trx_rollback_phase)) {
- /* We do not know the size of the file yet. First we
- open the file in the normal mode, no async I/O here,
- for simplicity. Then do some checks, and close the
- file again. NOTE that we could not use the simple
- file read function os_file_read() in Windows to read
- from a file opened for async I/O! */
-
-retry:
- node->handle = os_file_create(
- innodb_data_file_key, node->name,
- node->is_raw_disk
- ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
- : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
- OS_FILE_AIO,
- o_direct_possible
- ? OS_DATA_FILE
- : OS_DATA_FILE_NO_O_DIRECT,
- read_only_mode,
- &success);
-
- if (!success) {
- /* The following call prints an error message */
- ulint err = os_file_get_last_error(true);
- if (err == EMFILE + 100) {
- if (fil_try_to_close_file_in_LRU(true))
- goto retry;
- }
-
- ib::warn() << "Cannot open '" << node->name << "'."
- " Have you deleted .ibd files under a"
- " running mysqld server?";
- return(false);
- }
-
- if (!node->read_page0(first_time_open)) {
-fail:
- os_file_close(node->handle);
- node->handle = OS_FILE_CLOSED;
- return false;
- }
+ ut_ad(!node->is_open());
+ ut_ad(node->space->is_closing());
+ ut_ad(mutex_own(&fil_system.mutex));
+ const auto flags= node->space->flags;
+ bool o_direct_possible= !FSP_FLAGS_HAS_PAGE_COMPRESSION(flags);
+ static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096, "compatibility");
+ if (const auto ssize= FSP_FLAGS_GET_ZIP_SSIZE(flags))
+ if (ssize < 3)
+ o_direct_possible= false;
+
+ for (;;)
+ {
+ bool success;
+ node->handle= os_file_create(innodb_data_file_key, node->name,
+ node->is_raw_disk
+ ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
+ : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
+ OS_FILE_AIO, o_direct_possible
+ ? OS_DATA_FILE : OS_DATA_FILE_NO_O_DIRECT,
+ srv_read_only_mode, &success);
+ if (success)
+ break;
+
+ /* The following call prints an error message */
+ if (os_file_get_last_error(true) == EMFILE + 100 &&
+ fil_try_to_close_file(true))
+ continue;
- if (first_time_open && !fil_comp_algo_validate(space)) {
- goto fail;
- }
+ ib::warn() << "Cannot open '" << node->name << "'.";
+ return false;
+ }
- } else {
- node->handle = os_file_create(
- innodb_data_file_key, node->name,
- node->is_raw_disk
- ? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
- : OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
- OS_FILE_AIO,
- o_direct_possible
- ? OS_DATA_FILE
- : OS_DATA_FILE_NO_O_DIRECT,
- read_only_mode,
- &success);
- }
+ if (node->size);
+ else if (!node->read_page0() || !fil_comp_algo_validate(node->space))
+ {
+ os_file_close(node->handle);
+ node->handle= OS_FILE_CLOSED;
+ return false;
+ }
- ut_a(success);
- ut_a(node->is_open());
+ ut_ad(node->is_open());
- fil_system.n_open++;
+ if (UNIV_LIKELY(!fil_system.freeze_space_list))
+ {
+ /* Move the file last in fil_system.space_list, so that
+ fil_try_to_close_file() should close it as a last resort. */
+ UT_LIST_REMOVE(fil_system.space_list, node->space);
+ UT_LIST_ADD_LAST(fil_system.space_list, node->space);
+ }
- if (fil_space_belongs_in_lru(space)) {
+ fil_system.n_open++;
+ return true;
+}
- /* Put the node to the LRU list */
- UT_LIST_ADD_FIRST(fil_system.LRU, node);
- }
+/** Open a data file, first trying to stay within innodb_open_files.
+@param node closed data file (fil_system.mutex may be temporarily released)
+@return whether the file was successfully opened */
+static bool fil_node_open_file(fil_node_t *node)
+{
+  ut_ad(mutex_own(&fil_system.mutex));
+  ut_ad(!node->is_open());
+  ut_ad(fil_is_user_tablespace_id(node->space->id) ||
+        srv_operation == SRV_OPERATION_BACKUP ||
+        srv_operation == SRV_OPERATION_RESTORE ||
+        srv_operation == SRV_OPERATION_RESTORE_DELTA);
+  ut_ad(node->space->purpose != FIL_TYPE_TEMPORARY);
+  ut_ad(node->space->pending_io());
+
+  for (ulint count= 0; fil_system.n_open >= srv_max_n_open_files; count++)
+  {
+    if (fil_try_to_close_file(count > 1))
+      count= 0; /* a file was closed; start counting failed attempts anew */
+    else if (count >= 2)
+    {
+      ib::warn() << "innodb_open_files=" << srv_max_n_open_files
+                 << " is exceeded (" << fil_system.n_open
+                 << " files stay open)";
+      break; /* give up on the limit and open the file anyway */
+    }
+    else
+    {
+      mutex_exit(&fil_system.mutex);
+      os_thread_sleep(20000);
+      /* Flush tablespaces so that we can close modified files. */
+      fil_flush_file_spaces();
+      mutex_enter(&fil_system.mutex);
+    }
+  }
- return(true);
+  return fil_node_open_file_low(node);
}
/** Close the file handle. */
@@ -520,8 +512,9 @@ pfs_os_file_t fil_node_t::detach()
void fil_node_t::prepare_to_close_or_detach()
{
ut_ad(mutex_own(&fil_system.mutex));
+ ut_ad(space->is_closing());
+ ut_ad(!space->pending_io());
ut_a(is_open());
- ut_a(n_pending == 0);
ut_a(n_pending_flushes == 0);
ut_a(!being_extended);
ut_a(!needs_flush || space->purpose == FIL_TYPE_TEMPORARY ||
@@ -529,78 +522,13 @@ void fil_node_t::prepare_to_close_or_detach()
ut_a(fil_system.n_open > 0);
fil_system.n_open--;
-
- if (fil_space_belongs_in_lru(space))
- {
- ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
- UT_LIST_REMOVE(fil_system.LRU, this);
- }
-}
-
-/** Tries to close a file in the LRU list. The caller must hold the fil_sys
-mutex.
-@return true if success, false if should retry later; since i/o's
-generally complete in < 100 ms, and as InnoDB writes at most 128 pages
-from the buffer pool in a batch, and then immediately flushes the
-files, there is a good chance that the next time we find a suitable
-node from the LRU list.
-@param[in] print_info if true, prints information why it
- cannot close a file*/
-static
-bool
-fil_try_to_close_file_in_LRU(
-
- bool print_info)
-{
- fil_node_t* node;
-
- ut_ad(mutex_own(&fil_system.mutex));
-
- if (print_info) {
- ib::info() << "fil_sys open file LRU len "
- << UT_LIST_GET_LEN(fil_system.LRU);
- }
-
- for (node = UT_LIST_GET_LAST(fil_system.LRU);
- node != NULL;
- node = UT_LIST_GET_PREV(LRU, node)) {
-
- if (!node->needs_flush
- && node->n_pending_flushes == 0
- && !node->being_extended) {
-
- node->close();
-
- return(true);
- }
-
- if (!print_info) {
- continue;
- }
-
- if (const auto n = node->n_pending_flushes) {
- ib::info() << "Cannot close file " << node->name
- << ", because n_pending_flushes " << n;
- }
-
- if (node->needs_flush) {
- ib::warn() << "Cannot close file " << node->name
- << ", because is should be flushed first";
- }
-
- if (node->being_extended) {
- ib::info() << "Cannot close file " << node->name
- << ", because it is being extended";
- }
- }
-
- return(false);
}
/** Flush any writes cached by the file system.
@param[in,out] space tablespace
-@param[in] metadata whether to update file system metadata */
-static void fil_flush_low(fil_space_t* space, bool metadata = false)
+@param[in] metadata whether to update file system metadata
+@return whether fil_system.mutex was released and reacquired */
+static bool fil_flush_low(fil_space_t* space, bool metadata = false)
{
ut_ad(mutex_own(&fil_system.mutex));
ut_ad(!space->is_stopping());
@@ -621,9 +549,10 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
}
#endif /* UNIV_DEBUG */
- if (!metadata) return;
+ if (!metadata) return false;
}
+ bool reacquired = false;
/* Prevent dropping of the space while we are flushing */
space->n_pending_flushes++;
@@ -653,6 +582,7 @@ static void fil_flush_low(fil_space_t* space, bool metadata = false)
mutex_exit(&fil_system.mutex);
os_file_flush(node->handle);
+ reacquired = true;
mutex_enter(&fil_system.mutex);
@@ -673,6 +603,7 @@ skip_flush:
}
space->n_pending_flushes--;
+ return reacquired;
}
/** Try to extend a tablespace.
@@ -692,6 +623,8 @@ fil_space_extend_must_retry(
ut_ad(mutex_own(&fil_system.mutex));
ut_ad(UT_LIST_GET_LAST(space->chain) == node);
ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE);
+ ut_ad(node->space == space);
+ ut_ad(space->pending_io());
*success = space->size >= size;
@@ -712,12 +645,6 @@ fil_space_extend_must_retry(
node->being_extended = true;
- if (!fil_node_prepare_for_io(node, space)) {
- /* The tablespace data file, such as .ibd file, is missing */
- node->being_extended = false;
- return(false);
- }
-
/* At this point it is safe to release fil_system.mutex. No
other thread can rename, delete, close or extend the file because
we have set the node->being_extended flag. */
@@ -765,8 +692,6 @@ fil_space_extend_must_retry(
const uint32_t pages_in_MiB = node->size
& ~uint32_t((1U << (20U - srv_page_size_shift)) - 1);
- node->complete_io();
-
/* Keep the last data file size info up to date, rounded to
full megabytes */
@@ -790,105 +715,56 @@ fil_space_extend_must_retry(
}
}
-/** Acquire fil_system.mutex and try to make sure we can open at least one
-file while holding it. This should be called before calling
-fil_node_prepare_for_io(), because that function may need to open a file. */
-static
-fil_space_t*
-fil_mutex_enter_and_prepare_for_io(
- ulint space_id) /*!< in: space id */
+/** @return whether the file is usable for io() */
+ATTRIBUTE_COLD bool fil_space_t::prepare_for_io()
{
- for (ulint count = 0;;) {
- mutex_enter(&fil_system.mutex);
-
- fil_space_t* space = fil_space_get_by_id(space_id);
+ ut_ad(pending_io());
+ mutex_enter(&fil_system.mutex);
+ fil_node_t *node= UT_LIST_GET_LAST(chain);
+ ut_ad(!id || purpose == FIL_TYPE_TEMPORARY ||
+ node == UT_LIST_GET_FIRST(chain));
- if (!space) {
- return nullptr;
- }
+ const bool is_open= node && (node->is_open() || fil_node_open_file(node));
- fil_node_t* node = UT_LIST_GET_LAST(space->chain);
- ut_ad(space->id == 0
- || node == UT_LIST_GET_FIRST(space->chain));
-
- if (space->id == 0) {
- /* We keep the system tablespace files always
- open; this is important in preventing
- deadlocks in this module, as a page read
- completion often performs another read from
- the insert buffer. The insert buffer is in
- tablespace 0, and we cannot end up waiting in
- this function. */
- } else if (!node || node->is_open()) {
- /* If the file is already open, no need to do
- anything; if the space does not exist, we handle the
- situation in the function which called this
- function */
- } else {
- while (fil_system.n_open >= srv_max_n_open_files) {
- /* Too many files are open */
- if (fil_try_to_close_file_in_LRU(count > 1)) {
- /* No problem */
- } else if (count >= 2) {
- ib::warn() << "innodb_open_files="
- << srv_max_n_open_files
- << " is exceeded ("
- << fil_system.n_open
- << ") files stay open)";
- break;
- } else {
- mutex_exit(&fil_system.mutex);
- os_thread_sleep(20000);
- /* Flush tablespaces so that we can
- close modified files in the LRU list */
- fil_flush_file_spaces();
-
- count++;
- mutex_enter(&fil_system.mutex);
- continue;
- }
- }
- }
-
- uint32_t size = space->recv_size;
- if (UNIV_UNLIKELY(size != 0)) {
- ut_ad(node);
- bool success;
- if (fil_space_extend_must_retry(space, node, size,
- &success)) {
- continue;
- }
+ if (!is_open)
+ release_for_io();
+ else if (auto desired_size= recv_size)
+ {
+ bool success;
+ while (fil_space_extend_must_retry(this, node, desired_size, &success))
+ mutex_enter(&fil_system.mutex);
- ut_ad(mutex_own(&fil_system.mutex));
- /* Crash recovery requires the file extension
- to succeed. */
- ut_a(success);
- /* InnoDB data files cannot shrink. */
- ut_a(space->size >= size);
- if (size > space->committed_size) {
- space->committed_size = size;
- }
+ ut_ad(mutex_own(&fil_system.mutex));
+ /* Crash recovery requires the file extension to succeed. */
+ ut_a(success);
+ /* InnoDB data files cannot shrink. */
+ ut_a(size >= desired_size);
+ if (desired_size > committed_size)
+ committed_size= desired_size;
- /* There could be multiple concurrent I/O requests for
- this tablespace (multiple threads trying to extend
- this tablespace).
+ /* There could be multiple concurrent I/O requests for this
+ tablespace (multiple threads trying to extend this tablespace).
- Also, fil_space_set_recv_size_and_flags() may have been
- invoked again during the file extension while
- fil_system.mutex was not being held by us.
+ Also, fil_space_set_recv_size_and_flags() may have been invoked
+ again during the file extension while fil_system.mutex was not
+ being held by us.
- Only if space->recv_size matches what we read
- originally, reset the field. In this way, a
- subsequent I/O request will handle any pending
- fil_space_set_recv_size_and_flags(). */
+ Only if recv_size matches what we read originally, reset the
+ field. In this way, a subsequent I/O request will handle any
+ pending fil_space_set_recv_size_and_flags(). */
- if (size == space->recv_size) {
- space->recv_size = 0;
- }
- }
+ if (desired_size == recv_size)
+ {
+ recv_size= 0;
+ goto clear;
+ }
+ }
+ else
+clear:
+ n_pending_ios.fetch_and(NOT_CLOSING);
- return space;
- }
+ mutex_exit(&fil_system.mutex);
+ return is_open;
}
/** Try to extend a tablespace if it is smaller than the specified size.
@@ -897,18 +773,20 @@ fil_mutex_enter_and_prepare_for_io(
@return whether the tablespace is at least as big as requested */
bool fil_space_extend(fil_space_t *space, uint32_t size)
{
- ut_ad(!srv_read_only_mode || space->purpose == FIL_TYPE_TEMPORARY);
+ ut_ad(!srv_read_only_mode || space->purpose == FIL_TYPE_TEMPORARY);
+ if (!space->acquire_for_io())
+ return false;
- bool success;
+ bool success;
- do {
- fil_mutex_enter_and_prepare_for_io(space->id);
- } while (fil_space_extend_must_retry(
- space, UT_LIST_GET_LAST(space->chain), size,
- &success));
+ do
+ mutex_enter(&fil_system.mutex);
+ while (fil_space_extend_must_retry(space, UT_LIST_GET_LAST(space->chain),
+ size, &success));
- mutex_exit(&fil_system.mutex);
- return(success);
+ mutex_exit(&fil_system.mutex);
+ space->release_for_io();
+ return success;
}
/** Prepare to free a file from fil_system. */
@@ -927,7 +805,7 @@ pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
fil_system.unflushed_spaces.remove(*space);
}
- if (n_pending || n_pending_flushes)
+ if (n_pending_flushes || space->set_closing())
{
mutex_exit(&fil_system.mutex);
os_thread_sleep(100);
@@ -935,11 +813,6 @@ pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
continue;
}
- if (fil_space_belongs_in_lru(space))
- {
- ut_ad(UT_LIST_GET_LEN(fil_system.LRU) > 0);
- UT_LIST_REMOVE(fil_system.LRU, this);
- }
ut_a(!being_extended);
if (detach_handle)
{
@@ -1020,7 +893,7 @@ fil_space_free_low(
/* Wait for fil_space_t::release_for_io(); after
fil_system_t::detach(), the tablespace cannot be found, so
- fil_space_acquire_for_io() would return NULL */
+ fil_space_t::get_for_io() would return NULL */
while (space->pending_io()) {
os_thread_sleep(100);
}
@@ -1092,24 +965,19 @@ fil_space_free(
return(space != NULL);
}
-/** Create a space memory object and put it to the fil_system hash table.
-Error messages are issued to the server log.
-@param[in] name tablespace name
-@param[in] id tablespace identifier
-@param[in] flags tablespace flags
-@param[in] purpose tablespace purpose
-@param[in,out] crypt_data encryption information
-@param[in] mode encryption mode
-@return pointer to created tablespace, to be filled in with fil_space_t::add()
-@retval NULL on failure (such as when the same tablespace exists) */
-fil_space_t*
-fil_space_create(
- const char* name,
- ulint id,
- ulint flags,
- fil_type_t purpose,
- fil_space_crypt_t* crypt_data,
- fil_encryption_t mode)
+/** Create a tablespace in fil_system.
+@param name tablespace name
+@param id tablespace identifier
+@param flags tablespace flags
+@param purpose tablespace purpose
+@param crypt_data encryption information
+@param mode encryption mode
+@return pointer to created tablespace, to be filled in with add()
+@retval nullptr on failure (such as when the same tablespace exists) */
+fil_space_t *fil_space_t::create(const char *name, ulint id, ulint flags,
+ fil_type_t purpose,
+ fil_space_crypt_t *crypt_data,
+ fil_encryption_t mode)
{
fil_space_t* space;
@@ -1119,19 +987,6 @@ fil_space_create(
DBUG_EXECUTE_IF("fil_space_create_failure", return(NULL););
- mutex_enter(&fil_system.mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space != NULL) {
- ib::error() << "Trying to add tablespace '" << name
- << "' with id " << id
- << " to the tablespace memory cache, but tablespace '"
- << space->name << "' already exists in the cache!";
- mutex_exit(&fil_system.mutex);
- return(NULL);
- }
-
/* FIXME: if calloc() is defined as an inline function that calls
memset() or bzero(), then GCC 6 -flifetime-dse can optimize it away */
space= new (ut_zalloc_nokey(sizeof(*space))) fil_space_t;
@@ -1141,24 +996,12 @@ fil_space_create(
UT_LIST_INIT(space->chain, &fil_node_t::chain);
- if ((purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT)
- && id > fil_system.max_assigned_id) {
- if (!fil_system.space_id_reuse_warned) {
- fil_system.space_id_reuse_warned = true;
-
- ib::warn() << "Allocated tablespace ID " << id
- << " for " << name << ", old maximum was "
- << fil_system.max_assigned_id;
- }
-
- fil_system.max_assigned_id = id;
- }
-
space->purpose = purpose;
space->flags = flags;
space->magic_n = FIL_SPACE_MAGIC_N;
space->crypt_data = crypt_data;
+ space->n_pending_ios.store(CLOSING, std::memory_order_relaxed);
DBUG_LOG("tablespace",
"Created metadata for " << id << " name " << name);
@@ -1183,6 +1026,34 @@ fil_space_create(
space->atomic_write_supported = true;
}
+ mutex_enter(&fil_system.mutex);
+
+	if (const fil_space_t *old_space = fil_space_get_by_id(id)) {
+		ib::error() << "Trying to add tablespace '" << name
+			<< "' with id " << id
+			<< " to the tablespace memory cache, but tablespace '"
+			<< old_space->name << "' already exists in the cache!";
+		mutex_exit(&fil_system.mutex);
+		rw_lock_free(&space->latch);
+		ut_free(space->name); /* read the member before the destructor runs */
+		space->~fil_space_t();
+		ut_free(space);
+		return(NULL);
+	}
+
+ if ((purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT)
+ && id > fil_system.max_assigned_id) {
+ if (!fil_system.space_id_reuse_warned) {
+ fil_system.space_id_reuse_warned = true;
+
+ ib::warn() << "Allocated tablespace ID " << id
+ << " for " << name << ", old maximum was "
+ << fil_system.max_assigned_id;
+ }
+
+ fil_system.max_assigned_id = id;
+ }
+
HASH_INSERT(fil_space_t, hash, &fil_system.spaces, id, space);
UT_LIST_ADD_LAST(fil_system.space_list, space);
@@ -1192,6 +1063,17 @@ fil_space_create(
fil_system.max_assigned_id = id;
}
+ switch (id) {
+ case 0:
+ ut_ad(!fil_system.sys_space);
+ fil_system.sys_space = space;
+ break;
+ case SRV_TMP_SPACE_ID:
+ ut_ad(!fil_system.temp_space);
+ fil_system.temp_space = space;
+ break;
+ }
+
/* Inform key rotation that there could be something
to do */
if (purpose == FIL_TYPE_TABLESPACE
@@ -1261,62 +1143,33 @@ fil_assign_new_space_id(
return(success);
}
-/** Trigger a call to fil_node_t::read_page0()
-@param[in] id tablespace identifier
-@return tablespace
-@retval NULL if the tablespace does not exist or cannot be read */
-fil_space_t* fil_system_t::read_page0(ulint id)
+/** Read the first page of a data file.
+@return whether the page was found valid */
+bool fil_space_t::read_page0()
{
- mutex_exit(&mutex);
-
- ut_ad(id != 0);
-
- /* It is possible that the tablespace is dropped while we are
- not holding the mutex. */
- fil_space_t* space = fil_mutex_enter_and_prepare_for_io(id);
-
- if (space == NULL || UT_LIST_GET_LEN(space->chain) == 0) {
- return(NULL);
- }
-
- /* The following code must change when InnoDB supports
- multiple datafiles per tablespace. */
- ut_a(1 == UT_LIST_GET_LEN(space->chain));
-
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-
- /* It must be a single-table tablespace and we have not opened
- the file yet; the following calls will open it and update the
- size fields */
-
- if (!fil_node_prepare_for_io(node, space)) {
- /* The single-table tablespace can't be opened,
- because the ibd file is missing. */
- return(NULL);
- }
+ ut_ad(fil_system.is_initialised());
+ ut_ad(mutex_own(&fil_system.mutex));
+ if (size)
+ return true;
- node->complete_io();
+ fil_node_t *node= UT_LIST_GET_FIRST(chain);
+ if (!node)
+ return false;
+ ut_ad(!UT_LIST_GET_NEXT(chain, node));
- return space;
+ n_pending_ios.fetch_add(1, std::memory_order_acquire);
+ const bool ok= node->is_open() || fil_node_open_file(node);
+ release_for_io();
+ return ok;
}
-/*******************************************************************//**
-Returns a pointer to the fil_space_t that is in the memory cache
-associated with a space id. The caller must lock fil_system.mutex.
-@return file_space_t pointer, NULL if space not found */
-UNIV_INLINE
-fil_space_t*
-fil_space_get_space(
-/*================*/
- ulint id) /*!< in: space id */
+/** Look up a tablespace and ensure that its first page has been validated. */
+static fil_space_t *fil_space_get_space(ulint id)
{
- fil_space_t* space = fil_space_get_by_id(id);
- if (space == NULL || space->size != 0) {
- return(space);
- }
-
- space = fil_system.read_page0(id);
- return(space);
+ if (fil_space_t *space= fil_space_get_by_id(id))
+ if (space->read_page0())
+ return space;
+ return nullptr;
}
void fil_space_set_recv_size_and_flags(ulint id, uint32_t size, uint32_t flags)
@@ -1333,53 +1186,52 @@ void fil_space_set_recv_size_and_flags(ulint id, uint32_t size, uint32_t flags)
mutex_exit(&fil_system.mutex);
}
-/*******************************************************************//**
-Returns the size of the space in pages. The tablespace must be cached in the
-memory cache.
-@return space size, 0 if space not found */
-ulint
-fil_space_get_size(
-/*===============*/
- ulint id) /*!< in: space id */
+/** Open each file. Never invoked on .ibd files.
+@param create_new_db whether to skip the call to fil_node_t::read_page0()
+@return whether all files were opened */
+bool fil_space_t::open(bool create_new_db)
{
- fil_space_t* space;
- ulint size;
+ ut_ad(fil_system.is_initialised());
+ ut_ad(!id || create_new_db);
- ut_ad(fil_system.is_initialised());
- mutex_enter(&fil_system.mutex);
-
- space = fil_space_get_space(id);
-
- size = space ? space->size : 0;
+ bool success= true;
+ bool skip_read= create_new_db;
- mutex_exit(&fil_system.mutex);
+ mutex_enter(&fil_system.mutex);
- return(size);
-}
+ for (fil_node_t *node= UT_LIST_GET_FIRST(chain); node;
+ node= UT_LIST_GET_NEXT(chain, node))
+ {
+ if (!node->is_open() && !fil_node_open_file_low(node))
+ {
+err_exit:
+ success= false;
+ break;
+ }
-/** Open each file. Only invoked on fil_system.temp_space.
-@return whether all files were opened */
-bool fil_space_t::open()
-{
- ut_ad(fil_system.is_initialised());
+ if (create_new_db)
+ continue;
+ if (skip_read)
+ {
+ size+= node->size;
+ continue;
+ }
- mutex_enter(&fil_system.mutex);
- ut_ad(this == fil_system.temp_space
- || srv_operation == SRV_OPERATION_BACKUP
- || srv_operation == SRV_OPERATION_RESTORE
- || srv_operation == SRV_OPERATION_RESTORE_DELTA);
+ if (!node->read_page0())
+ {
+ fil_system.n_open--;
+ os_file_close(node->handle);
+ node->handle= OS_FILE_CLOSED;
+ goto err_exit;
+ }
- for (fil_node_t* node = UT_LIST_GET_FIRST(chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
- if (!node->is_open() && !fil_node_open_file(node)) {
- mutex_exit(&fil_system.mutex);
- return false;
- }
- }
+ skip_read= true;
+ }
- mutex_exit(&fil_system.mutex);
- return true;
+ if (!create_new_db)
+ committed_size= size;
+ mutex_exit(&fil_system.mutex);
+ return success;
}
/** Close each file. Only invoked on fil_system.temp_space. */
@@ -1491,7 +1343,6 @@ void fil_system_t::create(ulint hash_size)
void fil_system_t::close()
{
ut_ad(this == &fil_system);
- ut_a(!UT_LIST_GET_LEN(LRU));
ut_a(unflushed_spaces.empty());
ut_a(!UT_LIST_GET_LEN(space_list));
ut_ad(!sys_space);
@@ -1513,67 +1364,6 @@ void fil_system_t::close()
#endif /* UNIV_LINUX */
}
-/** Opens all system tablespace data files. They stay open until the
-database server shutdown. This should be called at a server startup after the
-space objects for the system tablespace have been created. The
-purpose of this operation is to make sure we never run out of file descriptors
-if we need to read from the insert buffer. */
-void
-fil_open_system_tablespace_files()
-{
- fil_space_t* space;
-
- mutex_enter(&fil_system.mutex);
-
- for (space = UT_LIST_GET_FIRST(fil_system.space_list);
- space != NULL;
- space = UT_LIST_GET_NEXT(space_list, space)) {
-
- fil_node_t* node;
-
- if (fil_space_belongs_in_lru(space)) {
-
- continue;
- }
-
- for (node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- if (!node->is_open()) {
- if (!fil_node_open_file(node)) {
- /* This func is called during server's
- startup. If some file of log or system
- tablespace is missing, the server
- can't start successfully. So we should
- assert for it. */
- ut_a(0);
- }
- }
-
- if (srv_max_n_open_files < 10 + fil_system.n_open) {
-
- ib::warn() << "You must raise the value of"
- " innodb_open_files in my.cnf!"
- " Remember that InnoDB keeps all"
- " log files and all system"
- " tablespace files open"
- " for the whole time mysqld is"
- " running, and needs to open also"
- " some .ibd files if the"
- " file-per-table storage model is used."
- " Current open files "
- << fil_system.n_open
- << ", max allowed open files "
- << srv_max_n_open_files
- << ".";
- }
- }
- }
-
- mutex_exit(&fil_system.mutex);
-}
-
/** Close all tablespace files at shutdown */
void fil_close_all_files()
{
@@ -1605,21 +1395,21 @@ next:
}
for (ulint count = 10000; count--; ) {
+ if (!space->set_closing()
+ && !node->n_pending_flushes) {
+ node->close();
+ goto next;
+ }
mutex_exit(&fil_system.mutex);
os_thread_sleep(100);
mutex_enter(&fil_system.mutex);
if (!node->is_open()) {
goto next;
}
- if (!node->n_pending
- && !node->n_pending_flushes) {
- node->close();
- goto next;
- }
}
ib::error() << "File '" << node->name
- << "' has " << node->n_pending
+ << "' has " << space->pending_io()
<< " operations and "
<< node->n_pending_flushes
<< " flushes";
@@ -1670,16 +1460,18 @@ fil_write_flushed_lsn(
byte* buf;
ut_ad(!srv_read_only_mode);
- buf = static_cast<byte*>(aligned_malloc(srv_page_size, srv_page_size));
+ if (!fil_system.sys_space->acquire_for_io()) {
+ return DB_ERROR;
+ }
- const page_id_t page_id(TRX_SYS_SPACE, 0);
+ buf = static_cast<byte*>(aligned_malloc(srv_page_size, srv_page_size));
- fil_io_t fio = fil_io(IORequestRead, true, page_id, 0, 0,
- srv_page_size, buf, NULL);
+ auto fio = fil_system.sys_space->io(IORequestRead, 0, srv_page_size,
+ buf);
if (fio.err == DB_SUCCESS) {
- fio.node->space->release_for_io();
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, lsn);
+ mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ lsn);
ulint fsp_flags = mach_read_from_4(
buf + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS);
@@ -1688,13 +1480,11 @@ fil_write_flushed_lsn(
buf_flush_assign_full_crc32_checksum(buf);
}
- fio = fil_io(IORequestWrite, true, page_id, 0, 0,
- srv_page_size, buf, NULL);
+ fio = fil_system.sys_space->io(IORequestWrite,
+ 0, srv_page_size, buf);
fil_flush_file_spaces();
- }
-
- if (fio.node) {
- fio.node->space->release_for_io();
+ } else {
+ fil_system.sys_space->release_for_io();
}
aligned_free(buf);
@@ -1735,20 +1525,25 @@ when it could be dropped concurrently.
@param[in] id tablespace ID
@return the tablespace
@retval NULL if missing */
-fil_space_t*
-fil_space_acquire_for_io(ulint id)
+fil_space_t *fil_space_t::get_for_io(ulint id)
{
- mutex_enter(&fil_system.mutex);
+ mutex_enter(&fil_system.mutex);
- fil_space_t* space = fil_space_get_by_id(id);
+ fil_space_t *space= fil_space_get_by_id(id);
- if (space) {
- space->acquire_for_io();
- }
+ uint32_t f= space
+ ? space->n_pending_ios.fetch_add(1, std::memory_order_relaxed)
+ : 0;
- mutex_exit(&fil_system.mutex);
+ mutex_exit(&fil_system.mutex);
- return(space);
+ if ((f & CLOSING) && !space->prepare_for_io())
+ {
+ // FIXME: issue an error message!
+ space= nullptr;
+ }
+
+ return space;
}
/** Write a log record about a file operation.
@@ -1986,12 +1781,12 @@ fil_check_pending_io(
/* The following code must change when InnoDB supports
multiple datafiles per tablespace. */
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+ ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
*node = UT_LIST_GET_FIRST(space->chain);
const auto f = space->n_pending_flushes;
- const auto p = (*node)->n_pending;
+ const auto p = space->pending_io();
if (f || p) {
ut_a(!(*node)->being_extended);
@@ -2105,15 +1900,14 @@ void fil_close_tablespace(ulint id)
rw_lock_x_lock(&space->latch);
/* Invalidate in the buffer pool all pages belonging to the
- tablespace. Since we have set space->stop_new_ops = true, readahead
+ tablespace. Since we have invoked space->set_stopping(), readahead
can no longer read more pages of this tablespace to buf_pool.
Thus we can clean the tablespace out of buf_pool
- completely and permanently. The flag stop_new_ops also prevents
- fil_flush() from being applied to this tablespace. */
+ completely and permanently. */
while (buf_flush_dirty_pages(id));
/* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes();
- fil_flush(id);
+ ut_ad(space->is_stopping());
/* If the free is successful, the X lock will be released before
the space memory data structure is freed. */
@@ -2191,7 +1985,7 @@ dberr_t fil_delete_tablespace(ulint id, bool if_exists,
when we checked it above.
A write request can be issued any time because we don't check
- the ::stop_new_ops flag when queueing a block for write.
+ fil_space_t::is_stopping() when queueing a block for write.
We deal with pending write requests in the following function
where we'd minimally evict all dirty pages belonging to this
@@ -2199,7 +1993,7 @@ dberr_t fil_delete_tablespace(ulint id, bool if_exists,
we'll wait for IO to complete.
To deal with potential read requests, we will check the
- ::stop_new_ops flag in fil_io(). */
+	result of is_stopping() in fil_space_t::io(). */
err = DB_SUCCESS;
buf_flush_remove_pages(id);
@@ -2728,14 +2522,14 @@ err_exit:
buf_flush_init_for_writing(NULL, page, &page_zip, false);
- *err = os_file_write(
- IORequestWrite, path, file, page_zip.data, 0, zip_size);
+ *err = os_file_write(IORequestWrite, path, file,
+ page_zip.data, 0, zip_size);
} else {
buf_flush_init_for_writing(NULL, page, NULL,
fil_space_t::full_crc32(flags));
- *err = os_file_write(
- IORequestWrite, path, file, page, 0, srv_page_size);
+ *err = os_file_write(IORequestWrite, path, file,
+ page, 0, srv_page_size);
}
aligned_free(page);
@@ -2763,9 +2557,9 @@ err_exit:
}
}
- fil_space_t* space = fil_space_create(name, space_id, flags,
- FIL_TYPE_TABLESPACE,
- crypt_data, mode);
+ fil_space_t* space = fil_space_t::create(name, space_id, flags,
+ FIL_TYPE_TABLESPACE,
+ crypt_data, mode);
if (!space) {
free(crypt_data);
*err = DB_ERROR;
@@ -3143,7 +2937,7 @@ skip_validate:
first_page)
: NULL;
- fil_space_t* space = fil_space_create(
+ fil_space_t* space = fil_space_t::create(
tablename.m_name, id, flags, purpose, crypt_data);
if (!space) {
goto error;
@@ -3157,11 +2951,17 @@ skip_validate:
df_dict.is_open() ? df_dict.filepath() :
df_default.filepath(), OS_FILE_CLOSED, 0, false, true);
- if (validate && purpose != FIL_TYPE_IMPORT && !srv_read_only_mode) {
+ if (validate && !srv_read_only_mode) {
df_remote.close();
df_dict.close();
df_default.close();
- fsp_flags_try_adjust(space, flags & ~FSP_FLAGS_MEM_MASK);
+ if (space->acquire_for_io()) {
+ if (purpose != FIL_TYPE_IMPORT) {
+ fsp_flags_try_adjust(space, flags
+ & ~FSP_FLAGS_MEM_MASK);
+ }
+ space->release_for_io();
+ }
}
if (err) *err = DB_SUCCESS;
@@ -3491,7 +3291,7 @@ fil_ibd_load(
? fil_space_read_crypt_data(fil_space_t::zip_size(flags),
first_page)
: NULL;
- space = fil_space_create(
+ space = fil_space_t::create(
file.name(), space_id, flags, FIL_TYPE_TABLESPACE, crypt_data);
if (space == NULL) {
@@ -3557,7 +3357,7 @@ void fsp_flags_try_adjust(fil_space_t* space, ulint flags)
return;
}
if (!space->size && (space->purpose != FIL_TYPE_TABLESPACE
- || !fil_space_get_size(space->id))) {
+ || !space->get_size())) {
return;
}
/* This code is executed during server startup while no
@@ -3596,7 +3396,7 @@ func_exit:
memory cache. Note that if we have not done a crash recovery at the database
startup, there may be many tablespaces which are not yet in the memory cache.
@param[in] id Tablespace ID
-@param[in] name Tablespace name used in fil_space_create().
+@param[in] name Tablespace name used in fil_space_t::create().
@param[in] table_flags table flags
@return the tablespace
@retval NULL if no matching tablespace exists in the memory cache */
@@ -3648,281 +3448,159 @@ func_exit:
/*============================ FILE I/O ================================*/
-/********************************************************************//**
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Prepares a file node for i/o. Opens the file if it is closed. Updates the
-pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. The caller must hold the fil_sys
-mutex.
-@return false if the file can't be opened, otherwise true */
-static
-bool
-fil_node_prepare_for_io(
-/*====================*/
- fil_node_t* node, /*!< in: file node */
- fil_space_t* space) /*!< in: space */
-{
- ut_ad(node && space);
- ut_ad(mutex_own(&fil_system.mutex));
-
- if (fil_system.n_open > srv_max_n_open_files + 5) {
- ib::warn() << "Open files " << fil_system.n_open
- << " exceeds the limit " << srv_max_n_open_files;
- }
-
- if (!node->is_open()) {
- /* File is closed: open it */
- ut_a(node->n_pending == 0);
-
- if (!fil_node_open_file(node)) {
- return(false);
- }
- }
-
- if (node->n_pending++ == 0 && fil_space_belongs_in_lru(space)) {
- UT_LIST_REMOVE(fil_system.LRU, node);
- }
-
- return(true);
-}
-
/** Report information about an invalid page access.
Emits the message through ib::fatal(); the function is declared
noreturn, so callers must not expect control to come back.
@param name	tablespace or file name to mention in the message
@param offset	byte offset of the attempted access
@param len	length of the attempted access, in bytes
@param is_read	true if the access was a read, false if it was a write */
ATTRIBUTE_COLD __attribute__((noreturn))
static void
-fil_report_invalid_page_access(const page_id_t id, const char *name,
- ulint byte_offset, ulint len, bool is_read)
+fil_report_invalid_page_access(const char *name,
+ os_offset_t offset, ulint len, bool is_read)
{
- ib::fatal()
- << "Trying to " << (is_read ? "read " : "write ")
- << id
- << " which is outside the bounds of tablespace " << name
- << ". Byte offset " << byte_offset << ", len " << len;
+ ib::fatal() << "Trying to " << (is_read ? "read " : "write ") << len
+ << " bytes at " << offset
+ << " outside the bounds of the file: " << name;
}
-/** Reads or writes data. This operation could be asynchronous (aio).
-
-@param[in,out] type IO context
-@param[in] sync true if synchronous aio is desired
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] byte_offset remainder of offset in bytes; in aio this
- must be divisible by the OS block size
-@param[in] len how many bytes to read or write; this must
- not cross a file boundary; in aio this must
- be a block size multiple
-@param[in,out] buf buffer where to store read data or from where
- to write; in aio this must be appropriately
- aligned
-@param[in] message message for aio handler if non-sync aio
- used, else ignored
-@param[in] ignore whether to ignore errors
-@param[in] punch_hole punch the hole to the file for page_compressed
- tablespace
-@return status and file descriptor */
-fil_io_t
-fil_io(
- const IORequest& type,
- bool sync,
- const page_id_t page_id,
- ulint zip_size,
- ulint byte_offset,
- ulint len,
- void* buf,
- void* message,
- bool ignore,
- bool punch_hole)
+
+/** Update the data structures on write completion.
+The caller must not hold fil_system.mutex and the tablespace must have
+a pending I/O reference (both are debug assertions).
+Unless the tablespace is temporary or being stopped, or
+srv_file_flush_method is SRV_O_DIRECT_NO_FSYNC, this sets needs_flush
+and adds the tablespace to fil_system.unflushed_spaces if it is not
+already registered there. Otherwise, debug builds only assert that
+nothing was registered for flushing. */
+inline void fil_node_t::complete_write()
{
- os_offset_t offset;
-
- ut_ad(type.validate());
-
- ut_ad(len > 0);
- ut_ad(byte_offset < srv_page_size);
- ut_ad(!zip_size || byte_offset == 0);
- ut_ad(srv_page_size == 1UL << srv_page_size_shift);
- compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MAX)
- == UNIV_PAGE_SIZE_MAX);
- compile_time_assert((1U << UNIV_PAGE_SIZE_SHIFT_MIN)
- == UNIV_PAGE_SIZE_MIN);
- ut_ad(fil_validate_skip());
+ ut_ad(!mutex_own(&fil_system.mutex));
+ ut_ad(space->pending_io());
- /* ibuf bitmap pages must be read in the sync AIO mode: */
- ut_ad(recv_no_ibuf_operations
- || type.is_write()
- || !ibuf_bitmap_page(page_id, zip_size)
- || sync);
+ if (space->purpose != FIL_TYPE_TEMPORARY && !space->is_stopping() &&
+ srv_file_flush_method != SRV_O_DIRECT_NO_FSYNC)
+ {
+ mutex_enter(&fil_system.mutex);
+ if (!space->is_stopping()) /* re-checked while holding the mutex */
+ {
+ needs_flush= true;
- ulint mode;
+ if (!space->is_in_unflushed_spaces)
+ {
+ space->is_in_unflushed_spaces= true;
+ fil_system.unflushed_spaces.push_front(*space);
+ }
+ }
+ mutex_exit(&fil_system.mutex);
+ }
+#ifdef UNIV_DEBUG
+ else
+ {
+ mutex_enter(&fil_system.mutex);
+ if (!space->is_stopping())
+ {
+ ut_ad(!space->is_in_unflushed_spaces);
+ ut_ad(!needs_flush);
+ }
+ mutex_exit(&fil_system.mutex);
+ }
+#endif /* UNIV_DEBUG */
+}
- if (sync) {
- mode = OS_AIO_SYNC;
- } else if (type.is_read()
- && !recv_no_ibuf_operations
- && ibuf_page(page_id, zip_size, NULL)) {
- mode = OS_AIO_IBUF;
- } else {
- mode = OS_AIO_NORMAL;
- }
+/** Read or write data.
+The caller must hold an I/O reference on the tablespace (pending_io()).
+That reference is released here after a synchronous write or a
+PUNCH_RANGE request, and whenever an error is returned. After a
+successful synchronous read the caller must release it; for
+asynchronous requests the completion callback does.
+@param type I/O context
+@param offset offset in bytes
+@param len number of bytes
+@param buf the data to be read or written
+@param bpage buffer block (for type.is_async() completion callback)
+@return status and file node */
+fil_io_t fil_space_t::io(const IORequest &type, os_offset_t offset, size_t len,
+ void *buf, buf_page_t *bpage)
+{
+ ut_ad(pending_io());
+ ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
+ ut_ad(fil_validate_skip());
if (type.is_read()) {
-
srv_stats.data_read.add(len);
-
- } else if (type.is_write()) {
-
- ut_ad(!srv_read_only_mode
- || fsp_is_system_temporary(page_id.space()));
-
+ } else {
+ ut_ad(type.is_write() || type.type == IORequest::PUNCH_RANGE);
+ ut_ad(!srv_read_only_mode || this == fil_system.temp_space);
srv_stats.data_written.add(len);
}
- /* Acquire fil_system.mutex and make sure that we can open at
- least one file while holding it, if the file is not already open */
- fil_space_t* space = fil_mutex_enter_and_prepare_for_io(
- page_id.space());
-
- if (!space
- || (type.is_read()
- && !sync
- && space->is_stopping()
- && !space->is_being_truncated)) {
-
- mutex_exit(&fil_system.mutex);
- if (!ignore) {
- ib::error()
- << "Trying to do I/O to a tablespace which"
- " does not exist. I/O type: "
- << (type.is_read() ? "read" : "write")
- << ", page: " << page_id
- << ", I/O length: " << len << " bytes";
- }
+ fil_node_t* node= UT_LIST_GET_FIRST(chain);
+ ut_ad(node);
+ if (type.type == IORequest::READ_ASYNC && is_stopping()
+ && !is_being_truncated) {
+ release_for_io();
return {DB_TABLESPACE_DELETED, nullptr};
}
- ulint cur_page_no = page_id.page_no();
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-
- for (;;) {
-
- if (node == NULL) {
- if (ignore) {
- mutex_exit(&fil_system.mutex);
- return {DB_ERROR, nullptr};
- }
-
- fil_report_invalid_page_access(
- page_id, space->name, byte_offset, len,
- type.is_read());
+ ulint p = static_cast<ulint>(offset >> srv_page_size_shift);
- } else if (fil_is_user_tablespace_id(space->id)
- && node->size == 0) {
-
- /* We do not know the size of a single-table tablespace
- before we open the file */
- break;
-
- } else if (node->size > cur_page_no) {
- /* Found! */
- break;
-
- } else {
- cur_page_no -= node->size;
+ if (UNIV_LIKELY_NULL(UT_LIST_GET_NEXT(chain, node))) {
+ ut_ad(this == fil_system.sys_space
+ || this == fil_system.temp_space);
+ ut_ad(!(offset & ((1 << srv_page_size_shift) - 1)));
+ while (node->size <= p) {
+ p -= node->size;
node = UT_LIST_GET_NEXT(chain, node);
- }
- }
-
- /* Open file if closed */
- if (UNIV_UNLIKELY(!fil_node_prepare_for_io(node, space))) {
- ut_ad(fil_is_user_tablespace_id(space->id));
- mutex_exit(&fil_system.mutex);
-
- if (!ignore) {
- ib::error()
- << "Trying to do I/O to a tablespace '"
- << space->name
- << "' which exists without .ibd data file."
- " I/O type: "
- << (type.is_read()
- ? "read" : "write")
- << ", page: "
- << page_id
- << ", I/O length: " << len << " bytes";
+ if (!node) {
+ if (type.type == IORequest::READ_ASYNC) {
+ release_for_io();
+ return {DB_ERROR, nullptr};
+ }
+ fil_report_invalid_page_access(name, offset,
+ len,
+ type.is_read());
+ }
}
- return {DB_TABLESPACE_DELETED, nullptr};
+ offset = os_offset_t{p} << srv_page_size_shift;
}
- if (node->size <= cur_page_no) {
- if (ignore) {
+ if (UNIV_UNLIKELY(node->size <= p)) {
+ if (type.type == IORequest::READ_ASYNC) {
+ release_for_io();
/* If we can tolerate the non-existent pages, we
should return with DB_ERROR and let caller decide
what to do. */
- node->complete_io(type.is_write());
- mutex_exit(&fil_system.mutex);
return {DB_ERROR, nullptr};
}
fil_report_invalid_page_access(
- page_id, space->name, byte_offset, len,
- type.is_read());
+ node->name, offset, len, type.is_read());
}
- space->acquire_for_io();
- /* Now we have made the changes in the data structures of fil_system */
- mutex_exit(&fil_system.mutex);
-
- if (!zip_size) zip_size = srv_page_size;
-
- offset = os_offset_t(cur_page_no) * zip_size + byte_offset;
- ut_ad(node->size - cur_page_no >= (len + (zip_size - 1)) / zip_size);
-
- /* Do AIO */
-
- ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
-
- const char* name = node->name == NULL ? space->name : node->name;
-
- ut_ad(!type.is_write()
- || !fil_is_user_tablespace_id(page_id.space())
- || offset == page_id.page_no() * zip_size);
-
- dberr_t err = DB_SUCCESS;
+ dberr_t err;
- if (punch_hole) {
- /* Punch the hole to the file */
+ if (type.type == IORequest::PUNCH_RANGE) {
err = os_file_punch_hole(node->handle, offset, len);
/* Punch hole is not supported, make space not to
support punch hole */
if (UNIV_UNLIKELY(err == DB_IO_NO_PUNCH_HOLE)) {
- node->space->punch_hole = false;
+ punch_hole = false;
err = DB_SUCCESS;
}
+ goto release_sync_write;
} else {
- IORequest req_type(type);
- req_type.set_fil_node(node);
/* Queue the aio request */
err = os_aio(
- req_type,
- mode, name, node->handle, buf, offset, len,
- space->purpose != FIL_TYPE_TEMPORARY
- && srv_read_only_mode,
- node, message);
+ IORequest(type, node),
+ node->name, node->handle, buf, offset, len,
+ purpose != FIL_TYPE_TEMPORARY && srv_read_only_mode,
+ node, bpage);
}
/* We can try to recover the page from the double write buffer if
the decompression fails or the page is corrupt. */
- ut_a(type.is_dblwr_recover() || err == DB_SUCCESS);
- if (sync) {
- mutex_enter(&fil_system.mutex);
- node->complete_io(type.is_write());
- mutex_exit(&fil_system.mutex);
+ ut_a(type.type == IORequest::DBLWR_RECOVER || err == DB_SUCCESS);
+ if (!type.is_async()) {
+ if (type.is_write()) {
+release_sync_write:
+ node->complete_write();
+release:
+ release_for_io();
+ /* Every path that reaches this label is finished.
+ Return here: falling through to the error check
+ below would jump back to "release" forever when
+ err != DB_SUCCESS, releasing the reference again
+ on every iteration. */
+ ut_ad(fil_validate_skip());
+ return {err, node};
+ }
ut_ad(fil_validate_skip());
}
+ if (err != DB_SUCCESS) {
+ goto release;
+ }
return {err, node};
}
@@ -3941,8 +3619,6 @@ void fil_aio_callback(os_aio_userdata_t *data)
return;
}
- ut_ad(data->type.validate());
-
buf_page_t *bpage= static_cast<buf_page_t*>(data->message);
if (!bpage)
{
@@ -3951,14 +3627,9 @@ void fil_aio_callback(os_aio_userdata_t *data)
ut_ad(data->type.is_write());
ut_ad(!srv_read_only_mode);
write_completed:
- mutex_enter(&fil_system.mutex);
- node->complete_io(true);
- mutex_exit(&fil_system.mutex);
- node->space->release_for_io();
- return;
+ node->complete_write();
}
-
- if (data->type.is_write())
+ else if (data->type.is_write())
{
ut_ad(!srv_read_only_mode || node->space->purpose == FIL_TYPE_TEMPORARY);
bool dblwr= node->space->use_doublewrite();
@@ -3970,111 +3641,68 @@ write_completed:
buf_page_write_complete(bpage, data->type, dblwr);
goto write_completed;
}
+ else
+ {
+ ut_ad(data->type.is_read());
- ut_ad(data->type.is_read());
-
- /* IMPORTANT: since i/o handling for reads will read also the insert
- buffer in fil_system.sys_space, we have to be very careful not to
- introduce deadlocks. We never close the system tablespace (0) data
- files via fil_system.LRU and we use a dedicated I/O thread to serve
- change buffer requests. */
- const page_id_t id(bpage->id());
+ /* IMPORTANT: since i/o handling for reads will read also the insert
+ buffer in fil_system.sys_space, we have to be very careful not to
+ introduce deadlocks. We never close the system tablespace (0) data
+ files and we never issue asynchronous reads of
+ change buffer pages. */
+ const page_id_t id(bpage->id());
- if (dberr_t err= buf_page_read_complete(bpage, *node))
- {
- if (recv_recovery_is_on() && !srv_force_recovery)
- recv_sys.found_corrupt_fs= true;
+ if (dberr_t err= buf_page_read_complete(bpage, *node))
+ {
+ if (recv_recovery_is_on() && !srv_force_recovery)
+ recv_sys.found_corrupt_fs= true;
- ib::error() << "Failed to read page " << id.page_no()
- << " from file '" << node->name << "': " << err;
+ ib::error() << "Failed to read page " << id.page_no()
+ << " from file '" << node->name << "': " << err;
+ }
}
- mutex_enter(&fil_system.mutex);
- node->complete_io();
- mutex_exit(&fil_system.mutex);
node->space->release_for_io();
}
-/**********************************************************************//**
-Flushes to disk possible writes cached by the OS. If the space does not exist
-or is being dropped, does not do anything. */
-void
-fil_flush(
-/*======*/
- ulint space_id) /*!< in: file space id (this can be a group of
- log files or a tablespace of the database) */
+/** Flush pending writes from the file system cache to the file.
+Does nothing if the tablespace is being stopped; is_stopping() is
+checked once without and once while holding fil_system.mutex.
+Debug builds assert that purpose is FIL_TYPE_TABLESPACE or
+FIL_TYPE_IMPORT. */
+void fil_space_t::flush()
{
- mutex_enter(&fil_system.mutex);
-
- if (fil_space_t* space = fil_space_get_by_id(space_id)) {
- if (space->purpose != FIL_TYPE_TEMPORARY
- && !space->is_stopping()) {
- fil_flush_low(space);
- }
- }
-
- mutex_exit(&fil_system.mutex);
-}
-
-/** Flush a tablespace.
-@param[in,out] space tablespace to flush */
-void
-fil_flush(fil_space_t* space)
-{
- ut_ad(space->pending_io());
- ut_ad(space->purpose == FIL_TYPE_TABLESPACE
- || space->purpose == FIL_TYPE_IMPORT);
-
- if (!space->is_stopping()) {
- mutex_enter(&fil_system.mutex);
- if (!space->is_stopping()) {
- fil_flush_low(space);
- }
- mutex_exit(&fil_system.mutex);
- }
+ ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT);
+ if (!is_stopping())
+ {
+ mutex_enter(&fil_system.mutex);
+ if (!is_stopping())
+ fil_flush_low(this);
+ mutex_exit(&fil_system.mutex);
+ }
}
/** Flush to disk the writes possibly cached by the OS, for every
tablespace in fil_system.unflushed_spaces that is not being stopped.
The list is scanned while holding fil_system.mutex; whenever
fil_flush_low() returns true, the mutex is released and the scan
restarts from the beginning of the list.
When srv_file_flush_method is SRV_O_DIRECT_NO_FSYNC, nothing is
flushed and debug builds assert that the list is empty. */
void fil_flush_file_spaces()
{
- ulint* space_ids;
- ulint n_space_ids;
-
- mutex_enter(&fil_system.mutex);
-
- n_space_ids = fil_system.unflushed_spaces.size();
- if (n_space_ids == 0) {
-
- mutex_exit(&fil_system.mutex);
+ if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC) {
+ ut_d(mutex_enter(&fil_system.mutex));
+ ut_ad(fil_system.unflushed_spaces.empty());
+ ut_d(mutex_exit(&fil_system.mutex));
return;
}
- space_ids = static_cast<ulint*>(
- ut_malloc_nokey(n_space_ids * sizeof(*space_ids)));
-
- n_space_ids = 0;
+rescan:
+ mutex_enter(&fil_system.mutex);
for (sized_ilist<fil_space_t, unflushed_spaces_tag_t>::iterator it
= fil_system.unflushed_spaces.begin(),
end = fil_system.unflushed_spaces.end();
it != end; ++it) {
-
- if (it->purpose == FIL_TYPE_TABLESPACE && !it->is_stopping()) {
- space_ids[n_space_ids++] = it->id;
+ if (!it->is_stopping() && fil_flush_low(&*it)) {
+ mutex_exit(&fil_system.mutex);
+ goto rescan;
}
}
mutex_exit(&fil_system.mutex);
-
- /* Flush the spaces. It will not hurt to call fil_flush() on
- a non-existing space id. */
- for (ulint i = 0; i < n_space_ids; i++) {
-
- fil_flush(space_ids[i]);
- }
-
- ut_free(space_ids);
}
/** Functor to validate the file node list of a tablespace. */
@@ -4091,7 +3719,6 @@ struct Check {
@param[in] elem file node to visit */
void operator()(const fil_node_t* elem)
{
- ut_a(elem->is_open() || !elem->n_pending);
n_open += elem->is_open();
size += elem->size;
}
@@ -4128,7 +3755,6 @@ Checks the consistency of the tablespace cache.
@return true if ok */
bool fil_validate()
{
- fil_node_t* fil_node;
ulint n_open = 0;
mutex_enter(&fil_system.mutex);
@@ -4141,18 +3767,6 @@ bool fil_validate()
ut_a(fil_system.n_open == n_open);
- ut_list_validate(fil_system.LRU);
-
- for (fil_node = UT_LIST_GET_FIRST(fil_system.LRU);
- fil_node != 0;
- fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) {
-
- ut_a(fil_node->n_pending == 0);
- ut_a(!fil_node->being_extended);
- ut_a(fil_node->is_open());
- ut_a(fil_space_belongs_in_lru(fil_node->space));
- }
-
mutex_exit(&fil_system.mutex);
return(true);
diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc
index e8fc47f3e41..57164113647 100644
--- a/storage/innobase/fsp/fsp0file.cc
+++ b/storage/innobase/fsp/fsp0file.cc
@@ -296,8 +296,6 @@ Datafile::read_first_page(bool read_only_mode)
m_first_page = static_cast<byte*>(
aligned_malloc(UNIV_PAGE_SIZE_MAX, srv_page_size));
- constexpr IORequest request(IORequest::READ |
- IORequest::DISABLE_PARTIAL_IO_WARNINGS);
dberr_t err = DB_ERROR;
size_t page_size = UNIV_PAGE_SIZE_MAX;
@@ -308,7 +306,8 @@ Datafile::read_first_page(bool read_only_mode)
ulint n_read = 0;
err = os_file_read_no_error_handling(
- request, m_handle, m_first_page, 0, page_size, &n_read);
+ IORequestReadPartial, m_handle, m_first_page, 0,
+ page_size, &n_read);
if (err == DB_IO_ERROR && n_read >= UNIV_PAGE_SIZE_MIN) {
diff --git a/storage/innobase/fsp/fsp0space.cc b/storage/innobase/fsp/fsp0space.cc
index 1ed4af86367..b0a80efe7c4 100644
--- a/storage/innobase/fsp/fsp0space.cc
+++ b/storage/innobase/fsp/fsp0space.cc
@@ -130,7 +130,7 @@ Tablespace::open_or_create(bool is_temp)
fsp_flags = FSP_FLAGS_PAGE_SSIZE();
}
- space = fil_space_create(
+ space = fil_space_t::create(
m_name, m_space_id, fsp_flags,
is_temp
? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE,
diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc
index f8342157560..a2c9e1bc688 100644
--- a/storage/innobase/fsp/fsp0sysspace.cc
+++ b/storage/innobase/fsp/fsp0sysspace.cc
@@ -906,13 +906,10 @@ SysTablespace::open_or_create(
if (it != begin) {
} else if (is_temp) {
ut_ad(space_id() == SRV_TMP_SPACE_ID);
- space = fil_space_create(
+ space = fil_space_t::create(
name(), SRV_TMP_SPACE_ID, flags(),
FIL_TYPE_TEMPORARY, NULL);
-
- mutex_enter(&fil_system.mutex);
- fil_system.temp_space = space;
- mutex_exit(&fil_system.mutex);
+ ut_ad(space == fil_system.temp_space);
if (!space) {
return DB_ERROR;
}
@@ -920,12 +917,10 @@ SysTablespace::open_or_create(
ut_ad(space->full_crc32());
} else {
ut_ad(space_id() == TRX_SYS_SPACE);
- space = fil_space_create(
+ space = fil_space_t::create(
name(), TRX_SYS_SPACE, it->flags(),
FIL_TYPE_TABLESPACE, NULL);
- mutex_enter(&fil_system.mutex);
- fil_system.sys_space = space;
- mutex_exit(&fil_system.mutex);
+ ut_ad(space == fil_system.sys_space);
if (!space) {
return DB_ERROR;
}
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index de4195b5727..e7e66bb0e8d 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -7044,6 +7044,7 @@ i_s_tablespaces_encryption_fill_table(
}
mutex_enter(&fil_system.mutex);
+ fil_system.freeze_space_list++;
for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system.space_list);
space; space = UT_LIST_GET_NEXT(space_list, space)) {
@@ -7060,6 +7061,7 @@ i_s_tablespaces_encryption_fill_table(
}
}
+ fil_system.freeze_space_list--;
mutex_exit(&fil_system.mutex);
DBUG_RETURN(0);
}
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 494ae2798ee..9e9bc241828 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -2300,7 +2300,7 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
for (ulint i = 0; i < n_stored; i++) {
const ulint space_id = space_ids[i];
- fil_space_t* s = fil_space_acquire_for_io(space_id);
+ fil_space_t* s = fil_space_t::get_for_io(space_id);
if (!s) {
tablespace_deleted:
/* The tablespace was not found: remove all
@@ -4631,26 +4631,14 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
const unsigned zip_size = space->zip_size();
const unsigned physical_size = space->physical_size();
- /* fil_space_t::size and fil_space_t::free_limit would still be 0
- at this point. So, we will have to read page 0. */
- ut_ad(!space->free_limit);
- ut_ad(!space->size);
- mtr_t mtr;
- uint32_t size;
- mtr.start();
- if (buf_block_t* sp = buf_page_get(page_id_t(space->id, 0),
- zip_size,
- RW_S_LATCH, &mtr)) {
- size = std::min(
- mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT
- + sp->frame),
- mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE
- + sp->frame));
- } else {
- size = 0;
+ uint32_t size= std::min(space->free_limit, space->size);
+
+ if (size == 0) {
+ return(DB_TABLE_NOT_FOUND);
}
- mtr.commit();
+
+ mtr_t mtr;
mutex_enter(&ibuf_mutex);
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index d2b52c4f520..d8e152f1ffa 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -978,6 +978,15 @@ public:
return zip.ssize ? (UNIV_ZIP_SIZE_MIN >> 1) << zip.ssize : 0;
}
+ /** @return the byte offset of the page within a file */
+ os_offset_t physical_offset() const
+ {
+ os_offset_t o= id().page_no();
+ return zip.ssize
+ ? o << (zip.ssize + (UNIV_ZIP_SIZE_SHIFT_MIN - 1))
+ : o << srv_page_size_shift;
+ }
+
/** @return whether the block is mapped to a data file */
bool in_file() const
{
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index 1b9415d38be..aac4715250d 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -52,10 +52,10 @@ class buf_dblwr_t
struct element
{
- /** block descriptor */
- buf_page_t *bpage;
- /** true=buf_pool.flush_list, false=buf_pool.LRU */
- bool lru;
+ /** tablespace */
+ fil_space_t *space;
+ /** asynchronous write request */
+ IORequest request;
/** payload size in bytes */
size_t size;
};
@@ -103,10 +103,11 @@ public:
/** Schedule a page write. If the doublewrite memory buffer is full,
flush_buffered_writes() will be invoked to make space.
- @param bpage buffer pool page to be written
- @param lru true=buf_pool.LRU; false=buf_pool.flush_list
+ @param space tablespace
+ @param request asynchronous write request
@param size payload size in bytes */
- void add_to_batch(buf_page_t *bpage, bool lru, size_t size);
+ void add_to_batch(fil_space_t *space, const IORequest &request,
+ size_t size) MY_ATTRIBUTE((nonnull));
/** Determine whether the doublewrite buffer is initialized */
bool is_initialised() const
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
index e111bbd7a02..87c6b5d7e75 100644
--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -46,11 +46,13 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size);
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
+@param[in,out] space tablespace
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] sync true if synchronous aio is desired */
-void
-buf_read_page_background(const page_id_t page_id, ulint zip_size, bool sync);
+void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
+ ulint zip_size, bool sync)
+ MY_ATTRIBUTE((nonnull));
/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
@@ -101,14 +103,11 @@ ulint
buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf);
/** Issues read requests for pages which recovery wants to read in.
-@param[in] sync true if the caller wants this function to wait
-for the highest address page to get read in, before this function returns
@param[in] space_id tablespace id
@param[in] page_nos array of page numbers to read, with the
highest page number the last in the array
@param[in] n number of page numbers in the array */
-void buf_read_recv_pages(bool sync, ulint space_id, const uint32_t *page_nos,
- ulint n);
+void buf_read_recv_pages(ulint space_id, const uint32_t* page_nos, ulint n);
/** @name Modes used in read-ahead @{ */
/** read only pages belonging to the insert buffer tree */
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 0fa0c0b598b..57e5c43199b 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -313,6 +313,25 @@ new_range:
/** Tablespace or log data space */
#ifndef UNIV_INNOCHECKSUM
+struct fil_io_t
+{
+ /** error code */
+ dberr_t err;
+ /** file; node->space->release_for_io() must follow IORequestRead call */
+ fil_node_t *node;
+};
+
+/** Tablespace encryption mode */
+enum fil_encryption_t
+{
+ /** Encrypted if innodb_encrypt_tables=ON (srv_encrypt_tables) */
+ FIL_ENCRYPTION_DEFAULT,
+ /** Encrypted */
+ FIL_ENCRYPTION_ON,
+ /** Not encrypted */
+ FIL_ENCRYPTION_OFF
+};
+
struct fil_space_t : ilist_node<unflushed_spaces_tag_t>,
ilist_node<rotation_list_tag_t>
#else
@@ -348,8 +367,6 @@ struct fil_space_t
/*!< recovered tablespace size in pages;
0 if no size change was read from the redo log,
or if the size change was implemented */
- /** the committed size of the tablespace in pages */
- Atomic_relaxed<uint32_t> committed_size;
ulint n_reserved_extents;
/*!< number of reserved free extents for
ongoing operations like B-tree page split */
@@ -357,28 +374,33 @@ struct fil_space_t
the tablespace to disk; dropping of the
tablespace is forbidden if this is positive */
private:
+ /** the committed size of the tablespace in pages */
+ Atomic_relaxed<uint32_t> committed_size;
/** Number of pending buffer pool operations accessing the
tablespace without holding a table lock or dict_operation_lock
S-latch that would prevent the table (and tablespace) from being
dropped. An example is encryption key rotation.
- The tablespace cannot be dropped while this is nonzero, or while
- fil_node_t::n_pending is nonzero.
+ The tablespace cannot be dropped while this is nonzero.
The most significant bit contains the STOP_NEW_OPS flag. */
- Atomic_relaxed<size_t> n_pending_ops;
+ Atomic_relaxed<uint32_t> n_pending_ops;
+ /** Number of pending block read or write operations
+ The tablespace object cannot be freed while this is nonzero,
+ but it can be detached from fil_system.
+
+ The most significant bit contains the CLOSING flag. */
+ std::atomic<uint32_t> n_pending_ios;
/** Flag in n_pending_ops that indicates that the tablespace is being
deleted, and no further operations should be performed */
static constexpr uint32_t STOP_NEW_OPS= ~(~uint32_t(0) >> 1);
+ /** Flag in n_pending_ios that indicates that the tablespace is a candidate
+ for being closed, and fil_node_t::is_open() can only be trusted after
+ acquiring fil_system.mutex and resetting the flag */
+ static constexpr uint32_t CLOSING= STOP_NEW_OPS;
+ static constexpr uint32_t NOT_CLOSING= ~CLOSING;
public:
- /** Number of pending block read or write operations
- (when a write is imminent or a read has recently completed).
- The tablespace object cannot be freed while this is nonzero,
- but it can be detached from fil_system.
- Note that fil_node_t::n_pending tracks actual pending I/O requests.
- Protected by fil_system.mutex and std::atomic. */
- std::atomic<ulint> n_pending_ios;
rw_lock_t latch; /*!< latch protecting the file space storage
allocation */
UT_LIST_NODE_T(fil_space_t) named_spaces;
@@ -476,19 +498,20 @@ public:
dberr_t rename(const char* name, const char* path, bool log,
bool replace = false);
- /** Note that the tablespace has been imported.
- Initially, purpose=FIL_TYPE_IMPORT so that no redo log is
- written while the space ID is being updated in each page. */
- inline void set_imported();
+ /** Note that the tablespace has been imported.
+ Initially, purpose=FIL_TYPE_IMPORT so that no redo log is
+ written while the space ID is being updated in each page. */
+ inline void set_imported();
- /** @return whether the storage device is rotational (HDD, not SSD) */
- inline bool is_rotational() const;
+ /** @return whether the storage device is rotational (HDD, not SSD) */
+ inline bool is_rotational() const;
- /** Open each file. Only invoked on fil_system.temp_space.
- @return whether all files were opened */
- bool open();
- /** Close each file. Only invoked on fil_system.temp_space. */
- void close();
+ /** Open each file. Never invoked on .ibd files.
+ @param create_new_db whether to skip the call to fil_node_t::read_page0()
+ @return whether all files were opened */
+ bool open(bool create_new_db);
+ /** Close each file. Only invoked on fil_system.temp_space. */
+ void close();
/** @return whether the tablespace is about to be dropped */
bool is_stopping() const { return n_pending_ops & STOP_NEW_OPS; }
@@ -497,17 +520,13 @@ public:
size_t referenced() const { return n_pending_ops & ~STOP_NEW_OPS; }
/** Note that operations on the tablespace must stop or can resume */
- void set_stopping(bool stopping)
- {
- ut_d(auto n=) n_pending_ops.fetch_xor(STOP_NEW_OPS);
- ut_ad(!(n & STOP_NEW_OPS) == stopping);
- }
+ inline void set_stopping(bool stopping);
MY_ATTRIBUTE((warn_unused_result))
/** @return whether a tablespace reference was successfully acquired */
bool acquire()
{
- size_t n= 0;
+ uint32_t n= 0;
while (!n_pending_ops.compare_exchange_strong(n, n + 1,
std::memory_order_acquire,
std::memory_order_relaxed))
@@ -523,30 +542,41 @@ public:
ut_ad(n & ~STOP_NEW_OPS);
return (n & ~STOP_NEW_OPS) == 1;
}
- /** Acquire a tablespace reference for I/O. */
- void acquire_for_io() { n_pending_ios++; }
+
+ MY_ATTRIBUTE((warn_unused_result))
+ /** Acquire a tablespace reference for I/O.
+ @return whether the file is usable */
+ bool acquire_for_io()
+ {
+ return UNIV_LIKELY(!(n_pending_ios.fetch_add(1, std::memory_order_acquire)&
+ CLOSING)) ||
+ prepare_for_io();
+ }
+
+ /** Acquire another tablespace reference for I/O. */
+ inline void reacquire_for_io();
+
/** Release a tablespace reference for I/O. */
- void release_for_io() { ut_d(auto n=) n_pending_ios--; ut_ad(n); }
- /** @return whether I/O is pending */
- bool pending_io() const { return n_pending_ios; }
+ void release_for_io()
+ {
+ ut_d(uint32_t n=) n_pending_ios.fetch_sub(1, std::memory_order_release);
+ ut_ad(n & NOT_CLOSING);
+ }
+ /** @return number of pending reads or writes */
+ uint32_t pending_io() const
+ { return n_pending_ios.load(std::memory_order_acquire) & NOT_CLOSING; }
- /** @return whether the tablespace file can be closed and reopened */
- bool belongs_in_lru() const
+ MY_ATTRIBUTE((warn_unused_result))
+ /** Prepare to close the file handle.
+ @return number of pending operations */
+ uint32_t set_closing()
{
- switch (purpose) {
- case FIL_TYPE_TEMPORARY:
- ut_ad(id == SRV_TMP_SPACE_ID);
- return false;
- case FIL_TYPE_IMPORT:
- ut_ad(id != SRV_TMP_SPACE_ID);
- return true;
- case FIL_TYPE_TABLESPACE:
- ut_ad(id != SRV_TMP_SPACE_ID);
- return id && !srv_is_undo_tablespace(id);
- }
- ut_ad(0);
- return false;
+ return n_pending_ios.fetch_or(CLOSING, std::memory_order_acquire) &
+ NOT_CLOSING;
}
+ /** @return whether close() of the file handle has been requested */
+ bool is_closing() const
+ { return n_pending_ios.load(std::memory_order_acquire) & CLOSING; }
/** @return last_freed_lsn */
lsn_t get_last_freed_lsn() { return last_freed_lsn; }
@@ -835,6 +865,25 @@ public:
}
#ifndef UNIV_INNOCHECKSUM
+ MY_ATTRIBUTE((warn_unused_result))
+ /** Create a tablespace in fil_system.
+ @param name tablespace name
+ @param id tablespace identifier
+ @param flags tablespace flags
+ @param purpose tablespace purpose
+ @param crypt_data encryption information
+ @param mode encryption mode
+ @return pointer to created tablespace, to be filled in with add()
+ @retval nullptr on failure (such as when the same tablespace exists) */
+ static fil_space_t *create(const char *name, ulint id, ulint flags,
+ fil_type_t purpose, fil_space_crypt_t *crypt_data,
+ fil_encryption_t mode= FIL_ENCRYPTION_DEFAULT);
+
+ /** Acquire a tablespace for reading or writing a block.
+ @param id tablespace ID
+ @return the tablespace, or nullptr if missing or inaccessible */
+ static fil_space_t *get_for_io(ulint id);
+
/** Add/remove the free page in the freed ranges list.
@param[in] offset page number to be added
@param[in] free true if page to be freed */
@@ -863,8 +912,47 @@ public:
std::lock_guard<std::mutex> freed_lock(freed_range_mutex);
freed_ranges.add_range(range);
}
-#endif /*!UNIV_INNOCHECKSUM */
+ /** Set both size and committed_size of the tablespace. @param s the new size in pages */
+ void set_sizes(uint32_t s)
+ {
+ ut_ad(id ? !size : (size >= s)); /* id!=0: size must still be 0; id==0: s must not exceed the current size */
+ size= s; committed_size= s;
+ }
+
+ /** Update committed_size in mtr_t::commit(); the caller must hold latch in X mode (debug-asserted) */
+ void set_committed_size()
+ {
+ ut_ad(rw_lock_own(&latch, RW_LOCK_X));
+ committed_size= size;
+ }
+
+ /** @return the last persisted page number, committed_size - 1; NOTE(review): presumably only meaningful while committed_size > 0 */
+ uint32_t last_page_number() const { return committed_size - 1; }
+
+ /** @return the size in pages (0 if unreadable) */
+ inline uint32_t get_size();
+
+ /** Read or write data.
+ @param type I/O context
+ @param offset offset in bytes
+ @param len number of bytes
+ @param buf the data to be read or written
+ @param bpage buffer block (for type.is_async() completion callback)
+ @return status and file descriptor */
+ fil_io_t io(const IORequest &type, os_offset_t offset, size_t len,
+ void *buf, buf_page_t *bpage= nullptr);
+ /** Flush pending writes from the file system cache to the file */
+ void flush();
+
+ /** Read the first page of a data file.
+ @return whether the page was found valid */
+ bool read_page0();
+
+private:
+ /** @return whether the file is usable for io() */
+ ATTRIBUTE_COLD bool prepare_for_io();
+#endif /*!UNIV_INNOCHECKSUM */
};
#ifndef UNIV_INNOCHECKSUM
@@ -892,8 +980,6 @@ struct fil_node_t {
uint32_t init_size;
/** maximum size of the file in database pages (0 if unlimited) */
uint32_t max_size;
- /** count of pending i/o's; is_open must be true if nonzero */
- ulint n_pending;
/** count of pending flushes; is_open must be true if nonzero */
ulint n_pending_flushes;
/** whether the file is currently being extended */
@@ -902,8 +988,6 @@ struct fil_node_t {
bool needs_flush;
/** link to other files in this tablespace */
UT_LIST_NODE_T(fil_node_t) chain;
- /** link to the fil_system.LRU list (keeping track of open files) */
- UT_LIST_NODE_T(fil_node_t) LRU;
/** whether this file could use atomic write (data file) */
bool atomic_write;
@@ -921,9 +1005,8 @@ struct fil_node_t {
}
/** Read the first page of a data file.
- @param[in] first whether this is the very first read
@return whether the page was found valid */
- bool read_page0(bool first);
+ bool read_page0();
/** Determine some file metadata when creating or reading the file.
@param file the file that is being created, or OS_FILE_CLOSED */
@@ -942,8 +1025,8 @@ struct fil_node_t {
@return detached handle or OS_FILE_CLOSED */
pfs_os_file_t close_to_free(bool detach_handle= false);
- /** Update the data structures on I/O completion */
- inline void complete_io(bool write= false);
+ /** Update the data structures on write completion */
+ inline void complete_write();
private:
/** Does stuff common for close() and detach() */
@@ -953,22 +1036,27 @@ private:
/** Value of fil_node_t::magic_n */
#define FIL_NODE_MAGIC_N 89389
+inline void fil_space_t::reacquire_for_io()
+{
+ ut_d(uint32_t n=) n_pending_ios.fetch_add(1, std::memory_order_relaxed);
+ ut_ad(n & NOT_CLOSING); /* the count before the increment must be nonzero: a reference is already held */
+ ut_ad(UT_LIST_GET_FIRST(chain)->is_open()); /* the first file of the tablespace must be open */
+}
+
inline void fil_space_t::set_imported()
{
- ut_ad(purpose == FIL_TYPE_IMPORT);
- purpose = FIL_TYPE_TABLESPACE;
- UT_LIST_GET_FIRST(chain)->find_metadata();
+ ut_ad(purpose == FIL_TYPE_IMPORT);
+ purpose= FIL_TYPE_TABLESPACE;
+ UT_LIST_GET_FIRST(chain)->find_metadata();
}
inline bool fil_space_t::is_rotational() const
{
- for (const fil_node_t* node = UT_LIST_GET_FIRST(chain); node;
- node = UT_LIST_GET_NEXT(chain, node)) {
- if (!node->on_ssd) {
- return true;
- }
- }
- return false;
+ for (const fil_node_t *node= UT_LIST_GET_FIRST(chain); node;
+ node= UT_LIST_GET_NEXT(chain, node))
+ if (!node->on_ssd)
+ return true;
+ return false;
}
/** Common InnoDB file extensions */
@@ -1179,16 +1267,6 @@ index */
#define fil_page_index_page_check(page) \
fil_page_type_is_index(fil_page_get_type(page))
-/** Enum values for encryption table option */
-enum fil_encryption_t {
- /** Encrypted if innodb_encrypt_tables=ON (srv_encrypt_tables) */
- FIL_ENCRYPTION_DEFAULT,
- /** Encrypted */
- FIL_ENCRYPTION_ON,
- /** Not encrypted */
- FIL_ENCRYPTION_OFF
-};
-
/** Get the file page type.
@param[in] page file page
@return page type */
@@ -1227,7 +1305,6 @@ struct fil_system_t {
*/
fil_system_t(): m_initialised(false)
{
- UT_LIST_INIT(LRU, &fil_node_t::LRU);
UT_LIST_INIT(space_list, &fil_space_t::space_list);
UT_LIST_INIT(named_spaces, &fil_space_t::named_spaces);
}
@@ -1275,30 +1352,23 @@ public:
fil_space_t* temp_space; /*!< The innodb_temporary tablespace */
/** Map of fil_space_t::id to fil_space_t* */
hash_table_t spaces;
- UT_LIST_BASE_NODE_T(fil_node_t) LRU;
- /*!< base node for the LRU list of the
- most recently used open files with no
- pending i/o's; if we start an i/o on
- the file, we first remove it from this
- list, and return it to the start of
- the list when the i/o ends;
- log files and the system tablespace are
- not put to this list: they are opened
- after the startup, and kept open until
- shutdown */
sized_ilist<fil_space_t, unflushed_spaces_tag_t> unflushed_spaces;
/*!< list of those
tablespaces whose files contain
unflushed writes; those spaces have
at least one file node where
needs_flush == true */
- ulint n_open; /*!< number of files currently open */
+ /** number of currently open files; protected by mutex */
+ ulint n_open;
ulint max_assigned_id;/*!< maximum space id in the existing
tables, or assigned during the time
mysqld has been up; at an InnoDB
startup we scan the data dictionary
and set here the maximum of the
space id's of the tables there */
+ /** nonzero if fil_node_open_file_low() should avoid moving the tablespace
+ to the end of space_list, for FIFO policy of try_to_close() */
+ ulint freeze_space_list;
UT_LIST_BASE_NODE_T(fil_space_t) space_list;
/*!< list of all file spaces */
UT_LIST_BASE_NODE_T(fil_space_t) named_spaces;
@@ -1312,16 +1382,10 @@ public:
key rotation.*/
bool space_id_reuse_warned;
- /*!< whether fil_space_create()
+ /*!< whether fil_space_t::create()
has issued a warning about
potential space_id reuse */
- /** Trigger a call to fil_node_t::read_page0()
- @param[in] id tablespace identifier
- @return tablespace
- @retval NULL if the tablespace does not exist or cannot be read */
- fil_space_t* read_page0(ulint id);
-
/** Return the next tablespace from rotation_list.
@param space previous tablespace (NULL to start from the start)
@param recheck whether the removal condition needs to be rechecked after
@@ -1336,63 +1400,28 @@ public:
/** The tablespace memory cache. */
extern fil_system_t fil_system;
-/** Update the data structures on I/O completion */
-inline void fil_node_t::complete_io(bool write)
+/** Note that operations on the tablespace must stop (stopping=true) or can resume (stopping=false), by toggling STOP_NEW_OPS */
+inline void fil_space_t::set_stopping(bool stopping)
{
ut_ad(mutex_own(&fil_system.mutex));
+ ut_d(auto n=) n_pending_ops.fetch_xor(STOP_NEW_OPS);
+ ut_ad(!(n & STOP_NEW_OPS) == stopping);
+}
- if (write)
+/** @return the size in pages (0 if unreadable) */
+inline uint32_t fil_space_t::get_size()
+{
+ if (!size)
{
- if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)
- {
- /* We don't need to keep track of unflushed changes as user has
- explicitly disabled buffering. */
- ut_ad(!space->is_in_unflushed_spaces);
- ut_ad(!needs_flush);
- }
- else if (!space->is_stopping())
- {
- needs_flush= true;
- if (!space->is_in_unflushed_spaces)
- {
- space->is_in_unflushed_spaces= true;
- fil_system.unflushed_spaces.push_front(*space);
- }
- }
- }
-
- switch (n_pending--) {
- case 0:
- ut_error;
- case 1:
- if (space->belongs_in_lru())
- /* The node must be put back to the LRU list */
- UT_LIST_ADD_FIRST(fil_system.LRU, this);
+ mutex_enter(&fil_system.mutex);
+ read_page0();
+ mutex_exit(&fil_system.mutex);
}
+ return size;
}
#include "fil0crypt.h"
-/** Create a space memory object and put it to the fil_system hash table.
-Error messages are issued to the server log.
-@param[in] name tablespace name
-@param[in] id tablespace identifier
-@param[in] flags tablespace flags
-@param[in] purpose tablespace purpose
-@param[in,out] crypt_data encryption information
-@param[in] mode encryption mode
-@return pointer to created tablespace, to be filled in with fil_space_t::add()
-@retval NULL on failure (such as when the same tablespace exists) */
-fil_space_t*
-fil_space_create(
- const char* name,
- ulint id,
- ulint flags,
- fil_type_t purpose,
- fil_space_crypt_t* crypt_data,
- fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT)
- MY_ATTRIBUTE((warn_unused_result));
-
/*******************************************************************//**
Assigns a new space id for a new single-table tablespace. This works simply by
incrementing the global counter. If 4 billion id's is not enough, we may need
@@ -1421,21 +1450,6 @@ fil_space_free(
void fil_space_set_recv_size_and_flags(ulint id, uint32_t size,
uint32_t flags);
-/*******************************************************************//**
-Returns the size of the space in pages. The tablespace must be cached in the
-memory cache.
-@return space size, 0 if space not found */
-ulint
-fil_space_get_size(
-/*===============*/
- ulint id); /*!< in: space id */
-
-/** Opens all system tablespace data files. They stay open until the
-database server shutdown. This should be called at a server startup after the
-space objects for the system tablespace have been created. The
-purpose of this operation is to make sure we never run out of file descriptors
-if we need to read from the insert buffer. */
-void fil_open_system_tablespace_files();
/** Close all tablespace files at shutdown */
void fil_close_all_files();
/*******************************************************************//**
@@ -1491,14 +1505,6 @@ fil_space_acquire_silent(ulint id)
return (fil_space_acquire_low(id, true));
}
-/** Acquire a tablespace for reading or writing a block,
-when it could be dropped concurrently.
-@param[in] id tablespace ID
-@return the tablespace
-@retval NULL if missing */
-fil_space_t*
-fil_space_acquire_for_io(ulint id);
-
/** Replay a file rename operation if possible.
@param[in] space_id tablespace identifier
@param[in] name old file name
@@ -1674,7 +1680,7 @@ fil_file_readdir_next_file(
memory cache. Note that if we have not done a crash recovery at the database
startup, there may be many tablespaces which are not yet in the memory cache.
@param[in] id Tablespace ID
-@param[in] name Tablespace name used in fil_space_create().
+@param[in] name Tablespace name used in fil_space_t::create().
@param[in] table_flags table flags
@return the tablespace
@retval NULL if no matching tablespace exists in the memory cache */
@@ -1690,70 +1696,6 @@ fil_space_for_table_exists_in_mem(
@return whether the tablespace is at least as big as requested */
bool fil_space_extend(fil_space_t *space, uint32_t size);
-struct fil_io_t
-{
- /** error code */
- dberr_t err;
- /** file; node->space->release_for_io() must follow fil_io(sync=true) call */
- fil_node_t *node;
-};
-
-/** Reads or writes data. This operation could be asynchronous (aio).
-
-@param[in] type IO context
-@param[in] sync true if synchronous aio is desired
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] byte_offset remainder of offset in bytes; in aio this
- must be divisible by the OS block size
-@param[in] len how many bytes to read or write; this must
- not cross a file boundary; in aio this must
- be a block size multiple
-@param[in,out] buf buffer where to store read data or from where
- to write; in aio this must be appropriately
- aligned
-@param[in] message message for aio handler if non-sync aio
- used, else ignored
-@param[in] ignore whether to ignore errors
-@param[in] punch_hole punch the hole to the file for page_compressed
- tablespace
-@return status and file descriptor */
-fil_io_t
-fil_io(
- const IORequest& type,
- bool sync,
- const page_id_t page_id,
- ulint zip_size,
- ulint byte_offset,
- ulint len,
- void* buf,
- void* message,
- bool ignore = false,
- bool punch_hole = false);
-
-/**********************************************************************//**
-Waits for an aio operation to complete. This function is used to write the
-handler for completed requests. The aio array of pending requests is divided
-into segments (see os0file.cc for more info). The thread specifies which
-segment it wants to wait for. */
-void
-fil_aio_wait(
-/*=========*/
- ulint segment); /*!< in: the number of the segment in the aio
- array to wait for */
-/**********************************************************************//**
-Flushes to disk possible writes cached by the OS. If the space does not exist
-or is being dropped, does not do anything. */
-void
-fil_flush(
-/*======*/
- ulint space_id); /*!< in: file space id (this can be a group of
- log files or a tablespace of the database) */
-/** Flush a tablespace.
-@param[in,out] space tablespace to flush */
-void
-fil_flush(fil_space_t* space);
-
/** Flush to disk the writes in file spaces of the given type
possibly cached by the OS. */
void fil_flush_file_spaces();
@@ -1846,23 +1788,6 @@ inline bool fil_names_write_if_was_clean(fil_space_t* space)
return(was_clean);
}
-/** During crash recovery, open a tablespace if it had not been opened
-yet, to get valid size and flags.
-@param[in,out] space tablespace */
-inline void fil_space_open_if_needed(fil_space_t* space)
-{
- ut_ad(recv_recovery_is_on());
-
- if (space->size == 0) {
- /* Initially, size and flags will be set to 0,
- until the files are opened for the first time.
- fil_space_get_size() will open the file
- and adjust the size and flags. */
- ut_d(ulint size =) fil_space_get_size(space->id);
- ut_ad(size == space->size);
- }
-}
-
/** On a log checkpoint, reset fil_names_dirty_and_write() flags
and write out FILE_MODIFY and FILE_CHECKPOINT if needed.
@param[in] lsn checkpoint LSN
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index 5057ed98aba..f8e4c06baae 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2019, MariaDB Corporation.
+Copyright (c) 2014, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,9 +24,7 @@ File space management types
Created May 26, 2009 Vasil Dimov
*******************************************************/
-#ifndef fsp0types_h
-#define fsp0types_h
-
+#pragma once
#include <cstddef>
/** The fil_space_t::id of the redo log. All persistent tablespaces
@@ -402,4 +400,6 @@ in full crc32 format. */
/* @} */
-#endif /* fsp0types_h */
+struct fil_node_t;
+struct fil_space_t;
+class buf_page_t;
diff --git a/storage/innobase/include/os0api.h b/storage/innobase/include/os0api.h
deleted file mode 100644
index bd9dc5b73a1..00000000000
--- a/storage/innobase/include/os0api.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/***********************************************************************
-
-Copyright (c) 2017, 2019, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
-
-***********************************************************************/
-
-/**************************************************//**
-@file os0api.h
-The interface to the helper functions.
-These functions are used on os0file.h where
-including full full header is not feasible and
-implemented on buf0buf.cc and fil0fil.cc.
-*******************************************************/
-
-#ifndef OS_API_H
-#define OS_API_H 1
-
-/** Page control block */
-class buf_page_t;
-
-/** File Node */
-struct fil_node_t;
-
-/**
-Calculate the length of trim (punch_hole) operation.
-@param[in] bpage Page control block
-@param[in] write_length Write length
-@return length of the trim or zero. */
-ulint
-buf_page_get_trim_length(
- const buf_page_t* bpage,
- ulint write_length)
- MY_ATTRIBUTE((warn_unused_result));
-
-#endif /* OS_API_H */
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index def091c9771..0db22abfb19 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -37,7 +37,6 @@ Created 10/21/1995 Heikki Tuuri
#define os0file_h
#include "fsp0types.h"
-#include "os0api.h"
#include "tpool.h"
#ifndef _WIN32
@@ -46,10 +45,6 @@ Created 10/21/1995 Heikki Tuuri
#include <time.h>
#endif /* !_WIN32 */
-/** File node of a tablespace or the log data space */
-struct fil_node_t;
-struct fil_space_t;
-
extern bool os_has_said_disk_full;
/** File offset in bytes */
@@ -188,117 +183,75 @@ The I/O context that is passed down to the low level IO code */
class IORequest
{
public:
- constexpr IORequest(ulint type= READ, buf_page_t *bpage= nullptr,
- bool lru= false) :
- m_bpage(bpage), m_type(static_cast<uint16_t>(type)), m_LRU(lru) {}
-
- /** Flags passed in the request, they can be ORred together. */
- enum {
- READ = 1,
- WRITE = 2,
-
- /** Double write buffer recovery. */
- DBLWR_RECOVER = 4,
-
- /** Enumarations below can be ORed to READ/WRITE above*/
-
- /** Data file */
- DATA_FILE = 8,
-
- /** Disable partial read warnings */
- DISABLE_PARTIAL_IO_WARNINGS = 32,
-
- /** Use punch hole if available*/
- PUNCH_HOLE = 64,
- };
-
- /** @return true if it is a read request */
- bool is_read() const
- MY_ATTRIBUTE((warn_unused_result))
- {
- return((m_type & READ) == READ);
- }
-
- /** @return true if it is a write request */
- bool is_write() const
- MY_ATTRIBUTE((warn_unused_result))
- {
- return((m_type & WRITE) == WRITE);
- }
-
- /** @return true if partial read warning disabled */
- bool is_partial_io_warning_disabled() const
- MY_ATTRIBUTE((warn_unused_result))
- {
- return !!(m_type & DISABLE_PARTIAL_IO_WARNINGS);
- }
-
- /** @return true if punch hole should be used */
- bool punch_hole() const
- MY_ATTRIBUTE((warn_unused_result))
- {
- return((m_type & PUNCH_HOLE) == PUNCH_HOLE);
- }
-
- /** @return true if the read should be validated */
- bool validate() const
- MY_ATTRIBUTE((warn_unused_result))
- {
- return(is_read() ^ is_write());
- }
-
- /** Set the pointer to file node for IO
- @param[in] node File node */
- void set_fil_node(fil_node_t *node) { m_fil_node= node; }
-
- bool operator==(const IORequest& rhs) const
- {
- return(m_type == rhs.m_type);
- }
-
- /** @return true if the request is from the dblwr recovery */
- bool is_dblwr_recover() const
- MY_ATTRIBUTE((warn_unused_result))
- {
- return((m_type & DBLWR_RECOVER) == DBLWR_RECOVER);
- }
-
- ulint get_trim_length(ulint write_length) const
- {
- return (m_bpage ?
- buf_page_get_trim_length(m_bpage, write_length)
- : 0);
- }
-
- inline bool should_punch_hole() const;
-
- /** Free storage space associated with a section of the file.
- @param[in] fh Open file handle
- @param[in] off Starting offset (SEEK_SET)
- @param[in] len Size of the hole
- @return DB_SUCCESS or error code */
- dberr_t punch_hole(os_file_t fh, os_offset_t off, ulint len);
-
- /** @return type of page flush (for writes) */
- bool is_LRU() const { return m_LRU; }
+ enum Type
+ {
+ /** Synchronous read */
+ READ_SYNC= 2,
+ /** Asynchronous read; some errors will be ignored */
+ READ_ASYNC= READ_SYNC | 1,
+ /** Possibly partial read; only used with
+ os_file_read_no_error_handling() */
+ READ_MAYBE_PARTIAL= READ_SYNC | 4,
+ /** Read for doublewrite buffer recovery */
+ DBLWR_RECOVER= READ_SYNC | 8,
+ /** Synchronous write */
+ WRITE_SYNC= 16,
+ /** Asynchronous write */
+ WRITE_ASYNC= WRITE_SYNC | 1,
+ /** Write data; evict the block on write completion */
+ WRITE_LRU= WRITE_ASYNC | 32,
+ /** Write data and punch hole for the rest */
+ PUNCH= WRITE_ASYNC | 64,
+ /** Write data and punch hole; evict the block on write completion */
+ PUNCH_LRU= PUNCH | WRITE_LRU,
+ /** Zero out a range of bytes in fil_space_t::io() */
+ PUNCH_RANGE= WRITE_SYNC | 128,
+ };
+
+ constexpr IORequest(Type type= READ_SYNC, buf_page_t *bpage= nullptr) :
+ bpage(bpage), type(type) {}
+
+ constexpr IORequest(const IORequest &old, fil_node_t *node= nullptr) :
+ bpage(old.bpage), node(node), type(old.type) {}
+
+ bool is_read() const { return (type & READ_SYNC) != 0; } /* value 2, contained in every READ_* type and in DBLWR_RECOVER */
+ bool is_write() const { return (type & WRITE_SYNC) != 0; } /* value 16, contained in every WRITE_* and PUNCH* type */
+ bool is_LRU() const { return (type & (WRITE_LRU ^ WRITE_ASYNC)) != 0; } /* value 32: evict the block on write completion */
+ bool is_async() const { return (type & (READ_SYNC ^ READ_ASYNC)) != 0; } /* value 1, shared by READ_ASYNC and the WRITE_ASYNC based types */
+
+ /** If requested, free storage space associated with a section of the file.
+ Nothing is done unless off, len and node are nonzero and type has the PUNCH bit.
+ @param off byte offset from the start (SEEK_SET)
+ @param len size of the hole in bytes
+ @return DB_SUCCESS or error code */
+ dberr_t maybe_punch_hole(os_offset_t off, ulint len) const
+ {
+   return off && len && node && (type & (PUNCH ^ WRITE_ASYNC))
+     ? punch_hole(off, len) : DB_SUCCESS;
+ }
private:
- /** Page to be written on write operation. */
- buf_page_t* const m_bpage= nullptr;
+ /** Free storage space associated with a section of the file.
+ @param off byte offset from the start (SEEK_SET)
+ @param len size of the hole in bytes
+ @return DB_SUCCESS or error code */
+ dberr_t punch_hole(os_offset_t off, ulint len) const
+ MY_ATTRIBUTE((nonnull));
- /** File node */
- fil_node_t* m_fil_node= nullptr;
+public:
+ /** Page to be written on write operation */
+ buf_page_t* const bpage= nullptr;
- /** Request type bit flags */
- const uint16_t m_type;
+ /** File node that the request operates on */
+ const fil_node_t *const node= nullptr;
- /** for writes, type of page flush */
- const bool m_LRU= false;
+ /** Request type bit flags */
+ const Type type;
};
-constexpr IORequest IORequestRead(IORequest::READ);
-constexpr IORequest IORequestWrite(IORequest::WRITE);
-
+constexpr IORequest IORequestRead(IORequest::READ_SYNC);
+constexpr IORequest IORequestReadPartial(IORequest::READ_MAYBE_PARTIAL);
+constexpr IORequest IORequestWrite(IORequest::WRITE_SYNC);
/** Sparse file size information. */
struct os_file_size_t {
@@ -313,20 +266,6 @@ struct os_file_size_t {
/** Win NT does not allow more than 64 */
static const ulint OS_AIO_N_PENDING_IOS_PER_THREAD = 256;
-/** Modes for aio operations @{ */
-/** Normal asynchronous i/o not for ibuf pages or ibuf bitmap pages */
-static const ulint OS_AIO_NORMAL = 21;
-
-/** Asynchronous i/o for ibuf pages or ibuf bitmap pages */
-static const ulint OS_AIO_IBUF = 22;
-
-/**Calling thread will wait for the i/o to complete,
-and perform IO completion routine itself;
-can be used for any pages, ibuf or non-ibuf. This is used to save
-CPU time, as we can do with fewer thread switches. */
-static const ulint OS_AIO_SYNC = 24;
-/* @} */
-
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
extern ulint os_n_fsyncs;
@@ -669,9 +608,9 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__)
-# define os_aio(type, mode, name, file, buf, offset, \
+# define os_aio(type, name, file, buf, offset, \
n, read_only, message1, message2) \
- pfs_os_aio_func(type, mode, name, file, buf, offset, \
+ pfs_os_aio_func(type, name, file, buf, offset, \
n, read_only, message1, message2, \
__FILE__, __LINE__)
@@ -859,7 +798,6 @@ function!
Performance schema wrapper function of os_aio() which requests
an asynchronous I/O operation.
@param[in,out] type IO request context
-@param[in] mode IO mode
@param[in] name Name of the file or path as NUL terminated
string
@param[in] file Open file handle
@@ -879,8 +817,7 @@ an asynchronous I/O operation.
UNIV_INLINE
dberr_t
pfs_os_aio_func(
- IORequest& type,
- ulint mode,
+ const IORequest&type,
const char* name,
pfs_os_file_t file,
void* buf,
@@ -1013,9 +950,9 @@ to original un-instrumented file I/O APIs */
# define os_file_close(file) os_file_close_func(file)
-# define os_aio(type, mode, name, file, buf, offset, \
+# define os_aio(type, name, file, buf, offset, \
n, read_only, message1, message2) \
- os_aio_func(type, mode, name, file, buf, offset, \
+ os_aio_func(type, name, file, buf, offset, \
n, read_only, message1, message2)
# define os_file_read(type, file, buf, offset, n) \
@@ -1281,7 +1218,6 @@ struct os_aio_userdata_t
NOTE! Use the corresponding macro os_aio(), not directly this function!
Requests an asynchronous i/o operation.
@param[in,out] type IO request context
-@param[in] mode IO mode
@param[in] name Name of the file or path as NUL terminated
string
@param[in] file Open file handle
@@ -1298,8 +1234,7 @@ Requests an asynchronous i/o operation.
@return DB_SUCCESS or error code */
dberr_t
os_aio_func(
- IORequest& type,
- ulint mode,
+ const IORequest&type,
const char* name,
pfs_os_file_t file,
void* buf,
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index f950113f3c7..1382b79bc12 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -206,7 +206,6 @@ function!
Performance schema wrapper function of os_aio() which requests
an asynchronous i/o operation.
@param[in,type] type IO request context
-@param[in] mode IO mode
@param[in] name Name of the file or path as NUL terminated
string
@param[in] file Open file handle
@@ -226,8 +225,7 @@ an asynchronous i/o operation.
UNIV_INLINE
dberr_t
pfs_os_aio_func(
- IORequest& type,
- ulint mode,
+ const IORequest&type,
const char* name,
pfs_os_file_t file,
void* buf,
@@ -242,8 +240,6 @@ pfs_os_aio_func(
PSI_file_locker_state state;
struct PSI_file_locker* locker = NULL;
- ut_ad(type.validate());
-
/* Register the read or write I/O depending on "type" */
register_pfs_file_io_begin(
&state, locker, file, n,
@@ -251,7 +247,7 @@ pfs_os_aio_func(
src_file, src_line);
dberr_t result = os_aio_func(
- type, mode, name, file, buf, offset, n, read_only, m1, m2);
+ type, name, file, buf, offset, n, read_only, m1, m2);
register_pfs_file_io_end(locker, n);
@@ -284,8 +280,6 @@ pfs_os_file_read_func(
PSI_file_locker_state state;
struct PSI_file_locker* locker = NULL;
- ut_ad(type.validate());
-
register_pfs_file_io_begin(
&state, locker, file, n, PSI_FILE_READ, src_file, src_line);
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 0bc8b95dd77..9fe6fcfa262 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -46,10 +46,9 @@ Created 3/26/1996 Heikki Tuuri
/** Checks if a page address is the trx sys header page.
@param[in] page_id page id
@return true if trx sys header page */
-inline bool trx_sys_hdr_page(const page_id_t& page_id)
+inline bool trx_sys_hdr_page(const page_id_t page_id)
{
- return(page_id.space() == TRX_SYS_SPACE
- && page_id.page_no() == TRX_SYS_PAGE_NO);
+ return page_id == page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO);
}
/*****************************************************************//**
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index e3ac675cd56..1fe5c70bcf7 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -2060,7 +2060,14 @@ same_page:
const bool is_init= (b & 0x70) <= INIT_PAGE;
switch (*store) {
case STORE_IF_EXISTS:
- if (!fil_space_get_size(space_id))
+ if (fil_space_t *space= fil_space_acquire_silent(space_id))
+ {
+ const auto size= space->get_size();
+ space->release();
+ if (!size)
+ continue;
+ }
+ else
continue;
/* fall through */
case STORE_YES:
@@ -2487,7 +2494,7 @@ static void recv_read_in_area(page_id_t page_id)
if (p != page_nos) {
mutex_exit(&recv_sys.mutex);
- buf_read_recv_pages(FALSE, page_id.space(), page_nos,
+ buf_read_recv_pages(page_id.space(), page_nos,
ulint(p - page_nos));
mutex_enter(&recv_sys.mutex);
}
@@ -2513,7 +2520,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
if (end_lsn < i.lsn)
DBUG_LOG("ib_log", "skip log for page " << page_id
<< " LSN " << end_lsn << " < " << i.lsn);
- else if (fil_space_t *space= fil_space_acquire_for_io(page_id.space()))
+ else if (fil_space_t *space= fil_space_t::get_for_io(page_id.space()))
{
mtr.start();
mtr.set_log_mode(MTR_LOG_NO_REDO);
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index a3a2b8f4f45..e04a2af92c8 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -214,7 +214,7 @@ static void memo_slot_release(mtr_memo_slot_t *slot)
case MTR_MEMO_SPACE_X_LOCK:
{
fil_space_t *space= static_cast<fil_space_t*>(slot->object);
- space->committed_size= space->size;
+ space->set_committed_size();
rw_lock_x_unlock(&space->latch);
}
break;
@@ -256,7 +256,7 @@ struct ReleaseLatches {
case MTR_MEMO_SPACE_X_LOCK:
{
fil_space_t *space= static_cast<fil_space_t*>(slot->object);
- space->committed_size= space->size;
+ space->set_committed_size();
rw_lock_x_unlock(&space->latch);
}
break;
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index bfe18fd2519..cdf61f12ce4 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -135,7 +135,6 @@ public:
static io_slots *read_slots;
static io_slots *write_slots;
-static io_slots *ibuf_slots;
/** Number of retries for partial I/O's */
constexpr ulint NUM_RETRIES_ON_PARTIAL_IO = 10;
@@ -3143,14 +3142,7 @@ os_file_io(
bytes_returned += n_bytes;
- if (offset > 0
- && type.is_write()
- && type.punch_hole()) {
- *err = type.punch_hole(file, offset, n);
-
- } else {
- *err = DB_SUCCESS;
- }
+ *err = type.maybe_punch_hole(offset, n);
return(original_n);
}
@@ -3161,8 +3153,7 @@ os_file_io(
bytes_returned += n_bytes;
- if (!type.is_partial_io_warning_disabled()) {
-
+ if (type.type != IORequest::READ_MAYBE_PARTIAL) {
const char* op = type.is_read()
? "read" : "written";
@@ -3180,7 +3171,7 @@ os_file_io(
*err = DB_IO_ERROR;
- if (!type.is_partial_io_warning_disabled()) {
+ if (type.type != IORequest::READ_MAYBE_PARTIAL) {
ib::warn()
<< "Retry attempts for "
<< (type.is_read() ? "reading" : "writing")
@@ -3208,7 +3199,6 @@ os_file_pwrite(
os_offset_t offset,
dberr_t* err)
{
- ut_ad(type.validate());
ut_ad(type.is_write());
++os_n_file_writes;
@@ -3242,7 +3232,6 @@ os_file_write_func(
{
dberr_t err;
- ut_ad(type.validate());
ut_ad(n > 0);
WAIT_ALLOW_WRITES();
@@ -3332,7 +3321,6 @@ os_file_read_page(
os_bytes_read_since_printout += n;
- ut_ad(type.validate());
ut_ad(n > 0);
ssize_t n_bytes = os_file_pread(type, file, buf, n, offset, &err);
@@ -3657,13 +3645,9 @@ fallback:
n_bytes = buf_size;
}
- dberr_t err;
- IORequest request(IORequest::WRITE);
-
- err = os_file_write(
- request, name, file, buf, current_size, n_bytes);
-
- if (err != DB_SUCCESS) {
+ if (os_file_write(IORequestWrite, name,
+ file, buf, current_size, n_bytes) !=
+ DB_SUCCESS) {
break;
}
@@ -3786,18 +3770,11 @@ os_file_punch_hole(
#endif /* _WIN32 */
}
-inline bool IORequest::should_punch_hole() const
-{
- return m_fil_node && m_fil_node->space->punch_hole;
-}
-
/** Free storage space associated with a section of the file.
-@param[in] fh Open file handle
-@param[in] off Starting offset (SEEK_SET)
-@param[in] len Size of the hole
+@param off byte offset from the start (SEEK_SET)
+@param len size of the hole in bytes
@return DB_SUCCESS or error code */
-dberr_t
-IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
+dberr_t IORequest::punch_hole(os_offset_t off, ulint len) const
{
/* In this debugging mode, we act as if punch hole is supported,
and then skip any calls to actually punch a hole here.
@@ -3806,7 +3783,7 @@ IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
return(DB_SUCCESS);
);
- ulint trim_len = get_trim_length(len);
+ ulint trim_len = bpage ? bpage->physical_size() - len : 0;
if (trim_len == 0) {
return(DB_SUCCESS);
@@ -3816,11 +3793,11 @@ IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
/* Check whether the file system supports punching holes for this
tablespace. */
- if (!should_punch_hole()) {
+ if (!node->space->punch_hole) {
return DB_IO_NO_PUNCH_HOLE;
}
- dberr_t err = os_file_punch_hole(fh, off, trim_len);
+ dberr_t err = os_file_punch_hole(node->handle, off, trim_len);
if (err == DB_SUCCESS) {
srv_stats.page_compressed_trim_op.inc();
@@ -3828,7 +3805,7 @@ IORequest::punch_hole(os_file_t fh, os_offset_t off, ulint len)
/* If punch hole is not supported,
set space so that it is not used. */
if (err == DB_IO_NO_PUNCH_HOLE) {
- m_fil_node->space->punch_hole = false;
+ node->space->punch_hole = false;
err = DB_SUCCESS;
}
}
@@ -3885,12 +3862,8 @@ static void io_callback(tpool::aiocb* cb)
os_aio_userdata_t data(cb->m_userdata);
/* Return cb back to cache*/
if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD) {
- if (read_slots->contains(cb)) {
- read_slots->release(cb);
- } else {
- ut_ad(ibuf_slots->contains(cb));
- ibuf_slots->release(cb);
- }
+ ut_ad(read_slots->contains(cb));
+ read_slots->release(cb);
} else {
ut_ad(write_slots->contains(cb));
write_slots->release(cb);
@@ -4033,8 +4006,7 @@ bool os_aio_init(ulint n_reader_threads, ulint n_writer_threads, ulint)
{
int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
int max_read_events= int(n_reader_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
- int max_ibuf_events = 1 * OS_AIO_N_PENDING_IOS_PER_THREAD;
- int max_events = max_read_events + max_write_events + max_ibuf_events;
+ int max_events = max_read_events + max_write_events;
int ret;
#if LINUX_NATIVE_AIO
@@ -4053,7 +4025,6 @@ bool os_aio_init(ulint n_reader_threads, ulint n_writer_threads, ulint)
}
read_slots = new io_slots(max_read_events, (uint)n_reader_threads);
write_slots = new io_slots(max_write_events, (uint)n_writer_threads);
- ibuf_slots = new io_slots(max_ibuf_events, 1);
return true;
}
@@ -4062,10 +4033,8 @@ void os_aio_free()
srv_thread_pool->disable_aio();
delete read_slots;
delete write_slots;
- delete ibuf_slots;
read_slots= nullptr;
write_slots= nullptr;
- ibuf_slots= nullptr;
}
/** Waits until there are no pending writes. There can
@@ -4088,7 +4057,6 @@ void os_aio_wait_until_no_pending_writes()
NOTE! Use the corresponding macro os_aio(), not directly this function!
Requests an asynchronous i/o operation.
@param[in,out] type IO request context
-@param[in] mode IO mode
@param[in] name Name of the file or path as NUL terminated
string
@param[in] file Open file handle
@@ -4106,8 +4074,7 @@ Requests an asynchronous i/o operation.
@return DB_SUCCESS or error code */
dberr_t
os_aio_func(
- IORequest& type,
- ulint mode,
+ const IORequest&type,
const char* name,
pfs_os_file_t file,
void* buf,
@@ -4126,10 +4093,7 @@ os_aio_func(
ut_ad((n & 0xFFFFFFFFUL) == n);
#endif /* WIN_ASYNC_IO */
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- mode = OS_AIO_SYNC; os_has_said_disk_full = FALSE;);
-
- if (mode == OS_AIO_SYNC) {
+ if (!type.is_async()) {
if (type.is_read()) {
return(os_file_read_func(type, file, buf, offset, n));
}
@@ -4140,21 +4104,15 @@ os_aio_func(
}
if (type.is_read()) {
- ++os_n_file_reads;
- } else if (type.is_write()) {
- ++os_n_file_writes;
+ ++os_n_file_reads;
} else {
- ut_error;
+ ut_ad(type.is_write());
+ ++os_n_file_writes;
}
compile_time_assert(sizeof(os_aio_userdata_t) <= tpool::MAX_AIO_USERDATA_LEN);
os_aio_userdata_t userdata{m1,type,m2};
- io_slots* slots;
- if (type.is_read()) {
- slots = mode == OS_AIO_IBUF?ibuf_slots: read_slots;
- } else {
- slots = write_slots;
- }
+ io_slots* slots= type.is_read() ? read_slots : write_slots;
tpool::aiocb* cb = slots->acquire();
cb->m_buffer = buf;
@@ -4462,12 +4420,11 @@ void fil_node_t::find_metadata(os_file_t file
}
/** Read the first page of a data file.
-@param[in] first whether this is the very first read
@return whether the page was found valid */
-bool fil_node_t::read_page0(bool first)
+bool fil_node_t::read_page0()
{
ut_ad(mutex_own(&fil_system.mutex));
- const ulint psize = space->physical_size();
+ const unsigned psize = space->physical_size();
#ifndef _WIN32
struct stat statbuf;
if (fstat(handle, &statbuf)) {
@@ -4479,7 +4436,7 @@ bool fil_node_t::read_page0(bool first)
os_offset_t size_bytes = os_file_get_size(handle);
ut_a(size_bytes != (os_offset_t) -1);
#endif
- const ulint min_size = FIL_IBD_FILE_INITIAL_SIZE * psize;
+ const uint32_t min_size = FIL_IBD_FILE_INITIAL_SIZE * psize;
if (size_bytes < min_size) {
ib::error() << "The size of the file " << name
@@ -4506,7 +4463,7 @@ corrupted:
const uint32_t size = fsp_header_get_field(page, FSP_SIZE);
const uint32_t free_limit = fsp_header_get_field(page, FSP_FREE_LIMIT);
const uint32_t free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE
- + page);
+ + page);
if (!fil_space_t::is_valid_flags(flags, space->id)) {
ulint cflags = fsp_flags_convert_from_101(flags);
if (cflags == ULINT_UNDEFINED) {
@@ -4546,41 +4503,26 @@ invalid:
return false;
}
- if (first) {
- ut_ad(space->id != TRX_SYS_SPACE);
#ifdef UNIV_LINUX
- find_metadata(handle, &statbuf);
+ find_metadata(handle, &statbuf);
#else
- find_metadata();
+ find_metadata();
#endif
+ /* Truncate the size to a multiple of extent size. */
+ ulint mask = psize * FSP_EXTENT_SIZE - 1;
- /* Truncate the size to a multiple of extent size. */
- ulint mask = psize * FSP_EXTENT_SIZE - 1;
-
- if (size_bytes <= mask) {
- /* .ibd files start smaller than an
- extent size. Do not truncate valid data. */
- } else {
- size_bytes &= ~os_offset_t(mask);
- }
-
- space->flags = (space->flags & FSP_FLAGS_MEM_MASK) | flags;
-
- space->punch_hole = space->is_compressed();
- this->size = uint32_t(size_bytes / psize);
- space->committed_size = space->size += this->size;
- } else if (space->id != TRX_SYS_SPACE || space->size_in_header) {
- /* If this is not the first-time open, do nothing.
- For the system tablespace, we always get invoked as
- first=false, so we detect the true first-time-open based
- on size_in_header and proceed to initialize the data. */
- return true;
+ if (size_bytes <= mask) {
+ /* .ibd files start smaller than an
+ extent size. Do not truncate valid data. */
} else {
- /* Initialize the size of predefined tablespaces
- to FSP_SIZE. */
- space->committed_size = size;
+ size_bytes &= ~os_offset_t(mask);
}
+ space->flags = (space->flags & FSP_FLAGS_MEM_MASK) | flags;
+
+ space->punch_hole = space->is_compressed();
+ this->size = uint32_t(size_bytes / psize);
+ space->set_sizes(this->size);
ut_ad(space->free_limit == 0 || space->free_limit == free_limit);
ut_ad(space->free_len == 0 || space->free_len == free_len);
space->size_in_header = size;
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index 03706d9ae99..8376fbb4ba6 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -3424,8 +3424,7 @@ fil_iterate(
byte* const writeptr = readptr;
err = os_file_read_no_error_handling(
- IORequest(IORequest::READ
- | IORequest::DISABLE_PARTIAL_IO_WARNINGS),
+ IORequestReadPartial,
iter.file, readptr, offset, n_bytes, 0);
if (err != DB_SUCCESS) {
ib::error() << iter.filepath
@@ -3664,9 +3663,7 @@ not_encrypted:
/* A page was updated in the set, write back to disk. */
if (updated) {
- IORequest write_request(IORequest::WRITE);
-
- err = os_file_write(write_request,
+ err = os_file_write(IORequestWrite,
iter.filepath, iter.file,
writeptr, offset, n_bytes);
@@ -3759,10 +3756,8 @@ fil_tablespace_iterate(
/* Read the first page and determine the page and zip size. */
- err = os_file_read_no_error_handling(
- IORequest(IORequest::READ
- | IORequest::DISABLE_PARTIAL_IO_WARNINGS),
- file, page, 0, srv_page_size, 0);
+ err = os_file_read_no_error_handling(IORequestReadPartial,
+ file, page, 0, srv_page_size, 0);
if (err == DB_SUCCESS) {
err = callback.init(file_size, block);
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index 0cddde4b3ca..0bdf52dfd56 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -545,7 +545,7 @@ row_quiesce_table_start(
if (!trx_is_interrupted(trx)) {
/* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes();
- fil_flush(table->space_id);
+ table->space->flush();
if (row_quiesce_write_cfg(table, trx->mysql_thd)
!= DB_SUCCESS) {
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 1746d351263..d4ee4dc3c4b 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -229,10 +229,12 @@ srv_file_check_mode(
static const char INIT_LOG_FILE0[]= "101";
/** Creates log file.
-@param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value
-@param[out] logfile0 name of the log file
+@param[in] create_new_db whether the database is being initialized
+@param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value
+@param[out] logfile0 name of the log file
@return DB_SUCCESS or error code */
-static dberr_t create_log_file(lsn_t lsn, std::string& logfile0)
+static dberr_t create_log_file(bool create_new_db, lsn_t lsn,
+ std::string& logfile0)
{
if (srv_read_only_mode) {
ib::error() << "Cannot create log file in read-only mode";
@@ -296,7 +298,9 @@ static dberr_t create_log_file(lsn_t lsn, std::string& logfile0)
}
log_sys.log.open_file(logfile0);
- fil_open_system_tablespace_files();
+ if (!fil_system.sys_space->open(create_new_db)) {
+ return DB_ERROR;
+ }
/* Create a log checkpoint. */
log_mutex_enter();
@@ -553,8 +557,8 @@ err_exit:
fil_set_max_space_id_if_bigger(space_id);
- fil_space_t *space= fil_space_create(undo_name, space_id, fsp_flags,
- FIL_TYPE_TABLESPACE, NULL);
+ fil_space_t *space= fil_space_t::create(undo_name, space_id, fsp_flags,
+ FIL_TYPE_TABLESPACE, NULL);
ut_a(fil_validate());
ut_a(space);
@@ -563,20 +567,15 @@ err_exit:
if (create)
{
+ space->set_sizes(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
space->size= file->size= uint32_t(size >> srv_page_size_shift);
- space->size_in_header= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
- space->committed_size= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
}
- else
+ else if (!file->read_page0())
{
- success= file->read_page0(true);
- if (!success)
- {
- os_file_close(file->handle);
- file->handle= OS_FILE_CLOSED;
- ut_a(fil_system.n_open > 0);
- fil_system.n_open--;
- }
+ os_file_close(file->handle);
+ file->handle= OS_FILE_CLOSED;
+ ut_a(fil_system.n_open > 0);
+ fil_system.n_open--;
}
mutex_exit(&fil_system.mutex);
@@ -803,7 +802,7 @@ srv_open_tmp_tablespace(bool create_new_db)
true, create_new_db, &sum_of_new_sizes, NULL))
!= DB_SUCCESS) {
ib::error() << "Unable to create the shared innodb_temporary";
- } else if (fil_system.temp_space->open()) {
+ } else if (fil_system.temp_space->open(true)) {
/* Initialize the header page */
mtr_t mtr;
mtr.start();
@@ -1304,7 +1303,7 @@ dberr_t srv_start(bool create_new_db)
log_sys.set_flushed_lsn(flushed_lsn);
buf_flush_sync();
- err = create_log_file(flushed_lsn, logfile0);
+ err = create_log_file(true, flushed_lsn, logfile0);
if (err != DB_SUCCESS) {
return(srv_init_abort(err));
@@ -1333,7 +1332,7 @@ dberr_t srv_start(bool create_new_db)
srv_log_file_size = srv_log_file_size_requested;
- err = create_log_file(flushed_lsn, logfile0);
+ err = create_log_file(false, flushed_lsn, logfile0);
if (err == DB_SUCCESS) {
err = create_log_file_rename(flushed_lsn,
@@ -1364,11 +1363,11 @@ dberr_t srv_start(bool create_new_db)
file_checked:
/* Open log file and data files in the system tablespace: we keep
them open until database shutdown */
-
- fil_open_system_tablespace_files();
ut_d(fil_system.sys_space->recv_size = srv_sys_space_size_debug);
- err = srv_undo_tablespaces_init(create_new_db);
+ err = fil_system.sys_space->open(create_new_db)
+ ? srv_undo_tablespaces_init(create_new_db)
+ : DB_ERROR;
/* If the force recovery is set very high then we carry on regardless
of all errors. Basically this is fingers crossed mode. */
@@ -1673,7 +1672,7 @@ file_checked:
srv_log_file_size = srv_log_file_size_requested;
- err = create_log_file(flushed_lsn, logfile0);
+ err = create_log_file(false, flushed_lsn, logfile0);
if (err == DB_SUCCESS) {
err = create_log_file_rename(flushed_lsn,
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index f9f564e1841..c0375f25fa6 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -584,11 +584,10 @@ static void trx_purge_truncate_history()
: 0, j = i;; ) {
ulint space_id = srv_undo_space_id_start + i;
ut_ad(srv_is_undo_tablespace(space_id));
+ fil_space_t* space= fil_space_get(space_id);
- if (fil_space_get_size(space_id)
- > threshold) {
- purge_sys.truncate.current
- = fil_space_get(space_id);
+ if (space && space->get_size() > threshold) {
+ purge_sys.truncate.current = space;
break;
}