diff options
Diffstat (limited to 'storage/innobase/fil/fil0fil.cc')
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 763 |
1 files changed, 343 insertions, 420 deletions
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 9413a2b3cba..b38899e6de4 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. -Copyright (c) 2013, 2017, MariaDB Corporation. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -153,7 +153,11 @@ fil_addr_t fil_addr_null = {FIL_NULL, 0}; /** The tablespace memory cache. This variable is NULL before the module is initialized. */ -fil_system_t* fil_system = NULL; +UNIV_INTERN fil_system_t* fil_system = NULL; + +/** At this age or older a space/page will be rotated */ +UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age; +UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex; /** Determine if user has explicitly disabled fsync(). */ # define fil_buffering_disabled(s) \ @@ -241,18 +245,12 @@ fil_node_prepare_for_io( fil_system_t* system, /*!< in: tablespace memory cache */ fil_space_t* space); /*!< in: space */ -/** -Updates the data structures when an i/o operation finishes. Updates the -pending i/o's field in the node appropriately. +/** Update the data structures when an i/o operation finishes. @param[in,out] node file node -@param[in,out] system tablespace instance @param[in] type IO context */ static void -fil_node_complete_io( - fil_node_t* node, - fil_system_t* system, - const IORequest& type); +fil_node_complete_io(fil_node_t* node, const IORequest& type); /** Reads data from a space to a buffer. Remember that the possible incomplete blocks at the end of file are ignored: they are not taken into account when @@ -654,6 +652,18 @@ retry: page, FSP_FREE_LIMIT); const ulint free_len = flst_get_len( FSP_HEADER_OFFSET + FSP_FREE + page); + + /* Try to read crypt_data from page 0 if it is not yet + read. FIXME: Remove page_0_crypt_read, and simply ensure in + fil_space_t object creation that node->size==0 if and only + if the crypt_data is not known and must be read. */ + if (!space->page_0_crypt_read) { + space->page_0_crypt_read = true; + ut_ad(space->crypt_data == NULL); + space->crypt_data = fil_space_read_crypt_data( + page_size_t(space->flags), page); + } + ut_free(buf2); os_file_close(node->handle); @@ -993,61 +1003,6 @@ skip_flush: space->n_pending_flushes--; } -/** -Fill the pages with NULs -@param[in] node File node -@param[in] page_size physical page size -@param[in] start Offset from the start of the file in bytes -@param[in] len Length in bytes -@param[in] read_only_mode - if true, then read only mode checks are enforced. -@return DB_SUCCESS or error code */ -static -dberr_t -fil_write_zeros( - const fil_node_t* node, - ulint page_size, - os_offset_t start, - ulint len, - bool read_only_mode) -{ - ut_a(len > 0); - - /* Extend at most 1M at a time */ - ulint n_bytes = ut_min(static_cast<ulint>(1024 * 1024), len); - byte* ptr = reinterpret_cast<byte*>(ut_zalloc_nokey(n_bytes - + page_size)); - byte* buf = reinterpret_cast<byte*>(ut_align(ptr, page_size)); - - os_offset_t offset = start; - dberr_t err = DB_SUCCESS; - const os_offset_t end = start + len; - IORequest request(IORequest::WRITE); - - while (offset < end) { - err = os_aio( - request, OS_AIO_SYNC, node->name, - node->handle, buf, offset, n_bytes, read_only_mode, - NULL, NULL); - - if (err != DB_SUCCESS) { - break; - } - - offset += n_bytes; - - n_bytes = ut_min(n_bytes, static_cast<ulint>(end - offset)); - - DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", - DBUG_SUICIDE();); - } - - ut_free(ptr); - - return(err); -} - - /** Try to extend a tablespace. @param[in,out] space tablespace to be extended @param[in,out] node last file of the tablespace @@ -1098,79 +1053,177 @@ fil_space_extend_must_retry( ut_ad(size > space->size); - ulint pages_added = size - space->size; + ulint last_page_no = space->size; + const ulint file_start_page_no = last_page_no - node->size; + + /* Determine correct file block size */ + if (node->block_size == 0) { + node->block_size = os_file_get_block_size( + node->handle, node->name); + } + const page_size_t pageSize(space->flags); const ulint page_size = pageSize.physical(); - os_offset_t start = os_file_get_size(node->handle); - ut_a(start != (os_offset_t) -1); - start &= ~(page_size - 1); - const os_offset_t end - = (node->size + pages_added) * page_size; +#ifdef _WIN32 + /* Logically or physically extend the file with zero bytes, + depending on whether it is sparse. */ - *success = end <= start; + /* FIXME: Call DeviceIoControl(node->handle, FSCTL_SET_SPARSE, ...) + when opening a file when FSP_FLAGS_HAS_PAGE_COMPRESSION(). */ + { + FILE_END_OF_FILE_INFO feof; + /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. + fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes. + Do not shrink short ROW_FORMAT=COMPRESSED files. */ + feof.EndOfFile.QuadPart = std::max( + os_offset_t(size - file_start_page_no) * page_size, + os_offset_t(FIL_IBD_FILE_INITIAL_SIZE + * UNIV_PAGE_SIZE)); + *success = SetFileInformationByHandle(node->handle, + FileEndOfFileInfo, + &feof, sizeof feof); + if (!*success) { + ib::error() << "extending file '" << node->name + << "' from " + << os_offset_t(node->size) * page_size + << " to " << feof.EndOfFile.QuadPart + << " bytes failed with " << GetLastError(); + } else { + last_page_no = size; + } + } +#else + /* We will logically extend the file with ftruncate() if + page_compression is enabled, because the file is expected to + be sparse in that case. Make sure that ftruncate() can deal + with large files. */ + const bool is_sparse = sizeof(off_t) >= 8 + && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); + + if (is_sparse) { + /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. + fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes. + Do not shrink short ROW_FORMAT=COMPRESSED files. */ + off_t s = std::max(off_t(size - file_start_page_no) + * off_t(page_size), + off_t(FIL_IBD_FILE_INITIAL_SIZE + * UNIV_PAGE_SIZE)); + *success = !ftruncate(node->handle, s); + if (!*success) { + ib::error() << "ftruncate of file '" << node->name + << "' from " + << os_offset_t(last_page_no + - file_start_page_no) + * page_size << " to " << os_offset_t(s) + << " bytes failed with " << errno; + } else { + last_page_no = size; + } + } else { + const os_offset_t start_offset + = os_offset_t(last_page_no - file_start_page_no) + * page_size; + const ulint n_pages = size - last_page_no; + const os_offset_t len = os_offset_t(n_pages) * page_size; +# ifdef HAVE_POSIX_FALLOCATE + int err; + do { + err = posix_fallocate(node->handle, start_offset, len); + } while (err == EINTR + && srv_shutdown_state == SRV_SHUTDOWN_NONE); + + if (err != EINVAL) { + + *success = !err; + if (!*success) { + ib::error() << "extending file '" << node->name + << "' from " + << start_offset + << " to " << len + start_offset + << " bytes failed with: " << err; + } + } else +# endif /* HAVE_POSIX_FALLOCATE */ + { + /* Extend at most 1 megabyte pages at a time */ + ulint n_bytes = std::min(ulint(1) << 20, n_pages) + * page_size; + byte* buf2 = static_cast<byte*>( + calloc(1, n_bytes + page_size)); + *success = buf2 != NULL; + if (!buf2) { + ib::error() << "Cannot allocate " + << n_bytes + page_size + << " bytes to extend file"; + } + byte* const buf = static_cast<byte*>( + ut_align(buf2, page_size)); + IORequest request(IORequest::WRITE); - if (!*success) { - DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", - DBUG_SUICIDE();); -#ifdef HAVE_POSIX_FALLOCATE - /* On Linux, FusionIO atomic writes cannot extend - files, so we must use posix_fallocate(). */ - int ret = posix_fallocate(node->handle, start, - end - start); - - /* EINVAL means that fallocate() is not supported. - One known case is Linux ext3 file system with O_DIRECT. */ - if (ret == 0) { - } else if (ret != EINVAL) { - ib::error() - << "posix_fallocate(): Failed to preallocate" - " data for file " - << node->name << ", desired size " - << end << " bytes." - " Operating system error number " - << ret << ". Check" - " that the disk is not full or a disk quota" - " exceeded. Some operating system error" - " numbers are described at " REFMAN - "operating-system-error-codes.html"; - } else -#endif - if (DB_SUCCESS != fil_write_zeros( - node, page_size, start, - static_cast<ulint>(end - start), - space->purpose == FIL_TYPE_TEMPORARY - && srv_read_only_mode)) { - ib::warn() - << "Error while writing " << end - start - << " zeroes to " << node->name - << " starting at offset " << start; - } + os_offset_t offset = start_offset; + const os_offset_t end = start_offset + len; + const bool read_only_mode = space->purpose + == FIL_TYPE_TEMPORARY && srv_read_only_mode; - /* Check how many pages actually added */ - os_offset_t actual_end = os_file_get_size(node->handle); - ut_a(actual_end != static_cast<os_offset_t>(-1)); - ut_a(actual_end >= start); + while (*success && offset < end) { + dberr_t err = os_aio( + request, OS_AIO_SYNC, node->name, + node->handle, buf, offset, n_bytes, + read_only_mode, NULL, NULL); - *success = end >= actual_end; - pages_added = static_cast<ulint>( - (std::min(actual_end, end) - start) / page_size); - } + if (err != DB_SUCCESS) { + *success = false; + ib::error() << "writing zeroes to file '" + << node->name << "' from " + << offset << " to " << offset + n_bytes + << " bytes failed with: " + << ut_strerr(err); + break; + } - os_has_said_disk_full = !*success; + offset += n_bytes; - mutex_enter(&fil_system->mutex); + n_bytes = std::min(n_bytes, + static_cast<ulint>(end - offset)); + } - space->size += pages_added; + free(buf2); + } + + os_has_said_disk_full = *success; + if (*success) { + last_page_no = size; + } else { + /* Let us measure the size of the file + to determine how much we were able to + extend it */ + os_offset_t fsize = os_file_get_size(node->handle); + ut_a(fsize != os_offset_t(-1)); + + last_page_no = ulint(fsize / page_size) + + file_start_page_no; + } + } +#endif + mutex_enter(&fil_system->mutex); ut_a(node->being_extended); node->being_extended = false; - node->size += pages_added; + ut_a(last_page_no - file_start_page_no >= node->size); + + ulint file_size = last_page_no - file_start_page_no; + space->size += file_size - node->size; + node->size = file_size; const ulint pages_in_MiB = node->size & ~((1 << (20 - UNIV_PAGE_SIZE_SHIFT)) - 1); - fil_node_complete_io(node, fil_system, IORequestWrite); + fil_node_complete_io(node, +#ifndef _WIN32 + !is_sparse ? IORequestWrite : +#endif /* _WIN32 */ + IORequestRead); /* Keep the last data file size info up to date, rounded to full megabytes */ @@ -1423,6 +1476,12 @@ fil_space_detach( UT_LIST_REMOVE(fil_system->unflushed_spaces, space); } + if (space->is_in_rotation_list) { + space->is_in_rotation_list = false; + + UT_LIST_REMOVE(fil_system->rotation_list, space); + } + UT_LIST_REMOVE(fil_system->space_list, space); ut_a(space->magic_n == FIL_SPACE_MAGIC_N); @@ -1518,22 +1577,25 @@ fil_space_free( } /** Create a space memory object and put it to the fil_system hash table. -The tablespace name is independent from the tablespace file-name. Error messages are issued to the server log. -@param[in] name Tablespace name -@param[in] id Tablespace identifier -@param[in] flags Tablespace flags -@param[in] purpose Tablespace purpose +@param[in] name tablespace name +@param[in] id tablespace identifier +@param[in] flags tablespace flags +@param[in] purpose tablespace purpose +@param[in,out] crypt_data encryption information +@param[in] create_table whether this is CREATE TABLE +@param[in] mode encryption mode @return pointer to created tablespace, to be filled in with fil_node_create() @retval NULL on failure (such as when the same tablespace exists) */ fil_space_t* fil_space_create( - const char* name, - ulint id, - ulint flags, - fil_type_t purpose, - fil_space_crypt_t* crypt_data, /*!< in: crypt data */ - bool create_table) /*!< in: true if create table */ + const char* name, + ulint id, + ulint flags, + fil_type_t purpose, + fil_space_crypt_t* crypt_data, + bool create_table, + fil_encryption_t mode) { fil_space_t* space; @@ -1595,7 +1657,6 @@ fil_space_create( space->flags = flags; space->magic_n = FIL_SPACE_MAGIC_N; - space->crypt_data = crypt_data; /* In create table we write page 0 so we have already @@ -1635,7 +1696,23 @@ fil_space_create( fil_system->max_assigned_id = id; } - mutex_exit(&fil_system->mutex); + /* Inform key rotation that there could be something + to do */ + if (purpose == FIL_TYPE_TABLESPACE + && !srv_fil_crypt_rotate_key_age && fil_crypt_threads_event && + (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || + srv_encrypt_tables)) { + /* Key rotation is not enabled, need to inform background + encryption threads. */ + UT_LIST_ADD_LAST(fil_system->rotation_list, space); + space->is_in_rotation_list = true; + mutex_exit(&fil_system->mutex); + mutex_enter(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_threads_event); + mutex_exit(&fil_crypt_threads_mutex); + } else { + mutex_exit(&fil_system->mutex); + } return(space); } @@ -1750,7 +1827,7 @@ fil_space_get_space( return(NULL); } - fil_node_complete_io(node, fil_system, IORequestRead); + fil_node_complete_io(node, IORequestRead); } return(space); @@ -1972,6 +2049,7 @@ fil_init( UT_LIST_INIT(fil_system->LRU, &fil_node_t::LRU); UT_LIST_INIT(fil_system->space_list, &fil_space_t::space_list); + UT_LIST_INIT(fil_system->rotation_list, &fil_space_t::rotation_list); UT_LIST_INIT(fil_system->unflushed_spaces, &fil_space_t::unflushed_spaces); UT_LIST_INIT(fil_system->named_spaces, &fil_space_t::named_spaces); @@ -2546,9 +2624,7 @@ fil_recreate_tablespace( page_zip.m_start = #endif /* UNIV_DEBUG */ page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0; - buf_flush_init_for_writing( - NULL, page, &page_zip, 0, - fsp_is_checksum_disabled(space_id)); + buf_flush_init_for_writing(NULL, page, &page_zip, 0); err = fil_write(page_id_t(space_id, 0), page_size, 0, page_size.physical(), page_zip.data); @@ -2611,7 +2687,7 @@ fil_recreate_tablespace( ut_ad(!page_size.is_compressed()); buf_flush_init_for_writing( - block, page, NULL, recv_lsn, false); + block, page, NULL, recv_lsn); err = fil_write(cur_page_id, page_size, 0, page_size.physical(), page); @@ -2625,8 +2701,7 @@ fil_recreate_tablespace( buf_block_get_page_zip(block); buf_flush_init_for_writing( - block, page, page_zip, recv_lsn, - fsp_is_checksum_disabled(space_id)); + block, page, page_zip, recv_lsn); err = fil_write(cur_page_id, page_size, 0, page_size.physical(), @@ -2854,16 +2929,22 @@ fil_check_pending_operations( mutex_enter(&fil_system->mutex); fil_space_t* sp = fil_space_get_by_id(id); + if (sp) { sp->stop_new_ops = true; + if (sp->crypt_data) { + sp->n_pending_ops++; + mutex_exit(&fil_system->mutex); + fil_space_crypt_close_tablespace(sp); + mutex_enter(&fil_system->mutex); + ut_ad(sp->n_pending_ops > 0); + sp->n_pending_ops--; + } } - mutex_exit(&fil_system->mutex); /* Check for pending operations. */ do { - mutex_enter(&fil_system->mutex); - sp = fil_space_get_by_id(id); count = fil_check_pending_ops(sp, count); @@ -2874,15 +2955,14 @@ fil_check_pending_operations( os_thread_sleep(20000); } + mutex_enter(&fil_system->mutex); } while (count > 0); /* Check for pending IO. */ *path = 0; - do { - mutex_enter(&fil_system->mutex); - + for (;;) { sp = fil_space_get_by_id(id); if (sp == NULL) { @@ -2900,11 +2980,13 @@ fil_check_pending_operations( mutex_exit(&fil_system->mutex); - if (count > 0) { - os_thread_sleep(20000); + if (count == 0) { + break; } - } while (count > 0); + os_thread_sleep(20000); + mutex_enter(&fil_system->mutex); + } ut_ad(sp); @@ -3801,9 +3883,7 @@ fil_ibd_create( if (!page_size.is_compressed()) { - buf_flush_init_for_writing( - NULL, page, NULL, 0, - fsp_is_checksum_disabled(space_id)); + buf_flush_init_for_writing(NULL, page, NULL, 0); err = os_file_write( request, path, file, page, 0, page_size.physical()); @@ -3817,9 +3897,7 @@ fil_ibd_create( page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0; - buf_flush_init_for_writing( - NULL, page, &page_zip, 0, - fsp_is_checksum_disabled(space_id)); + buf_flush_init_for_writing(NULL, page, &page_zip, 0); err = os_file_write( request, path, file, page_zip.data, 0, @@ -3863,13 +3941,13 @@ fil_ibd_create( /* Create crypt data if the tablespace is either encrypted or user has requested it to remain unencrypted. */ - if (mode == FIL_SPACE_ENCRYPTION_ON || mode == FIL_SPACE_ENCRYPTION_OFF || + if (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || srv_encrypt_tables) { crypt_data = fil_space_create_crypt_data(mode, key_id); } space = fil_space_create(name, space_id, flags, FIL_TYPE_TABLESPACE, - crypt_data, true); + crypt_data, true, mode); fil_node_t* node = NULL; @@ -4957,19 +5035,14 @@ fil_node_prepare_for_io( return(true); } -/********************************************************************//** -Updates the data structures when an i/o operation finishes. Updates the -pending i/o's field in the node appropriately. */ +/** Update the data structures when an i/o operation finishes. +@param[in,out] node file node +@param[in] type IO context */ static void -fil_node_complete_io( -/*=================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - const IORequest&type) /*!< in: IO_TYPE_*, marks the node as - modified if TYPE_IS_WRITE() */ +fil_node_complete_io(fil_node_t* node, const IORequest& type) { - ut_ad(mutex_own(&system->mutex)); + ut_ad(mutex_own(&fil_system->mutex)); ut_a(node->n_pending > 0); --node->n_pending; @@ -4981,9 +5054,9 @@ fil_node_complete_io( ut_ad(!srv_read_only_mode || fsp_is_system_temporary(node->space->id)); - ++system->modification_counter; + ++fil_system->modification_counter; - node->modification_counter = system->modification_counter; + node->modification_counter = fil_system->modification_counter; if (fil_buffering_disabled(node->space)) { @@ -4998,14 +5071,14 @@ fil_node_complete_io( node->space->is_in_unflushed_spaces = true; UT_LIST_ADD_FIRST( - system->unflushed_spaces, node->space); + fil_system->unflushed_spaces, node->space); } } if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) { /* The node must be put back to the LRU list */ - UT_LIST_ADD_FIRST(system->LRU, node); + UT_LIST_ADD_FIRST(fil_system->LRU, node); } } @@ -5247,7 +5320,7 @@ fil_io( /* If we can tolerate the non-existent pages, we should return with DB_ERROR and let caller decide what to do. */ - fil_node_complete_io(node, fil_system, req_type); + fil_node_complete_io(node, req_type); mutex_exit(&fil_system->mutex); return(DB_ERROR); } @@ -5320,7 +5393,7 @@ fil_io( mutex_enter(&fil_system->mutex); - fil_node_complete_io(node, fil_system, req_type); + fil_node_complete_io(node, req_type); mutex_exit(&fil_system->mutex); @@ -5360,7 +5433,7 @@ fil_aio_wait( mutex_enter(&fil_system->mutex); - fil_node_complete_io(node, fil_system, type); + fil_node_complete_io(node, type); mutex_exit(&fil_system->mutex); @@ -5793,7 +5866,8 @@ fil_iterate( || page_type == FIL_PAGE_PAGE_COMPRESSED); /* If tablespace is encrypted, we need to decrypt - the page. */ + the page. Note that tablespaces are not in + fil_system during import. */ if (encrypted) { decrypted = fil_space_decrypt( iter.crypt_data, @@ -6070,9 +6144,7 @@ fil_tablespace_iterate( /* read (optional) crypt data */ iter.crypt_data = fil_space_read_crypt_data( - 0, page, FSP_HEADER_OFFSET - + fsp_header_get_encryption_offset( - callback.get_page_size())); + callback.get_page_size(), page); if (err == DB_SUCCESS) { @@ -6107,10 +6179,12 @@ fil_tablespace_iterate( err = fil_iterate(iter, block, callback); + if (iter.crypt_data) { + fil_space_destroy_crypt_data(&iter.crypt_data); + } + ut_free(io_buffer); ut_free(crypt_io_buffer); - - fil_space_destroy_crypt_data(&iter.crypt_data); } } @@ -6618,269 +6692,138 @@ fil_space_t::release_free_extents(ulint n_reserved) n_reserved_extents -= n_reserved; } -/****************************************************************** -Get crypt data for a tablespace */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_get_crypt_data( -/*=====================*/ - ulint id) /*!< in: space id */ +/** Return the next fil_space_t. +Once started, the caller must keep calling this until it returns NULL. +fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in] prev_space Pointer to the previous fil_space_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_space_t. +@retval NULL if this was the last*/ +fil_space_t* +fil_space_next( + fil_space_t* prev_space) { - fil_space_t* space; - fil_space_crypt_t* crypt_data = NULL; - - ut_ad(fil_system); + fil_space_t* space=prev_space; mutex_enter(&fil_system->mutex); - space = fil_space_get_by_id(id); + if (prev_space == NULL) { + space = UT_LIST_GET_FIRST(fil_system->space_list); - mutex_exit(&fil_system->mutex); + /* We can trust that space is not NULL because at least the + system tablespace is always present and loaded first. */ + space->n_pending_ops++; + } else { + ut_ad(space->n_pending_ops > 0); - if (space != NULL) { - /* If we have not yet read the page0 - of this tablespace we will do it now. */ - if (!space->crypt_data && !space->page_0_crypt_read) { - ulint space_id = space->id; - fil_node_t* node; - - ut_a(space->crypt_data == NULL); - node = UT_LIST_GET_FIRST(space->chain); - - byte *buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE, PSI_INSTRUMENT_ME)); - byte *page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE)); - fil_read(page_id_t(space_id, 0), univ_page_size, 0, univ_page_size.physical(), - page); - ulint offset = FSP_HEADER_OFFSET - + fsp_header_get_encryption_offset( - page_size_t(space->flags)); - space->crypt_data = fil_space_read_crypt_data(space_id, page, offset); - ut_free(buf); - - DBUG_LOG("crypt", - "Read page 0 from" - << " tablespace " << space_id - << " name " << space->name - << " key_id " << (space->crypt_data - ? space->crypt_data->key_id - : 0) - << " encryption " - << (space->crypt_data - ? space->crypt_data->encryption : 0) - << " handle " << node->handle); - - ut_a(space->id == space_id); + /* Move on to the next fil_space_t */ + space->n_pending_ops--; + space = UT_LIST_GET_NEXT(space_list, space); - space->page_0_crypt_read = true; + /* Skip spaces that are being created by + fil_ibd_create(), or dropped, or !tablespace. */ + while (space != NULL + && (UT_LIST_GET_LEN(space->chain) == 0 + || space->stop_new_ops + || space->purpose != FIL_TYPE_TABLESPACE)) { + space = UT_LIST_GET_NEXT(space_list, space); } - crypt_data = space->crypt_data; - - if (!space->page_0_crypt_read) { - ib::warn() << "Space " << space->id << " name " - << space->name << " contains encryption " - << (space->crypt_data ? space->crypt_data->encryption : 0) - << " information for key_id " - << (space->crypt_data ? space->crypt_data->key_id : 0) - << " but page0 is not read."; + if (space != NULL) { + space->n_pending_ops++; } } - return(crypt_data); -} - -/*******************************************************************//** -Increments the count of pending operation, if space is not being deleted. -@return TRUE if being deleted, and operation should be skipped */ -UNIV_INTERN -ibool -fil_inc_pending_ops( -/*================*/ - ulint id, /*!< in: space id */ - ibool print_err) /*!< in: need to print error or not */ -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space == NULL) { - if (print_err) { - fprintf(stderr, - "InnoDB: Error: trying to do an operation on a" - " dropped tablespace %lu\n", - (ulong) id); - } - } - - if (space == NULL || space->stop_new_ops) { - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - space->n_pending_ops++; - mutex_exit(&fil_system->mutex); - return(FALSE); + return(space); } -/*******************************************************************//** -Decrements the count of pending operations. */ -UNIV_INTERN +/** +Remove space from key rotation list if there are no more +pending operations. +@param[in,out] space Tablespace */ +static void -fil_decr_pending_ops( -/*=================*/ - ulint id) /*!< in: space id */ +fil_space_remove_from_keyrotation(fil_space_t* space) { - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space == NULL) { - fprintf(stderr, - "InnoDB: Error: decrementing pending operation" - " of a dropped tablespace %lu\n", - (ulong) id); - } + ut_ad(mutex_own(&fil_system->mutex)); + ut_ad(space); - if (space != NULL) { - space->n_pending_ops--; + if (space->n_pending_ops == 0 && space->is_in_rotation_list) { + space->is_in_rotation_list = false; + ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0); + UT_LIST_REMOVE(fil_system->rotation_list, space); } - - mutex_exit(&fil_system->mutex); } -/****************************************************************** -Set crypt data for a tablespace */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_set_crypt_data( -/*=====================*/ - ulint id, /*!< in: space id */ - fil_space_crypt_t* crypt_data) /*!< in: crypt data */ -{ - fil_space_t* space; - fil_space_crypt_t* free_crypt_data = NULL; - fil_space_crypt_t* ret_crypt_data = NULL; - ut_ad(fil_system); +/** Return the next fil_space_t from key rotation list. +Once started, the caller must keep calling this until it returns NULL. +fil_space_acquire() and fil_space_release() are invoked here which +blocks a concurrent operation from dropping the tablespace. +@param[in] prev_space Pointer to the previous fil_space_t. +If NULL, use the first fil_space_t on fil_system->space_list. +@return pointer to the next fil_space_t. +@retval NULL if this was the last*/ +fil_space_t* +fil_space_keyrotate_next( + fil_space_t* prev_space) +{ + fil_space_t* space = prev_space; + fil_space_t* old = NULL; mutex_enter(&fil_system->mutex); - space = fil_space_get_by_id(id); - - if (space != NULL) { - if (space->crypt_data != NULL) { - /* Here we need to release fil_system mutex to - avoid mutex deadlock assertion. Here we would - taje mutexes in order fil_system, crypt_data and - in fil_crypt_start_encrypting_space we would - take them in order crypt_data, fil_system - at fil_space_get_flags -> fil_space_get_space */ - mutex_exit(&fil_system->mutex); - fil_space_merge_crypt_data(space->crypt_data, - crypt_data); - ret_crypt_data = space->crypt_data; - free_crypt_data = crypt_data; - } else { - space->crypt_data = crypt_data; - ret_crypt_data = space->crypt_data; - mutex_exit(&fil_system->mutex); + if (UT_LIST_GET_LEN(fil_system->rotation_list) == 0) { + if (space) { + ut_ad(space->n_pending_ops > 0); + space->n_pending_ops--; + fil_space_remove_from_keyrotation(space); } - } else { - /* there is a small risk that tablespace has been deleted */ - free_crypt_data = crypt_data; mutex_exit(&fil_system->mutex); + return(NULL); } - if (free_crypt_data != NULL) { - /* there was already crypt data present and the new crypt - * data provided as argument to this function has been merged - * into that => free new crypt data - */ - fil_space_destroy_crypt_data(&free_crypt_data); - } - - return ret_crypt_data; -} + if (prev_space == NULL) { + space = UT_LIST_GET_FIRST(fil_system->rotation_list); -/****************************************************************** -Get id of first tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_first_space_safe() -/*======================*/ -{ - ulint out_id = ULINT_UNDEFINED; - fil_space_t* space; + /* We can trust that space is not NULL because we + checked list length above */ + } else { + ut_ad(space->n_pending_ops > 0); - mutex_enter(&fil_system->mutex); + /* Move on to the next fil_space_t */ + space->n_pending_ops--; - space = UT_LIST_GET_FIRST(fil_system->space_list); - if (space != NULL) { - do - { - if (!space->stop_new_ops && UT_LIST_GET_LEN(space->chain) > 0) { - out_id = space->id; - break; - } + old = space; + space = UT_LIST_GET_NEXT(rotation_list, space); - space = UT_LIST_GET_NEXT(space_list, space); - } while (space != NULL); + fil_space_remove_from_keyrotation(old); } - mutex_exit(&fil_system->mutex); - - return out_id; -} - -/****************************************************************** -Get id of next tablespace that has node or ULINT_UNDEFINED if none */ -UNIV_INTERN -ulint -fil_get_next_space_safe( -/*====================*/ - ulint id) /*!< in: previous space id */ -{ - bool found; - fil_space_t* space; - ulint out_id = ULINT_UNDEFINED; - - mutex_enter(&fil_system->mutex); + /* Skip spaces that are being created by fil_ibd_create(), + or dropped or truncated. Note that rotation_list contains only + space->purpose == FIL_TYPE_TABLESPACE. */ + while (space != NULL + && (UT_LIST_GET_LEN(space->chain) == 0 + || space->is_stopping())) { - space = fil_space_get_by_id(id); - if (space == NULL) { - /* we didn't find it...search for space with space->id > id */ - found = false; - space = UT_LIST_GET_FIRST(fil_system->space_list); - } else { - /* we found it, take next available space */ - found = true; + old = space; + space = UT_LIST_GET_NEXT(rotation_list, space); + fil_space_remove_from_keyrotation(old); } - while ((space = UT_LIST_GET_NEXT(space_list, space)) != NULL) { - - if (!found && space->id <= id) - continue; - - if (!space->stop_new_ops) { - /* inc reference to prevent drop */ - out_id = space->id; - break; - } + if (space != NULL) { + space->n_pending_ops++; } mutex_exit(&fil_system->mutex); - return out_id; + return(space); } - /********************************************************************//** Find correct node from file space @return node */ @@ -6981,26 +6924,6 @@ fil_space_found_by_id( return space; } -/****************************************************************//** -Acquire fil_system mutex */ -void -fil_system_enter(void) -/*==================*/ -{ - ut_ad(!mutex_own(&fil_system->mutex)); - mutex_enter(&fil_system->mutex); -} - -/****************************************************************//** -Release fil_system mutex */ -void -fil_system_exit(void) -/*=================*/ -{ - ut_ad(mutex_own(&fil_system->mutex)); - mutex_exit(&fil_system->mutex); -} - /** Get should we punch hole to tablespace. @param[in] node File node |