diff options
author | Michael Widenius <monty@askmonty.org> | 2012-08-01 17:27:34 +0300 |
---|---|---|
committer | Michael Widenius <monty@askmonty.org> | 2012-08-01 17:27:34 +0300 |
commit | 1d0f70c2f894b27e98773a282871d32802f67964 (patch) | |
tree | 833e683e0ced29c4323c29a9d845703d4dfcd81b /storage/innobase/fil | |
parent | 5a86a61219826aadf8d08cbc447fe438f2bf50c3 (diff) | |
download | mariadb-git-1d0f70c2f894b27e98773a282871d32802f67964.tar.gz |
Temporary commit of merge of MariaDB 10.0-base and MySQL 5.6
Diffstat (limited to 'storage/innobase/fil')
-rw-r--r-- | storage/innobase/fil/fil0fil.cc (renamed from storage/innobase/fil/fil0fil.c) | 866 |
1 files changed, 472 insertions, 394 deletions
diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.cc index 0a467d40345..4c6ed9807f6 100644 --- a/storage/innobase/fil/fil0fil.c +++ b/storage/innobase/fil/fil0fil.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,13 +11,13 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ /**************************************************//** -@file fil/fil0fil.c +@file fil/fil0fil.cc The tablespace memory cache Created 10/25/1995 Heikki Tuuri @@ -40,12 +40,15 @@ Created 10/25/1995 Heikki Tuuri #include "dict0dict.h" #include "page0page.h" #include "page0zip.h" +#include "trx0sys.h" +#include "buf0rea.h" #ifndef UNIV_HOTBACKUP # include "buf0lru.h" # include "ibuf0ibuf.h" # include "sync0sync.h" # include "os0sync.h" #else /* !UNIV_HOTBACKUP */ +# include "srv0srv.h" static ulint srv_data_read, srv_data_written; #endif /* !UNIV_HOTBACKUP */ @@ -118,6 +121,9 @@ UNIV_INTERN ulint fil_n_pending_log_flushes = 0; /** Number of pending tablespace flushes */ UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0; +/** Number of files currently open */ +UNIV_INTERN ulint fil_n_file_opened = 0; + /** The null file address */ UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0}; @@ -151,6 +157,9 @@ struct fil_node_struct { /*!< count of pending flushes on this file; closing of the file is not allowed if this is > 0 */ + ibool being_extended; + /*!< TRUE if the node is currently + being extended. */ ib_int64_t modification_counter;/*!< when we write to the file we increment this by one */ ib_int64_t flush_counter;/*!< up to what @@ -185,7 +194,7 @@ struct fil_space_struct { .ibd file of tablespace and want to stop temporarily posting of new i/o requests on the file */ - ibool stop_new_ops; + ibool stop_ibuf_merges; /*!< we set this TRUE when we start deleting a single-table tablespace */ ibool is_being_deleted; @@ -203,20 +212,20 @@ struct fil_space_struct { tablespace whose size we do not know yet; last incomplete megabytes in data files may be ignored if space == 0 */ - ulint flags; /*!< compressed page size and file format, or 0 */ + ulint flags; /*!< tablespace flags; see + fsp_flags_validate(), fsp_flags_get_zip_size() */ ulint n_reserved_extents; /*!< number of reserved free extents for ongoing operations like B-tree page split */ ulint n_pending_flushes; /*!< this is positive when flushing the tablespace to disk; dropping of the tablespace is forbidden if this is positive */ - ulint n_pending_ops;/*!< this is positive when we - have pending operations against this - tablespace. The pending operations can - be ibuf merges or lock validation code - trying to read a block. - Dropping of the tablespace is forbidden - if this is positive */ + ulint n_pending_ibuf_merges;/*!< this is positive + when merging insert buffer entries to + a page so that we may need to access + the ibuf bitmap page in the + tablespade: dropping of the tablespace + is forbidden if this is positive */ hash_node_t hash; /*!< hash chain node */ hash_node_t name_hash;/*!< hash chain the name_hash table */ #ifndef UNIV_HOTBACKUP @@ -300,6 +309,9 @@ struct fil_system_struct { initialized. */ static fil_system_t* fil_system = NULL; +/** Determine if (i) is a user tablespace id or not. */ +# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces) + #ifdef UNIV_DEBUG /** Try fil_validate() every this many times */ # define FIL_VALIDATE_SKIP 17 @@ -330,6 +342,19 @@ fil_validate_skip(void) #endif /* UNIV_DEBUG */ /********************************************************************//** +Determines if a file node belongs to the least-recently-used list. +@return TRUE if the file belongs to fil_system->LRU mutex. */ +UNIV_INLINE +ibool +fil_space_belongs_in_lru( +/*=====================*/ + const fil_space_t* space) /*!< in: file space */ +{ + return(space->purpose == FIL_TABLESPACE + && fil_is_user_tablespace_id(space->id)); +} + +/********************************************************************//** NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! Prepares a file node for i/o. Opens the file if it is closed. Updates the @@ -610,21 +635,15 @@ fil_node_create( mutex_enter(&fil_system->mutex); - node = mem_alloc(sizeof(fil_node_t)); + node = static_cast<fil_node_t*>(mem_zalloc(sizeof(fil_node_t))); node->name = mem_strdup(name); - node->open = FALSE; ut_a(!is_raw || srv_start_raw_disk_in_use); node->is_raw_disk = is_raw; node->size = size; node->magic_n = FIL_NODE_MAGIC_N; - node->n_pending = 0; - node->n_pending_flushes = 0; - - node->modification_counter = 0; - node->flush_counter = 0; space = fil_space_get_by_id(id); @@ -659,7 +678,7 @@ fil_node_create( } /********************************************************************//** -Opens a the file of a node of a tablespace. The caller must own the fil_system +Opens a file of a node of a tablespace. The caller must own the fil_system mutex. */ static void @@ -669,15 +688,14 @@ fil_node_open_file( fil_system_t* system, /*!< in: tablespace memory cache */ fil_space_t* space) /*!< in: space */ { - ib_int64_t size_bytes; - ulint size_low; - ulint size_high; + os_offset_t size_bytes; ibool ret; ibool success; byte* buf2; byte* page; ulint space_id; ulint flags; + ulint page_size; ut_ad(mutex_own(&(system->mutex))); ut_a(node->n_pending == 0); @@ -709,10 +727,8 @@ fil_node_open_file( ut_a(0); } - os_file_get_size(node->handle, &size_low, &size_high); - - size_bytes = (((ib_int64_t)size_high) << 32) - + (ib_int64_t)size_low; + size_bytes = os_file_get_size(node->handle); + ut_a(size_bytes != (os_offset_t) -1); #ifdef UNIV_HOTBACKUP if (space->id == 0) { node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); @@ -721,17 +737,16 @@ fil_node_open_file( } #endif /* UNIV_HOTBACKUP */ ut_a(space->purpose != FIL_LOG); - ut_a(space->id != 0); + ut_a(fil_is_user_tablespace_id(space->id)); if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Error: the size of single-table" " tablespace file %s\n" - "InnoDB: is only %lu %lu," + "InnoDB: is only "UINT64PF"," " should be at least %lu!\n", node->name, - (ulong) size_high, - (ulong) size_low, + size_bytes, (ulong) (FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE)); @@ -740,15 +755,15 @@ fil_node_open_file( /* Read the first page of the tablespace */ - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); + buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); + page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - success = os_file_read(node->handle, page, 0, 0, - UNIV_PAGE_SIZE); + success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE); space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); + page_size = fsp_flags_get_page_size(flags); ut_free(buf2); @@ -776,6 +791,19 @@ fil_node_open_file( ut_error; } + if (UNIV_UNLIKELY(fsp_flags_get_page_size(space->flags) + != page_size)) { + fprintf(stderr, + "InnoDB: Error: tablespace file %s" + " has page size %lx\n" + "InnoDB: but the data dictionary" + " expects page size %lx!\n", + node->name, flags, + fsp_flags_get_page_size(space->flags)); + + ut_error; + } + if (UNIV_UNLIKELY(space->flags != flags)) { fprintf(stderr, "InnoDB: Error: table flags are %lx" @@ -791,12 +819,12 @@ fil_node_open_file( size_bytes = ut_2pow_round(size_bytes, 1024 * 1024); } - if (!(flags & DICT_TF_ZSSIZE_MASK)) { + if (!fsp_flags_is_compressed(flags)) { node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); } else { node->size = (ulint) (size_bytes - / dict_table_flags_to_zip_size(flags)); + / fsp_flags_get_zip_size(flags)); } #ifdef UNIV_HOTBACKUP @@ -834,8 +862,10 @@ add_size: node->open = TRUE; system->n_open++; + fil_n_file_opened++; + + if (fil_space_belongs_in_lru(space)) { - if (space->purpose == FIL_TABLESPACE && space->id != 0) { /* Put the node to the LRU list */ UT_LIST_ADD_FIRST(LRU, system->LRU, node); } @@ -857,8 +887,11 @@ fil_node_close_file( ut_a(node->open); ut_a(node->n_pending == 0); ut_a(node->n_pending_flushes == 0); + ut_a(!node->being_extended); +#ifndef UNIV_HOTBACKUP ut_a(node->modification_counter == node->flush_counter || srv_fast_shutdown == 2); +#endif /* !UNIV_HOTBACKUP */ ret = os_file_close(node->handle); ut_a(ret); @@ -868,8 +901,10 @@ fil_node_close_file( node->open = FALSE; ut_a(system->n_open > 0); system->n_open--; + fil_n_file_opened--; + + if (fil_space_belongs_in_lru(node->space)) { - if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) { ut_a(UT_LIST_GET_LEN(system->LRU) > 0); /* The node is in the LRU list, remove it */ @@ -896,32 +931,37 @@ fil_try_to_close_file_in_LRU( ut_ad(mutex_own(&fil_system->mutex)); - node = UT_LIST_GET_LAST(fil_system->LRU); - if (print_info) { fprintf(stderr, "InnoDB: fil_sys open file LRU len %lu\n", (ulong) UT_LIST_GET_LEN(fil_system->LRU)); } - while (node != NULL) { + for (node = UT_LIST_GET_LAST(fil_system->LRU); + node != NULL; + node = UT_LIST_GET_PREV(LRU, node)) { + if (node->modification_counter == node->flush_counter - && node->n_pending_flushes == 0) { + && node->n_pending_flushes == 0 + && !node->being_extended) { fil_node_close_file(node, fil_system); return(TRUE); } - if (print_info && node->n_pending_flushes > 0) { + if (!print_info) { + continue; + } + + if (node->n_pending_flushes > 0) { fputs("InnoDB: cannot close file ", stderr); ut_print_filename(stderr, node->name); fprintf(stderr, ", because n_pending_flushes %lu\n", (ulong) node->n_pending_flushes); } - if (print_info - && node->modification_counter != node->flush_counter) { + if (node->modification_counter != node->flush_counter) { fputs("InnoDB: cannot close file ", stderr); ut_print_filename(stderr, node->name); fprintf(stderr, @@ -930,7 +970,11 @@ fil_try_to_close_file_in_LRU( (long) node->flush_counter); } - node = UT_LIST_GET_PREV(LRU, node); + if (node->being_extended) { + fputs("InnoDB: cannot close file ", stderr); + ut_print_filename(stderr, node->name); + fprintf(stderr, ", because it is being extended\n"); + } } return(FALSE); @@ -1088,6 +1132,7 @@ fil_node_free( ut_ad(mutex_own(&(system->mutex))); ut_a(node->magic_n == FIL_NODE_MAGIC_N); ut_a(node->n_pending == 0); + ut_a(!node->being_extended); if (node->open) { /* We fool the assertion in fil_node_close_file() to think @@ -1153,8 +1198,8 @@ fil_space_truncate_start( #endif /* UNIV_LOG_ARCHIVE */ /*******************************************************************//** -Creates a space memory object and puts it to the tablespace memory cache. If -there is an error, prints an error message to the .err log. +Creates a space memory object and puts it to the 'fil system' hash table. +If there is an error, prints an error message to the .err log. @return TRUE if success */ UNIV_INTERN ibool @@ -1162,20 +1207,12 @@ fil_space_create( /*=============*/ const char* name, /*!< in: space name */ ulint id, /*!< in: space id */ - ulint flags, /*!< in: compressed page size - and file format, or 0 */ + ulint flags, /*!< in: tablespace flags */ ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ { fil_space_t* space; - /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for - ROW_FORMAT=COMPACT - ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and - ROW_FORMAT=REDUNDANT (table->flags == 0). For any other - format, the tablespace flags should equal - (table->flags & ~(~0 << DICT_TF_BITS)). */ - ut_a(flags != DICT_TF_COMPACT); - ut_a(!(flags & (~0UL << DICT_TF_BITS))); + fsp_flags_validate(flags); try_again: /*printf( @@ -1257,7 +1294,7 @@ try_again: return(FALSE); } - space = mem_alloc(sizeof(fil_space_t)); + space = static_cast<fil_space_t*>(mem_zalloc(sizeof(*space))); space->name = mem_strdup(name); space->id = id; @@ -1282,19 +1319,9 @@ try_again: fil_system->max_assigned_id = id; } - space->stop_ios = FALSE; - space->stop_new_ops = FALSE; - space->is_being_deleted = FALSE; space->purpose = purpose; - space->size = 0; space->flags = flags; - space->n_reserved_extents = 0; - - space->n_pending_flushes = 0; - space->n_pending_ops = 0; - - UT_LIST_INIT(space->chain); space->magic_n = FIL_SPACE_MAGIC_N; rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); @@ -1387,7 +1414,7 @@ fil_space_free( in X mode */ { fil_space_t* space; - fil_space_t* namespace; + fil_space_t* fnamespace; fil_node_t* fil_node; ut_ad(mutex_own(&fil_system->mutex)); @@ -1406,9 +1433,9 @@ fil_space_free( HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space); - namespace = fil_space_get_by_name(space->name); - ut_a(namespace); - ut_a(space == namespace); + fnamespace = fil_space_get_by_name(space->name); + ut_a(fnamespace); + ut_a(space == fnamespace); HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, ut_fold_string(space->name), space); @@ -1563,7 +1590,7 @@ fil_space_get_zip_size( if (flags && flags != ULINT_UNDEFINED) { - return(dict_table_flags_to_zip_size(flags)); + return(fsp_flags_get_zip_size(flags)); } return(flags); @@ -1602,7 +1629,8 @@ fil_init( ut_a(hash_size > 0); ut_a(max_n_open > 0); - fil_system = mem_zalloc(sizeof(fil_system_t)); + fil_system = static_cast<fil_system_t*>( + mem_zalloc(sizeof(fil_system_t))); mutex_create(fil_system_mutex_key, &fil_system->mutex, SYNC_ANY_LATCH); @@ -1627,47 +1655,51 @@ fil_open_log_and_system_tablespace_files(void) /*==========================================*/ { fil_space_t* space; - fil_node_t* node; mutex_enter(&fil_system->mutex); - space = UT_LIST_GET_FIRST(fil_system->space_list); + for (space = UT_LIST_GET_FIRST(fil_system->space_list); + space != NULL; + space = UT_LIST_GET_NEXT(space_list, space)) { - while (space != NULL) { - if (space->purpose != FIL_TABLESPACE || space->id == 0) { - node = UT_LIST_GET_FIRST(space->chain); + fil_node_t* node; - while (node != NULL) { - if (!node->open) { - fil_node_open_file(node, fil_system, - space); - } - if (fil_system->max_n_open - < 10 + fil_system->n_open) { - fprintf(stderr, - "InnoDB: Warning: you must" - " raise the value of" - " innodb_open_files in\n" - "InnoDB: my.cnf! Remember that" - " InnoDB keeps all log files" - " and all system\n" - "InnoDB: tablespace files open" - " for the whole time mysqld is" - " running, and\n" - "InnoDB: needs to open also" - " some .ibd files if the" - " file-per-table storage\n" - "InnoDB: model is used." - " Current open files %lu," - " max allowed" - " open files %lu.\n", - (ulong) fil_system->n_open, - (ulong) fil_system->max_n_open); - } - node = UT_LIST_GET_NEXT(chain, node); + if (fil_space_belongs_in_lru(space)) { + + continue; + } + + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { + + if (!node->open) { + fil_node_open_file(node, fil_system, space); + } + + if (fil_system->max_n_open < 10 + fil_system->n_open) { + + fprintf(stderr, + "InnoDB: Warning: you must" + " raise the value of" + " innodb_open_files in\n" + "InnoDB: my.cnf! Remember that" + " InnoDB keeps all log files" + " and all system\n" + "InnoDB: tablespace files open" + " for the whole time mysqld is" + " running, and\n" + "InnoDB: needs to open also" + " some .ibd files if the" + " file-per-table storage\n" + "InnoDB: model is used." + " Current open files %lu," + " max allowed" + " open files %lu.\n", + (ulong) fil_system->n_open, + (ulong) fil_system->max_n_open); } } - space = UT_LIST_GET_NEXT(space_list, space); } mutex_exit(&fil_system->mutex); @@ -1742,23 +1774,24 @@ static ulint fil_write_lsn_and_arch_no_to_file( /*==============================*/ - ulint sum_of_sizes, /*!< in: combined size of previous files - in space, in database pages */ - ib_uint64_t lsn, /*!< in: lsn to write */ - ulint arch_log_no __attribute__((unused))) - /*!< in: archived log number to write */ + ulint space, /*!< in: space to write to */ + ulint sum_of_sizes, /*!< in: combined size of previous files + in space, in database pages */ + lsn_t lsn, /*!< in: lsn to write */ + ulint arch_log_no __attribute__((unused))) + /*!< in: archived log number to write */ { byte* buf1; byte* buf; - buf1 = mem_alloc(2 * UNIV_PAGE_SIZE); - buf = ut_align(buf1, UNIV_PAGE_SIZE); + buf1 = static_cast<byte*>(mem_alloc(2 * UNIV_PAGE_SIZE)); + buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE)); - fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); + fil_read(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); - fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); + fil_write(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); mem_free(buf1); @@ -1773,36 +1806,40 @@ UNIV_INTERN ulint fil_write_flushed_lsn_to_data_files( /*================================*/ - ib_uint64_t lsn, /*!< in: lsn to write */ - ulint arch_log_no) /*!< in: latest archived log - file number */ + lsn_t lsn, /*!< in: lsn to write */ + ulint arch_log_no) /*!< in: latest archived log file number */ { fil_space_t* space; fil_node_t* node; - ulint sum_of_sizes; ulint err; mutex_enter(&fil_system->mutex); - space = UT_LIST_GET_FIRST(fil_system->space_list); + for (space = UT_LIST_GET_FIRST(fil_system->space_list); + space != NULL; + space = UT_LIST_GET_NEXT(space_list, space)) { - while (space) { /* We only write the lsn to all existing data files which have been open during the lifetime of the mysqld process; they are represented by the space objects in the tablespace memory - cache. Note that all data files in the system tablespace 0 are - always open. */ + cache. Note that all data files in the system tablespace 0 + and the UNDO log tablespaces (if separate) are always open. */ if (space->purpose == FIL_TABLESPACE - && space->id == 0) { - sum_of_sizes = 0; + && !fil_is_user_tablespace_id(space->id)) { + + ulint sum_of_sizes = 0; + + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { - node = UT_LIST_GET_FIRST(space->chain); - while (node) { mutex_exit(&fil_system->mutex); err = fil_write_lsn_and_arch_no_to_file( - sum_of_sizes, lsn, arch_log_no); + space->id, sum_of_sizes, lsn, + arch_log_no); + if (err != DB_SUCCESS) { return(err); @@ -1811,10 +1848,8 @@ fil_write_flushed_lsn_to_data_files( mutex_enter(&fil_system->mutex); sum_of_sizes += node->size; - node = UT_LIST_GET_NEXT(chain, node); } } - space = UT_LIST_GET_NEXT(space_list, space); } mutex_exit(&fil_system->mutex); @@ -1840,25 +1875,26 @@ fil_read_first_page( ulint* max_arch_log_no, /*!< out: max of archived log numbers in data files */ #endif /* UNIV_LOG_ARCHIVE */ - ib_uint64_t* min_flushed_lsn, /*!< out: min of flushed + lsn_t* min_flushed_lsn, /*!< out: min of flushed lsn values in data files */ - ib_uint64_t* max_flushed_lsn) /*!< out: max of flushed + lsn_t* max_flushed_lsn) /*!< out: max of flushed lsn values in data files */ { - byte* buf; - page_t* page; - ib_uint64_t flushed_lsn; + byte* buf; + byte* page; + lsn_t flushed_lsn; + + buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); - buf = ut_malloc(2 * UNIV_PAGE_SIZE); /* Align the memory for a possible read from a raw device */ - page = ut_align(buf, UNIV_PAGE_SIZE); - os_file_read(data_file, page, 0, 0, UNIV_PAGE_SIZE); + page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE)); - *flags = mach_read_from_4(page + - FSP_HEADER_OFFSET + FSP_SPACE_FLAGS); + os_file_read(data_file, page, 0, UNIV_PAGE_SIZE); - flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); + *flags = fsp_header_get_flags(page); + + flushed_lsn = mach_read_from_8(page+ FIL_PAGE_FILE_FLUSH_LSN); ut_free(buf); @@ -1892,12 +1928,13 @@ fil_read_first_page( #ifndef UNIV_HOTBACKUP /*******************************************************************//** -Increments the count of pending operation, if space is not being deleted. -@return TRUE if being deleted, and operation should be skipped */ +Increments the count of pending insert buffer page merges, if space is not +being deleted. +@return TRUE if being deleted, and ibuf merges should be skipped */ UNIV_INTERN ibool -fil_inc_pending_ops( -/*================*/ +fil_inc_pending_ibuf_merges( +/*========================*/ ulint id) /*!< in: space id */ { fil_space_t* space; @@ -1908,18 +1945,18 @@ fil_inc_pending_ops( if (space == NULL) { fprintf(stderr, - "InnoDB: Error: trying to do an operation on a" + "InnoDB: Error: trying to do ibuf merge to a" " dropped tablespace %lu\n", (ulong) id); } - if (space == NULL || space->stop_new_ops) { + if (space == NULL || space->stop_ibuf_merges) { mutex_exit(&fil_system->mutex); return(TRUE); } - space->n_pending_ops++; + space->n_pending_ibuf_merges++; mutex_exit(&fil_system->mutex); @@ -1927,11 +1964,11 @@ fil_inc_pending_ops( } /*******************************************************************//** -Decrements the count of pending operations. */ +Decrements the count of pending insert buffer page merges. */ UNIV_INTERN void -fil_decr_pending_ops( -/*=================*/ +fil_decr_pending_ibuf_merges( +/*=========================*/ ulint id) /*!< in: space id */ { fil_space_t* space; @@ -1942,13 +1979,13 @@ fil_decr_pending_ops( if (space == NULL) { fprintf(stderr, - "InnoDB: Error: decrementing pending operation" - " of a dropped tablespace %lu\n", + "InnoDB: Error: decrementing ibuf merge of a" + " dropped tablespace %lu\n", (ulong) id); } if (space != NULL) { - space->n_pending_ops--; + space->n_pending_ibuf_merges--; } mutex_exit(&fil_system->mutex); @@ -1971,7 +2008,7 @@ fil_create_directory_for_tablename( len = strlen(fil_path_to_mysql_datadir); namend = strchr(name, '/'); ut_a(namend); - path = mem_alloc(len + (namend - name) + 2); + path = static_cast<char*>(mem_alloc(len + (namend - name) + 2)); memcpy(path, fil_path_to_mysql_datadir, len); path[len] = '/'; @@ -2150,7 +2187,7 @@ fil_op_log_parse_or_replay( /* Let us try to perform the file operation, if sensible. Note that ibbackup has at this stage already read in all space id info to the - fil0fil.c data structures. + fil0fil.cc data structures. NOTE that our algorithm is not guaranteed to work correctly if there were renames of tables during the backup. See ibbackup code for more @@ -2159,7 +2196,7 @@ fil_op_log_parse_or_replay( switch (type) { case MLOG_FILE_DELETE: if (fil_tablespace_exists_in_mem(space_id)) { - ut_a(fil_delete_tablespace(space_id, TRUE)); + ut_a(fil_delete_tablespace(space_id)); } break; @@ -2207,6 +2244,7 @@ fil_op_log_parse_or_replay( if (fil_create_new_single_table_tablespace( space_id, name, FALSE, flags, + DICT_TF2_USE_TABLESPACE, FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { ut_error; } @@ -2229,9 +2267,7 @@ UNIV_INTERN ibool fil_delete_tablespace( /*==================*/ - ulint id, /*!< in: space id */ - ibool evict_all) /*!< in: TRUE if we want all pages - evicted from LRU. */ + ulint id) /*!< in: space id */ { ibool success; fil_space_t* space; @@ -2240,15 +2276,15 @@ fil_delete_tablespace( char* path; ut_a(id != 0); -stop_new_ops: +stop_ibuf_merges: mutex_enter(&fil_system->mutex); space = fil_space_get_by_id(id); if (space != NULL) { - space->stop_new_ops = TRUE; + space->stop_ibuf_merges = TRUE; - if (space->n_pending_ops == 0) { + if (space->n_pending_ibuf_merges == 0) { mutex_exit(&fil_system->mutex); count = 0; @@ -2262,10 +2298,9 @@ stop_new_ops: ut_print_filename(stderr, space->name); fprintf(stderr, ",\n" "InnoDB: but there are %lu pending" - " operations (most likely ibuf merges)" - " on it.\n" + " ibuf merges on it.\n" "InnoDB: Loop %lu.\n", - (ulong) space->n_pending_ops, + (ulong) space->n_pending_ibuf_merges, (ulong) count); } @@ -2274,7 +2309,7 @@ stop_new_ops: os_thread_sleep(20000); count++; - goto stop_new_ops; + goto stop_ibuf_merges; } } @@ -2300,14 +2335,18 @@ try_again: } ut_a(space); - ut_a(space->n_pending_ops == 0); + ut_a(space->n_pending_ibuf_merges == 0); space->is_being_deleted = TRUE; + /* TODO: The following code must change when InnoDB supports + multiple datafiles per tablespace. */ ut_a(UT_LIST_GET_LEN(space->chain) == 1); + node = UT_LIST_GET_FIRST(space->chain); - if (space->n_pending_flushes > 0 || node->n_pending > 0) { + if (space->n_pending_flushes > 0 || node->n_pending > 0 + || node->being_extended) { if (count > 1000) { ut_print_timestamp(stderr); fputs(" InnoDB: Warning: trying to" @@ -2316,6 +2355,7 @@ try_again: fprintf(stderr, ",\n" "InnoDB: but there are %lu flushes" " and %lu pending i/o's on it\n" + "InnoDB: Or it is being extended\n" "InnoDB: Loop %lu.\n", (ulong) space->n_pending_flushes, (ulong) node->n_pending, @@ -2329,7 +2369,7 @@ try_again: goto try_again; } - path = mem_strdup(space->name); + path = mem_strdup(node->name); mutex_exit(&fil_system->mutex); @@ -2353,10 +2393,7 @@ try_again: completely and permanently. The flag is_being_deleted also prevents fil_flush() from being applied to this tablespace. */ - buf_LRU_flush_or_remove_pages( - id, evict_all - ? BUF_REMOVE_ALL_NO_WRITE - : BUF_REMOVE_FLUSH_NO_WRITE); + buf_LRU_invalidate_tablespace(id); #endif /* printf("Deleting tablespace %s id %lu\n", space->name, id); */ @@ -2444,7 +2481,7 @@ fil_discard_tablespace( { ibool success; - success = fil_delete_tablespace(id, TRUE); + success = fil_delete_tablespace(id); if (!success) { fprintf(stderr, @@ -2472,7 +2509,8 @@ fil_rename_tablespace_in_mem( /*=========================*/ fil_space_t* space, /*!< in: tablespace memory object */ fil_node_t* node, /*!< in: file node of that tablespace */ - const char* path) /*!< in: new name */ + const char* new_name, /*!< in: new name */ + const char* new_path) /*!< in: new file path */ { fil_space_t* space2; const char* old_name = space->name; @@ -2488,10 +2526,10 @@ fil_rename_tablespace_in_mem( return(FALSE); } - space2 = fil_space_get_by_name(path); + space2 = fil_space_get_by_name(new_name); if (space2 != NULL) { fputs("InnoDB: Error: ", stderr); - ut_print_filename(stderr, path); + ut_print_filename(stderr, new_name); fputs(" is already in tablespace memory cache\n", stderr); return(FALSE); @@ -2502,11 +2540,11 @@ fil_rename_tablespace_in_mem( mem_free(space->name); mem_free(node->name); - space->name = mem_strdup(path); - node->name = mem_strdup(path); + space->name = mem_strdup(new_name); + node->name = mem_strdup(new_path); HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, - ut_fold_string(path), space); + ut_fold_string(new_name), space); return(TRUE); } @@ -2522,9 +2560,12 @@ fil_make_ibd_name( TEMPORARY table */ ibool is_temp) /*!< in: TRUE if it is a dir path */ { + char* filename; ulint namelen = strlen(name); ulint dirlen = strlen(fil_path_to_mysql_datadir); - char* filename = mem_alloc(namelen + dirlen + sizeof "/.ibd"); + + filename = static_cast<char*>( + mem_alloc(namelen + dirlen + sizeof "/.ibd")); if (is_temp) { memcpy(filename, name, namelen); @@ -2550,7 +2591,7 @@ UNIV_INTERN ibool fil_rename_tablespace( /*==================*/ - const char* old_name, /*!< in: old table name in the standard + const char* old_name_in, /*!< in: old table name in the standard databasename/tablename format of InnoDB, or NULL if we do the rename based on the space id only */ @@ -2563,23 +2604,21 @@ fil_rename_tablespace( fil_space_t* space; fil_node_t* node; ulint count = 0; - char* path; - ibool old_name_was_specified = TRUE; + char* new_path; + char* old_name; char* old_path; + const char* not_given = "(name not specified)"; ut_a(id != 0); - if (old_name == NULL) { - old_name = "(name not specified)"; - old_name_was_specified = FALSE; - } retry: count++; if (!(count % 1000)) { ut_print_timestamp(stderr); fputs(" InnoDB: Warning: problems renaming ", stderr); - ut_print_filename(stderr, old_name); + ut_print_filename(stderr, + old_name_in ? old_name_in : not_given); fputs(" to ", stderr); ut_print_filename(stderr, new_name); fprintf(stderr, ", %lu iterations\n", (ulong) count); @@ -2594,7 +2633,8 @@ retry: "InnoDB: Error: cannot find space id %lu" " in the tablespace memory cache\n" "InnoDB: though the table ", (ulong) id); - ut_print_filename(stderr, old_name); + ut_print_filename(stderr, + old_name_in ? old_name_in : not_given); fputs(" in a rename operation should have that id\n", stderr); mutex_exit(&fil_system->mutex); @@ -2617,8 +2657,10 @@ retry: ut_a(UT_LIST_GET_LEN(space->chain) == 1); node = UT_LIST_GET_FIRST(space->chain); - if (node->n_pending > 0 || node->n_pending_flushes > 0) { - /* There are pending i/o's or flushes, sleep for a while and + if (node->n_pending > 0 || node->n_pending_flushes > 0 + || node->being_extended) { + /* There are pending i/o's or flushes or the file is + currently being extended, sleep for a while and retry */ mutex_exit(&fil_system->mutex); @@ -2646,34 +2688,35 @@ retry: /* Check that the old name in the space is right */ - if (old_name_was_specified) { + if (old_name_in) { + old_name = mem_strdup(old_name_in); old_path = fil_make_ibd_name(old_name, FALSE); - ut_a(strcmp(space->name, old_path) == 0); + ut_a(strcmp(space->name, old_name) == 0); ut_a(strcmp(node->name, old_path) == 0); } else { - old_path = mem_strdup(space->name); + old_name = mem_strdup(space->name); + old_path = mem_strdup(node->name); } /* Rename the tablespace and the node in the memory cache */ - path = fil_make_ibd_name(new_name, FALSE); - success = fil_rename_tablespace_in_mem(space, node, path); + new_path = fil_make_ibd_name(new_name, FALSE); + success = fil_rename_tablespace_in_mem( + space, node, new_name, new_path); if (success) { - success = os_file_rename(innodb_file_data_key, old_path, path); + success = os_file_rename( + innodb_file_data_key, old_path, new_path); if (!success) { /* We have to revert the changes we made to the tablespace memory cache */ - ut_a(fil_rename_tablespace_in_mem(space, node, - old_path)); + ut_a(fil_rename_tablespace_in_mem( + space, node, old_name, old_path)); } } - mem_free(path); - mem_free(old_path); - space->stop_ios = FALSE; mutex_exit(&fil_system->mutex); @@ -2689,6 +2732,11 @@ retry: mtr_commit(&mtr); } #endif + + mem_free(new_path); + mem_free(old_path); + mem_free(old_name); + return(success); } @@ -2711,6 +2759,7 @@ fil_create_new_single_table_tablespace( ibool is_temp, /*!< in: TRUE if a table created with CREATE TEMPORARY TABLE */ ulint flags, /*!< in: tablespace flags */ + ulint flags2, /*!< in: table flags2 */ ulint size) /*!< in: the initial size of the tablespace file in pages, must be >= FIL_IBD_FILE_INITIAL_SIZE */ @@ -2720,26 +2769,23 @@ fil_create_new_single_table_tablespace( ulint err; byte* buf2; byte* page; - ibool success; char* path; + ibool success; ut_a(space_id > 0); ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); - /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for - ROW_FORMAT=COMPACT - ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and - ROW_FORMAT=REDUNDANT (table->flags == 0). For any other - format, the tablespace flags should equal - (table->flags & ~(~0 << DICT_TF_BITS)). */ - ut_a(flags != DICT_TF_COMPACT); - ut_a(!(flags & (~0UL << DICT_TF_BITS))); + fsp_flags_validate(flags); path = fil_make_ibd_name(tablename, is_temp); - file = os_file_create(innodb_file_data_key, path, - OS_FILE_CREATE, OS_FILE_NORMAL, - OS_DATA_FILE, &ret); + file = os_file_create( + innodb_file_data_key, path, + OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, + OS_FILE_NORMAL, + OS_DATA_FILE, + &ret); + if (ret == FALSE) { ut_print_timestamp(stderr); fputs(" InnoDB: Error creating file ", stderr); @@ -2782,7 +2828,7 @@ fil_create_new_single_table_tablespace( return(DB_ERROR); } - ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0); + ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE); if (!ret) { err = DB_OUT_OF_FILE_SPACE; @@ -2806,25 +2852,26 @@ error_exit2: with zeros from the call of os_file_set_size(), until a buffer pool flush would write to it. */ - buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); + buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE)); /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); + page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); memset(page, '\0', UNIV_PAGE_SIZE); + /* Add the UNIV_PAGE_SIZE to the table flags and write them to the + tablespace header. */ + flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE); fsp_header_init_fields(page, space_id, flags); mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); - if (!(flags & DICT_TF_ZSSIZE_MASK)) { + if (!(fsp_flags_is_compressed(flags))) { buf_flush_init_for_writing(page, NULL, 0); - ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE); + ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE); } else { page_zip_des_t page_zip; ulint zip_size; - zip_size = ((PAGE_ZIP_MIN_SIZE >> 1) - << ((flags & DICT_TF_ZSSIZE_MASK) - >> DICT_TF_ZSSIZE_SHIFT)); + zip_size = fsp_flags_get_zip_size(flags); page_zip_set_size(&page_zip, zip_size); page_zip.data = page + UNIV_PAGE_SIZE; @@ -2834,7 +2881,7 @@ error_exit2: page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0; buf_flush_init_for_writing(page, &page_zip, 0); - ret = os_file_write(path, file, page_zip.data, 0, 0, zip_size); + ret = os_file_write(path, file, page_zip.data, 0, zip_size); } ut_free(buf2); @@ -2860,7 +2907,7 @@ error_exit2: os_file_close(file); - success = fil_space_create(path, space_id, flags, FIL_TABLESPACE); + success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE); if (!success) { err = DB_ERROR; @@ -2907,7 +2954,7 @@ fil_reset_too_high_lsns( /*====================*/ const char* name, /*!< in: table name in the databasename/tablename format */ - ib_uint64_t current_lsn) /*!< in: reset lsn's if the lsn stamped + lsn_t current_lsn) /*!< in: reset lsn's if the lsn stamped to FIL_PAGE_FILE_FLUSH_LSN in the first page is too high */ { @@ -2915,10 +2962,10 @@ fil_reset_too_high_lsns( char* filepath; byte* page; byte* buf2; - ib_uint64_t flush_lsn; + lsn_t flush_lsn; ulint space_id; - ib_int64_t file_size; - ib_int64_t offset; + os_offset_t file_size; + os_offset_t offset; ulint zip_size; ibool success; page_zip_des_t page_zip; @@ -2946,11 +2993,11 @@ fil_reset_too_high_lsns( /* Read the first page of the tablespace */ - buf2 = ut_malloc(3 * UNIV_PAGE_SIZE); + buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE)); /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); + page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); + success = os_file_read(file, page, 0, UNIV_PAGE_SIZE); if (!success) { goto func_exit; @@ -2980,8 +3027,8 @@ fil_reset_too_high_lsns( fprintf(stderr, " InnoDB: Flush lsn in the tablespace file %lu" " to be imported\n" - "InnoDB: is %llu, which exceeds current" - " system lsn %llu.\n" + "InnoDB: is " LSN_PF ", which exceeds current" + " system lsn " LSN_PF ".\n" "InnoDB: We reset the lsn's in the file ", (ulong) space_id, flush_lsn, current_lsn); @@ -2989,18 +3036,17 @@ fil_reset_too_high_lsns( fputs(".\n", stderr); ut_a(ut_is_2pow(zip_size)); - ut_a(zip_size <= UNIV_PAGE_SIZE); + ut_a(zip_size <= UNIV_ZIP_SIZE_MAX); /* Loop through all the pages in the tablespace and reset the lsn and the page checksum if necessary */ - file_size = os_file_get_size_as_iblonglong(file); + file_size = os_file_get_size(file); + ut_a(file_size != (os_offset_t) -1); for (offset = 0; offset < file_size; offset += zip_size ? zip_size : UNIV_PAGE_SIZE) { - success = os_file_read(file, page, - (ulint)(offset & 0xFFFFFFFFUL), - (ulint)(offset >> 32), + success = os_file_read(file, page, offset, zip_size ? zip_size : UNIV_PAGE_SIZE); if (!success) { @@ -3015,16 +3061,13 @@ fil_reset_too_high_lsns( page, &page_zip, current_lsn); success = os_file_write( filepath, file, page_zip.data, - (ulint) offset & 0xFFFFFFFFUL, - (ulint) (offset >> 32), zip_size); + offset, zip_size); } else { buf_flush_init_for_writing( page, NULL, current_lsn); success = os_file_write( filepath, file, page, - (ulint)(offset & 0xFFFFFFFFUL), - (ulint)(offset >> 32), - UNIV_PAGE_SIZE); + offset, UNIV_PAGE_SIZE); } if (!success) { @@ -3041,7 +3084,7 @@ fil_reset_too_high_lsns( } /* We now update the flush_lsn stamp at the start of the file */ - success = os_file_read(file, page, 0, 0, + success = os_file_read(file, page, 0, zip_size ? zip_size : UNIV_PAGE_SIZE); if (!success) { @@ -3050,7 +3093,7 @@ fil_reset_too_high_lsns( mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn); - success = os_file_write(filepath, file, page, 0, 0, + success = os_file_write(filepath, file, page, 0, zip_size ? zip_size : UNIV_PAGE_SIZE); if (!success) { @@ -3088,7 +3131,7 @@ fil_open_single_table_tablespace( accessing the first page of the file */ ulint id, /*!< in: space id */ ulint flags, /*!< in: tablespace flags */ - const char* name) /*!< in: table name in the + const char* tablename) /*!< in: table name in the databasename/tablename format */ { os_file_t file; @@ -3099,16 +3142,9 @@ fil_open_single_table_tablespace( ulint space_id; ulint space_flags; - filepath = fil_make_ibd_name(name, FALSE); + filepath = fil_make_ibd_name(tablename, FALSE); - /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for - ROW_FORMAT=COMPACT - ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and - ROW_FORMAT=REDUNDANT (table->flags == 0). For any other - format, the tablespace flags should equal - (table->flags & ~(~0 << DICT_TF_BITS)). */ - ut_a(flags != DICT_TF_COMPACT); - ut_a(!(flags & (~0UL << DICT_TF_BITS))); + fsp_flags_validate(flags); file = os_file_create_simple_no_error_handling( innodb_file_data_key, filepath, OS_FILE_OPEN, @@ -3132,7 +3168,8 @@ fil_open_single_table_tablespace( " a temporary table #sql...,\n" "InnoDB: and MySQL removed the .ibd file for this.\n" "InnoDB: Please refer to\n" - "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n" + "InnoDB: " REFMAN + "innodb-troubleshooting-datadict.html\n" "InnoDB: for how to resolve the issue.\n", stderr); mem_free(filepath); @@ -3148,11 +3185,11 @@ fil_open_single_table_tablespace( /* Read the first page of the tablespace */ - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); + buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); + page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); + success = os_file_read(file, page, 0, UNIV_PAGE_SIZE); /* We have to read the tablespace id and flags from the file. */ @@ -3161,8 +3198,7 @@ fil_open_single_table_tablespace( ut_free(buf2); - if (UNIV_UNLIKELY(space_id != id - || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) { + if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) { ut_print_timestamp(stderr); fputs(" InnoDB: Error: tablespace id and flags in file ", @@ -3186,7 +3222,7 @@ fil_open_single_table_tablespace( } skip_check: - success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE); + success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE); if (!success) { goto func_exit; @@ -3216,8 +3252,10 @@ fil_make_ibbackup_old_name( const char* name) /*!< in: original file name */ { static const char suffix[] = "_ibbackup_old_vers_"; + char* path; ulint len = strlen(name); - char* path = mem_alloc(len + (15 + sizeof suffix)); + + path = static_cast<char*>(mem_alloc(len + (15 + sizeof suffix))); memcpy(path, name, len); memcpy(path + len, suffix, (sizeof suffix) - 1); @@ -3228,7 +3266,7 @@ fil_make_ibbackup_old_name( /********************************************************************//** Opens an .ibd file and adds the associated single-table tablespace to the -InnoDB fil0fil.c data structures. */ +InnoDB fil0fil.cc data structures. */ static void fil_load_single_table_tablespace( @@ -3239,23 +3277,31 @@ fil_load_single_table_tablespace( { os_file_t file; char* filepath; + char* tablename; ibool success; byte* buf2; byte* page; ulint space_id; ulint flags; - ulint size_low; - ulint size_high; - ib_int64_t size; + os_offset_t size; #ifdef UNIV_HOTBACKUP fil_space_t* space; #endif - filepath = mem_alloc(strlen(dbname) + strlen(filename) - + strlen(fil_path_to_mysql_datadir) + 3); + filepath = static_cast<char*>( + mem_alloc( + strlen(dbname) + + strlen(filename) + + strlen(fil_path_to_mysql_datadir) + 3)); sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname, filename); srv_normalize_path_for_win(filepath); + + tablename = static_cast<char*>( + mem_alloc(strlen(dbname) + strlen(filename) + 2)); + sprintf(tablename, "%s/%s", dbname, filename); + tablename[strlen(tablename) - strlen(".ibd")] = 0; + #ifdef __WIN__ # ifndef UNIV_HOTBACKUP /* If lower_case_table_names is 0 or 2, then MySQL allows database @@ -3299,6 +3345,7 @@ fil_load_single_table_tablespace( "InnoDB: and force InnoDB to continue crash" " recovery here.\n", filepath); + mem_free(tablename); mem_free(filepath); if (srv_force_recovery > 0) { @@ -3314,9 +3361,9 @@ fil_load_single_table_tablespace( exit(1); } - success = os_file_get_size(file, &size_low, &size_high); + size = os_file_get_size(file); - if (!success) { + if (UNIV_UNLIKELY(size == (os_offset_t) -1)) { /* The following call prints an error message */ os_file_get_last_error(TRUE); @@ -3346,6 +3393,7 @@ fil_load_single_table_tablespace( " crash recovery here.\n", filepath); os_file_close(file); + mem_free(tablename); mem_free(filepath); if (srv_force_recovery > 0) { @@ -3367,30 +3415,30 @@ fil_load_single_table_tablespace( /* Every .ibd file is created >= 4 pages in size. Smaller files cannot be ok. */ - size = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low; #ifndef UNIV_HOTBACKUP if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { fprintf(stderr, - "InnoDB: Error: the size of single-table tablespace" - " file %s\n" - "InnoDB: is only %lu %lu, should be at least %lu!", + "InnoDB: Error: the size of single-table" + " tablespace file %s\n" + "InnoDB: is only " UINT64PF + ", should be at least %lu!\n", filepath, - (ulong) size_high, - (ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE)); + size, (ulong) (4 * UNIV_PAGE_SIZE)); os_file_close(file); + mem_free(tablename); mem_free(filepath); return; } #endif - /* Read the first page of the tablespace if the size big enough */ + /* Read the first page of the tablespace if the size is big enough */ - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); + buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); + page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { - success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); + success = os_file_read(file, page, 0, UNIV_PAGE_SIZE); /* We have to read the tablespace id from the file */ @@ -3430,6 +3478,7 @@ fil_load_single_table_tablespace( ut_a(os_file_rename(innodb_file_data_key, filepath, new_path)); ut_free(buf2); + mem_free(tablename); mem_free(filepath); mem_free(new_path); @@ -3468,6 +3517,7 @@ fil_load_single_table_tablespace( ut_a(os_file_rename(innodb_file_data_key, filepath, new_path)); ut_free(buf2); + mem_free(tablename); mem_free(filepath); mem_free(new_path); @@ -3475,7 +3525,7 @@ fil_load_single_table_tablespace( } mutex_exit(&fil_system->mutex); #endif - success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE); + success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE); if (!success) { @@ -3500,6 +3550,7 @@ fil_load_single_table_tablespace( func_exit: os_file_close(file); ut_free(buf2); + mem_free(tablename); mem_free(filepath); } @@ -3574,7 +3625,7 @@ fil_load_single_table_tablespaces(void) return(DB_ERROR); } - dbpath = mem_alloc(dbpath_len); + dbpath = static_cast<char*>(mem_alloc(dbpath_len)); /* Scan all directories under the datadir. They are the database directories of MySQL. */ @@ -3603,10 +3654,10 @@ fil_load_single_table_tablespaces(void) mem_free(dbpath); } - dbpath = mem_alloc(dbpath_len); + dbpath = static_cast<char*>(mem_alloc(dbpath_len)); } - sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir, - dbinfo.name); + ut_snprintf(dbpath, dbpath_len, + "%s/%s", fil_path_to_mysql_datadir, dbinfo.name); srv_normalize_path_for_win(dbpath); dbdir = os_file_opendir(dbpath, FALSE); @@ -3675,7 +3726,7 @@ next_datadir_item: /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. -@return TRUE if does not exist or is being\ deleted */ +@return TRUE if does not exist or is being deleted */ UNIV_INTERN ibool fil_tablespace_deleted_or_being_deleted_in_mem( @@ -3744,10 +3795,7 @@ fil_space_for_table_exists_in_mem( /*==============================*/ ulint id, /*!< in: space id */ const char* name, /*!< in: table name in the standard - 'databasename/tablename' format or - the dir path to a temp table */ - ibool is_temp, /*!< in: TRUE if created with CREATE - TEMPORARY TABLE */ + 'databasename/tablename' format */ ibool mark_space, /*!< in: in crash recovery, at database startup we mark all spaces which have an associated table in the InnoDB @@ -3760,16 +3808,13 @@ fil_space_for_table_exists_in_mem( matching tablespace is not found from memory */ { - fil_space_t* namespace; + fil_space_t* fnamespace; fil_space_t* space; - char* path; ut_ad(fil_system); mutex_enter(&fil_system->mutex); - path = fil_make_ibd_name(name, is_temp); - /* Look if there is a space with the same id */ space = fil_space_get_by_id(id); @@ -3777,15 +3822,14 @@ fil_space_for_table_exists_in_mem( /* Look if there is a space with the same name; the name is the directory path from the datadir to the file */ - namespace = fil_space_get_by_name(path); - if (space && space == namespace) { + fnamespace = fil_space_get_by_name(name); + if (space && space == fnamespace) { /* Found */ if (mark_space) { space->mark = TRUE; } - mem_free(path); mutex_exit(&fil_system->mutex); return(TRUE); @@ -3793,14 +3837,13 @@ fil_space_for_table_exists_in_mem( if (!print_error_if_does_not_exist) { - mem_free(path); mutex_exit(&fil_system->mutex); return(FALSE); } if (space == NULL) { - if (namespace == NULL) { + if (fnamespace == NULL) { ut_print_timestamp(stderr); fputs(" InnoDB: Error: table ", stderr); ut_print_filename(stderr, name); @@ -3829,21 +3872,20 @@ fil_space_for_table_exists_in_mem( "InnoDB: a tablespace of name %s and id %lu," " though. Have\n" "InnoDB: you deleted or moved .ibd files?\n", - (ulong) id, namespace->name, - (ulong) namespace->id); + (ulong) id, fnamespace->name, + (ulong) fnamespace->id); } error_exit: fputs("InnoDB: Please refer to\n" "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n" "InnoDB: for how to resolve the issue.\n", stderr); - mem_free(path); mutex_exit(&fil_system->mutex); return(FALSE); } - if (0 != strcmp(space->name, path)) { + if (0 != strcmp(space->name, name)) { ut_print_timestamp(stderr); fputs(" InnoDB: Error: table ", stderr); ut_print_filename(stderr, name); @@ -3855,19 +3897,18 @@ error_exit: "InnoDB: Have you deleted or moved .ibd files?\n", (ulong) id, space->name); - if (namespace != NULL) { + if (fnamespace != NULL) { fputs("InnoDB: There is a tablespace" " with the right name\n" "InnoDB: ", stderr); - ut_print_filename(stderr, namespace->name); + ut_print_filename(stderr, fnamespace->name); fprintf(stderr, ", but its id is %lu.\n", - (ulong) namespace->id); + (ulong) fnamespace->id); } goto error_exit; } - mem_free(path); mutex_exit(&fil_system->mutex); return(FALSE); @@ -3881,30 +3922,24 @@ static ulint fil_get_space_id_for_table( /*=======================*/ - const char* name) /*!< in: table name in the standard + const char* tablename) /*!< in: table name in the standard 'databasename/tablename' format */ { - fil_space_t* namespace; + fil_space_t* fnamespace; ulint id = ULINT_UNDEFINED; - char* path; ut_ad(fil_system); mutex_enter(&fil_system->mutex); - path = fil_make_ibd_name(name, FALSE); - - /* Look if there is a space with the same name; the name is the - directory path to the file */ + /* Look if there is a space with the same name. */ - namespace = fil_space_get_by_name(path); + fnamespace = fil_space_get_by_name(tablename); - if (namespace) { - id = namespace->id; + if (fnamespace) { + id = fnamespace->id; } - mem_free(path); - mutex_exit(&fil_system->mutex); return(id); @@ -3934,10 +3969,13 @@ fil_extend_space_to_desired_size( ulint buf_size; ulint start_page_no; ulint file_start_page_no; - ulint offset_high; - ulint offset_low; ulint page_size; - ibool success = TRUE; + ulint pages_added; + ibool success; + +retry: + pages_added = 0; + success = TRUE; fil_mutex_enter_and_prepare_for_io(space_id); @@ -3954,70 +3992,93 @@ fil_extend_space_to_desired_size( return(TRUE); } - page_size = dict_table_flags_to_zip_size(space->flags); + page_size = fsp_flags_get_zip_size(space->flags); if (!page_size) { page_size = UNIV_PAGE_SIZE; } node = UT_LIST_GET_LAST(space->chain); + if (!node->being_extended) { + /* Mark this node as undergoing extension. This flag + is used by other threads to wait for the extension + opereation to finish. */ + node->being_extended = TRUE; + } else { + /* Another thread is currently extending the file. Wait + for it to finish. + It'd have been better to use event driven mechanism but + the entire module is peppered with polling stuff. */ + mutex_exit(&fil_system->mutex); + os_thread_sleep(100000); + goto retry; + } + fil_node_prepare_for_io(node, fil_system, space); + /* At this point it is safe to release fil_system mutex. No + other thread can rename, delete or close the file because + we have set the node->being_extended flag. */ + mutex_exit(&fil_system->mutex); + start_page_no = space->size; file_start_page_no = space->size - node->size; /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; - buf2 = mem_alloc(buf_size + page_size); - buf = ut_align(buf2, page_size); + buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size)); + buf = static_cast<byte*>(ut_align(buf2, page_size)); memset(buf, 0, buf_size); while (start_page_no < size_after_extend) { - ulint n_pages = ut_min(buf_size / page_size, - size_after_extend - start_page_no); + ulint n_pages + = ut_min(buf_size / page_size, + size_after_extend - start_page_no); - offset_high = (start_page_no - file_start_page_no) - / (4096 * ((1024 * 1024) / page_size)); - offset_low = ((start_page_no - file_start_page_no) - % (4096 * ((1024 * 1024) / page_size))) + os_offset_t offset + = ((os_offset_t) (start_page_no - file_start_page_no)) * page_size; #ifdef UNIV_HOTBACKUP success = os_file_write(node->name, node->handle, buf, - offset_low, offset_high, - page_size * n_pages); + offset, page_size * n_pages); #else success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, - offset_low, offset_high, - page_size * n_pages, + offset, page_size * n_pages, NULL, NULL); #endif if (success) { - node->size += n_pages; - space->size += n_pages; - os_has_said_disk_full = FALSE; } else { /* Let us measure the size of the file to determine how much we were able to extend it */ + os_offset_t size; - n_pages = ((ulint) - (os_file_get_size_as_iblonglong( - node->handle) - / page_size)) - node->size; + size = os_file_get_size(node->handle); + ut_a(size != (os_offset_t) -1); - node->size += n_pages; - space->size += n_pages; + n_pages = ((ulint) (size / page_size)) + - node->size - pages_added; + pages_added += n_pages; break; } start_page_no += n_pages; + pages_added += n_pages; } mem_free(buf2); + mutex_enter(&fil_system->mutex); + + ut_a(node->being_extended); + + space->size += pages_added; + node->size += pages_added; + node->being_extended = FALSE; + fil_node_complete_io(node, fil_system, OS_FILE_WRITE); *actual_size = space->size; @@ -4075,7 +4136,7 @@ fil_extend_tablespaces_to_stored_len(void) mutex, because this is a single-threaded operation */ error = fil_read(TRUE, space->id, - dict_table_flags_to_zip_size(space->flags), + fsp_flags_get_zip_size(space->flags), 0, 0, UNIV_PAGE_SIZE, buf, NULL); ut_a(error == DB_SUCCESS); @@ -4227,12 +4288,10 @@ fil_node_prepare_for_io( if (node->open == FALSE) { /* File is closed: open it */ ut_a(node->n_pending == 0); - fil_node_open_file(node, system, space); } - if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE - && space->id != 0) { + if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) { /* The node is in the LRU list, remove it */ ut_a(UT_LIST_GET_LEN(system->LRU) > 0); @@ -4277,8 +4336,8 @@ fil_node_complete_io( } } - if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE - && node->space->id != 0) { + if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) { + /* The node must be put back to the LRU list */ UT_LIST_ADD_FIRST(LRU, system->LRU, node); } @@ -4349,11 +4408,11 @@ fil_io( ulint mode; fil_space_t* space; fil_node_t* node; - ulint offset_high; - ulint offset_low; ibool ret; ulint is_log; ulint wake_later; + os_offset_t offset; + ibool ignore_nonexistent_pages; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -4361,13 +4420,20 @@ fil_io( wake_later = type & OS_AIO_SIMULATED_WAKE_LATER; type = type & ~OS_AIO_SIMULATED_WAKE_LATER; + ignore_nonexistent_pages = type & BUF_READ_IGNORE_NONEXISTENT_PAGES; + type &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES; + ut_ad(byte_offset < UNIV_PAGE_SIZE); ut_ad(!zip_size || !byte_offset); ut_ad(ut_is_2pow(zip_size)); ut_ad(buf); ut_ad(len > 0); -#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE -# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE" + ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT)); +#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX +# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX" +#endif +#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN +# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN" #endif ut_ad(fil_validate_skip()); #ifndef UNIV_HOTBACKUP @@ -4427,6 +4493,12 @@ fil_io( for (;;) { if (UNIV_UNLIKELY(node == NULL)) { + if (ignore_nonexistent_pages) { + mutex_exit(&fil_system->mutex); + return(DB_ERROR); + } + /* else */ + fil_report_invalid_page_access( block_offset, space_id, space->name, byte_offset, len, type); @@ -4434,7 +4506,7 @@ fil_io( ut_error; } - if (space->id != 0 && node->size == 0) { + if (fil_is_user_tablespace_id(space->id) && node->size == 0) { /* We do not know the size of a single-table tablespace before we open the file */ @@ -4454,7 +4526,7 @@ fil_io( fil_node_prepare_for_io(node, fil_system, space); /* Check that at least the start offset is within the bounds of a - single-table tablespace */ + single-table tablespace, including rollback tablespaces. */ if (UNIV_UNLIKELY(node->size <= block_offset) && space->id != 0 && space->purpose == FIL_TABLESPACE) { @@ -4471,9 +4543,8 @@ fil_io( /* Calculate the low 32 bits and the high 32 bits of the file offset */ if (!zip_size) { - offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT)); - offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) - & 0xFFFFFFFFUL) + byte_offset; + offset = ((os_offset_t) block_offset << UNIV_PAGE_SIZE_SHIFT) + + byte_offset; ut_a(node->size - block_offset >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1)) @@ -4488,8 +4559,7 @@ fil_io( case 16384: zip_size_shift = 14; break; default: ut_error; } - offset_high = block_offset >> (32 - zip_size_shift); - offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL) + offset = ((os_offset_t) block_offset << zip_size_shift) + byte_offset; ut_a(node->size - block_offset >= (len + (zip_size - 1)) / zip_size); @@ -4503,16 +4573,15 @@ fil_io( #ifdef UNIV_HOTBACKUP /* In ibbackup do normal i/o, not aio */ if (type == OS_FILE_READ) { - ret = os_file_read(node->handle, buf, offset_low, offset_high, - len); + ret = os_file_read(node->handle, buf, offset, len); } else { ret = os_file_write(node->name, node->handle, buf, - offset_low, offset_high, len); + offset, len); } #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset_low, offset_high, len, node, message); + offset, len, node, message); #endif ut_a(ret); @@ -4536,7 +4605,7 @@ fil_io( /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the handler for completed requests. The aio array of pending requests is divided -into segments (see os0file.c for more info). The thread specifies which +into segments (see os0file.cc for more info). The thread specifies which segment it wants to wait for. */ UNIV_INTERN void @@ -4595,10 +4664,10 @@ fil_aio_wait( if (fil_node->space->purpose == FIL_TABLESPACE) { srv_set_io_thread_op_info(segment, "complete io for buf page"); - buf_page_io_complete(message); + buf_page_io_complete(static_cast<buf_page_t*>(message)); } else { srv_set_io_thread_op_info(segment, "complete io for log"); - log_io_complete(message); + log_io_complete(static_cast<log_group_t*>(message)); } } #endif /* UNIV_HOTBACKUP */ @@ -4745,7 +4814,8 @@ fil_flush_file_spaces( traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT() on a space that was just removed from the list by fil_flush(). Thus, the space could be dropped and the memory overwritten. */ - space_ids = mem_alloc(n_space_ids * sizeof *space_ids); + space_ids = static_cast<ulint*>( + mem_alloc(n_space_ids * sizeof *space_ids)); n_space_ids = 0; @@ -4771,6 +4841,14 @@ fil_flush_file_spaces( mem_free(space_ids); } +/** Functor to validate the space list. */ +struct Check { + void operator()(const fil_node_t* elem) + { + ut_a(elem->open || !elem->n_pending); + } +}; + /******************************************************************//** Checks the consistency of the tablespace cache. @return TRUE if ok */ @@ -4790,16 +4868,19 @@ fil_validate(void) for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) { - space = HASH_GET_FIRST(fil_system->spaces, i); + for (space = static_cast<fil_space_t*>( + HASH_GET_FIRST(fil_system->spaces, i)); + space != 0; + space = static_cast<fil_space_t*>( + HASH_GET_NEXT(hash, space))) { - while (space != NULL) { - UT_LIST_VALIDATE(chain, fil_node_t, space->chain, - ut_a(ut_list_node_313->open - || !ut_list_node_313->n_pending)); + UT_LIST_VALIDATE( + chain, fil_node_t, space->chain, Check()); - fil_node = UT_LIST_GET_FIRST(space->chain); + for (fil_node = UT_LIST_GET_FIRST(space->chain); + fil_node != 0; + fil_node = UT_LIST_GET_NEXT(chain, fil_node)) { - while (fil_node != NULL) { if (fil_node->n_pending > 0) { ut_a(fil_node->open); } @@ -4807,25 +4888,22 @@ fil_validate(void) if (fil_node->open) { n_open++; } - fil_node = UT_LIST_GET_NEXT(chain, fil_node); } - space = HASH_GET_NEXT(hash, space); } } ut_a(fil_system->n_open == n_open); - UT_LIST_VALIDATE(LRU, fil_node_t, fil_system->LRU, (void) 0); + UT_LIST_CHECK(LRU, fil_node_t, fil_system->LRU); - fil_node = UT_LIST_GET_FIRST(fil_system->LRU); + for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU); + fil_node != 0; + fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) { - while (fil_node != NULL) { ut_a(fil_node->n_pending == 0); + ut_a(!fil_node->being_extended); ut_a(fil_node->open); - ut_a(fil_node->space->purpose == FIL_TABLESPACE); - ut_a(fil_node->space->id != 0); - - fil_node = UT_LIST_GET_NEXT(LRU, fil_node); + ut_a(fil_space_belongs_in_lru(fil_node->space)); } mutex_exit(&fil_system->mutex); @@ -4899,7 +4977,7 @@ fil_page_get_type( } /****************************************************************//** -Initializes the tablespace memory cache. */ +Closes the tablespace memory cache. */ UNIV_INTERN void fil_close(void) |