diff options
author | Sergei Golubchik <serg@mariadb.org> | 2016-06-30 16:38:05 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2016-06-30 16:38:05 +0200 |
commit | 932646b1ff6a8f5815a961340a9e1ee4702f5b44 (patch) | |
tree | 5bc42ace8ae1f7e4d00baffd468bdb7564e851f1 /storage/xtradb | |
parent | 0bb30f3603b519780eaf3fe0527b1c6af285229a (diff) | |
parent | 33492ec8d4e2077cf8e07d0628a959d8729bd1f9 (diff) | |
download | mariadb-git-932646b1ff6a8f5815a961340a9e1ee4702f5b44.tar.gz |
Merge branch '10.1' into 10.2
Diffstat (limited to 'storage/xtradb')
64 files changed, 1829 insertions, 1310 deletions
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 556096ca7e4..19dd375f6e6 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -1544,6 +1544,7 @@ buf_pool_free_instance( buf_chunk_t* chunk; buf_chunk_t* chunks; buf_page_t* bpage; + ulint i; bpage = UT_LIST_GET_LAST(buf_pool->LRU); while (bpage != NULL) { @@ -1567,10 +1568,29 @@ buf_pool_free_instance( mem_free(buf_pool->watch); buf_pool->watch = NULL; + for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { + os_event_free(buf_pool->no_flush[i]); + } + mutex_free(&buf_pool->LRU_list_mutex); + mutex_free(&buf_pool->free_list_mutex); + mutex_free(&buf_pool->zip_free_mutex); + mutex_free(&buf_pool->zip_hash_mutex); + mutex_free(&buf_pool->zip_mutex); + mutex_free(&buf_pool->flush_state_mutex); + mutex_free(&buf_pool->flush_list_mutex); + chunks = buf_pool->chunks; chunk = chunks + buf_pool->n_chunks; while (--chunk >= chunks) { + buf_block_t* block = chunk->blocks; + for (i = 0; i < chunk->size; i++, block++) { + mutex_free(&block->mutex); + rw_lock_free(&block->lock); +#ifdef UNIV_SYNC_DEBUG + rw_lock_free(&block->debug_latch); +#endif + } os_mem_free_large(chunk->mem, chunk->mem_size); } diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index 7c7f28ddc2f..ff334003524 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2013, 2016, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -37,6 +37,7 @@ Created 2011/12/19 #include "page0zip.h" #include "trx0sys.h" #include "fil0crypt.h" +#include "fil0pagecompress.h" #ifndef UNIV_HOTBACKUP @@ -500,6 +501,7 @@ buf_dblwr_process() for (std::list<byte*>::iterator i = recv_dblwr.pages.begin(); i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) { + bool is_compressed = false; page = *i; page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); @@ -533,6 +535,16 @@ buf_dblwr_process() NULL, 0); + /* Is page compressed ? */ + is_compressed = fil_page_is_compressed_encrypted(read_buf) | + fil_page_is_compressed(read_buf); + + /* If page was compressed, decompress it before we + check checksum. */ + if (is_compressed) { + fil_decompress_page(NULL, read_buf, UNIV_PAGE_SIZE, NULL, true); + } + if (fil_space_verify_crypt_checksum(read_buf, zip_size)) { /* page is encrypted and checksum is OK */ } else if (buf_page_is_corrupted(true, read_buf, zip_size)) { @@ -546,6 +558,16 @@ buf_dblwr_process() " the doublewrite buffer.\n", (ulong) space_id, (ulong) page_no); + /* Is page compressed ? */ + is_compressed = fil_page_is_compressed_encrypted(page) | + fil_page_is_compressed(page); + + /* If page was compressed, decompress it before we + check checksum. */ + if (is_compressed) { + fil_decompress_page(NULL, page, UNIV_PAGE_SIZE, NULL, true); + } + if (fil_space_verify_crypt_checksum(page, zip_size)) { /* the doublewrite buffer page is encrypted and OK */ } else if (buf_page_is_corrupted(true, diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc index 7bce480e652..a68bb8a1344 100644 --- a/storage/xtradb/buf/buf0dump.cc +++ b/storage/xtradb/buf/buf0dump.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -163,6 +163,25 @@ buf_load_status( va_end(ap); } +/** Returns the directory path where the buffer pool dump file will be created. +@return directory path */ +static +const char* +get_buf_dump_dir() +{ + const char* dump_dir; + + /* The dump file should be created in the default data directory if + innodb_data_home_dir is set as an empty string. */ + if (strcmp(srv_data_home, "") == 0) { + dump_dir = fil_path_to_mysql_datadir; + } else { + dump_dir = srv_data_home; + } + + return(dump_dir); +} + /*****************************************************************//** Perform a buffer pool dump into the file specified by innodb_buffer_pool_filename. If any errors occur then the value of @@ -186,7 +205,7 @@ buf_dump( int ret; ut_snprintf(full_filename, sizeof(full_filename), - "%s%c%s", srv_data_home, SRV_PATH_SEPARATOR, + "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR, srv_buf_dump_filename); ut_snprintf(tmp_filename, sizeof(tmp_filename), @@ -471,7 +490,7 @@ buf_load() buf_load_abort_flag = FALSE; ut_snprintf(full_filename, sizeof(full_filename), - "%s%c%s", srv_data_home, SRV_PATH_SEPARATOR, + "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR, srv_buf_dump_filename); buf_load_status(STATUS_NOTICE, diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index ec13673a56c..1dfd92e4fba 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -72,12 +72,6 @@ UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key; UNIV_INTERN mysql_pfs_key_t buf_lru_manager_thread_key; #endif /* UNIV_PFS_THREAD */ -/** If LRU list of a buf_pool is less than this size then LRU eviction -should not happen. This is because when we do LRU flushing we also put -the blocks on free list. If LRU list is very small then we can end up -in thrashing. */ -#define BUF_LRU_MIN_LEN 256 - /* @} */ /******************************************************************//** diff --git a/storage/xtradb/buf/buf0lru.cc b/storage/xtradb/buf/buf0lru.cc index b36136e4079..fdec0e48198 100644 --- a/storage/xtradb/buf/buf0lru.cc +++ b/storage/xtradb/buf/buf0lru.cc @@ -607,6 +607,7 @@ rescan: bpage != NULL; bpage = prev) { + ut_ad(!must_restart); ut_a(buf_page_in_file(bpage)); /* Save the previous link because once we free the @@ -624,9 +625,6 @@ rescan: /* Remove was unsuccessful, we have to try again by scanning the entire list from the end. - This also means that we never released the - flush list mutex. Therefore we can trust the prev - pointer. buf_flush_or_remove_page() released the flush list mutex but not the LRU list mutex. Therefore it is possible that a new page was @@ -643,6 +641,11 @@ rescan: iteration. */ all_freed = false; + if (UNIV_UNLIKELY(must_restart)) { + + /* Cannot trust the prev pointer */ + break; + } } else if (flush) { /* The processing was successful. And during the @@ -650,12 +653,9 @@ rescan: when calling buf_page_flush(). We cannot trust prev pointer. */ goto rescan; - } else if (UNIV_UNLIKELY(must_restart)) { - - ut_ad(!all_freed); - break; } + ut_ad(!must_restart); ++processed; /* Yield if we have hogged the CPU and mutexes for too long. */ @@ -666,6 +666,11 @@ rescan: /* Reset the batch size counter if we had to yield. */ processed = 0; + } else if (UNIV_UNLIKELY(must_restart)) { + + /* Cannot trust the prev pointer */ + all_freed = false; + break; } #ifdef DBUG_OFF diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc index 68cbde8c342..e2c7355b2e4 100644 --- a/storage/xtradb/dict/dict0crea.cc +++ b/storage/xtradb/dict/dict0crea.cc @@ -1369,7 +1369,7 @@ dict_create_or_check_foreign_constraint_tables(void) ib_logf(IB_LOG_LEVEL_WARN, "Dropping incompletely created " "SYS_FOREIGN table."); - row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); + row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE); } if (sys_foreign_cols_err == DB_CORRUPTION) { @@ -1377,7 +1377,7 @@ dict_create_or_check_foreign_constraint_tables(void) "Dropping incompletely created " "SYS_FOREIGN_COLS table."); - row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); + row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE); } ib_logf(IB_LOG_LEVEL_WARN, @@ -1431,8 +1431,8 @@ dict_create_or_check_foreign_constraint_tables(void) ut_ad(err == DB_OUT_OF_FILE_SPACE || err == DB_TOO_MANY_CONCURRENT_TRXS); - row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); - row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); + row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE); + row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE); if (err == DB_OUT_OF_FILE_SPACE) { err = DB_MUST_GET_MORE_FILE_SPACE; @@ -1856,7 +1856,7 @@ dict_create_or_check_sys_tablespace(void) ib_logf(IB_LOG_LEVEL_WARN, "Dropping incompletely created " "SYS_TABLESPACES table."); - row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE); + row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE); } if (sys_datafiles_err == DB_CORRUPTION) { @@ -1864,7 +1864,7 @@ dict_create_or_check_sys_tablespace(void) "Dropping incompletely created " "SYS_DATAFILES table."); - row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE); + row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE); } ib_logf(IB_LOG_LEVEL_INFO, @@ -1900,8 +1900,8 @@ dict_create_or_check_sys_tablespace(void) ut_a(err == DB_OUT_OF_FILE_SPACE || err == DB_TOO_MANY_CONCURRENT_TRXS); - row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE); - row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE); + row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE); + row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE); if (err == DB_OUT_OF_FILE_SPACE) { err = DB_MUST_GET_MORE_FILE_SPACE; diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index 206038d36c9..a147fbbc671 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -1116,7 +1116,7 @@ dict_init(void) &dict_operation_lock, SYNC_DICT_OPERATION); if (!srv_read_only_mode) { - dict_foreign_err_file = os_file_create_tmpfile(); + dict_foreign_err_file = os_file_create_tmpfile(NULL); ut_a(dict_foreign_err_file); mutex_create(dict_foreign_err_mutex_key, diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc index 0b1ba975eb6..cf2a5b69509 100644 --- a/storage/xtradb/dict/dict0load.cc +++ b/storage/xtradb/dict/dict0load.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2242,8 +2243,9 @@ dict_get_and_save_data_dir_path( bool dict_mutex_own) /*!< in: true if dict_sys->mutex is owned already */ { - if (DICT_TF_HAS_DATA_DIR(table->flags) - && (!table->data_dir_path)) { + bool is_temp = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY); + + if (!is_temp && !table->data_dir_path && table->space) { char* path = fil_space_get_first_path(table->space); if (!dict_mutex_own) { @@ -2255,6 +2257,7 @@ dict_get_and_save_data_dir_path( } if (path) { + table->flags |= (1 << DICT_TF_POS_DATA_DIR); dict_save_data_dir_path(table, path); mem_free(path); } @@ -2395,16 +2398,14 @@ err_exit: } /* Use the remote filepath if needed. */ - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - /* This needs to be added to the table - from SYS_DATAFILES */ - dict_get_and_save_data_dir_path(table, true); + /* This needs to be added to the table + from SYS_DATAFILES */ + dict_get_and_save_data_dir_path(table, true); - if (table->data_dir_path) { - filepath = os_file_make_remote_pathname( + if (table->data_dir_path) { + filepath = os_file_make_remote_pathname( table->data_dir_path, table->name, "ibd"); - } } /* Try to open the tablespace. We set the diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc index 12ead09d829..5c283f693d5 100644 --- a/storage/xtradb/dict/dict0stats.cc +++ b/storage/xtradb/dict/dict0stats.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2009, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2009, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1488,7 +1488,6 @@ on the leaf page. when comparing records @param[out] n_diff number of distinct records @param[out] n_external_pages number of external pages -@param[in,out] mtr mini-transaction @return number of distinct records on the leaf page */ static void @@ -1496,8 +1495,7 @@ dict_stats_analyze_index_below_cur( const btr_cur_t* cur, ulint n_prefix, ib_uint64_t* n_diff, - ib_uint64_t* n_external_pages, - mtr_t* mtr) + ib_uint64_t* n_external_pages) { dict_index_t* index; ulint space; @@ -1511,6 +1509,7 @@ dict_stats_analyze_index_below_cur( ulint* offsets2; ulint* offsets_rec; ulint size; + mtr_t mtr; index = btr_cur_get_index(cur); @@ -1549,12 +1548,14 @@ dict_stats_analyze_index_below_cur( function without analyzing any leaf pages */ *n_external_pages = 0; + mtr_start(&mtr); + /* descend to the leaf level on the B-tree */ for (;;) { block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL /* no guessed block */, - BUF_GET, __FILE__, __LINE__, mtr); + BUF_GET, __FILE__, __LINE__, &mtr); page = buf_block_get_frame(block); @@ -1576,6 +1577,8 @@ dict_stats_analyze_index_below_cur( ut_a(*n_diff > 0); if (*n_diff == 1) { + mtr_commit(&mtr); + /* page has all keys equal and the end of the page was reached by dict_stats_scan_page(), no need to descend to the leaf level */ @@ -1600,7 +1603,7 @@ dict_stats_analyze_index_below_cur( } /* make sure we got a leaf page as a result from the above loop */ - ut_ad(btr_page_get_level(page, mtr) == 0); + ut_ad(btr_page_get_level(page, &mtr) == 0); /* scan the leaf page and find the number of distinct keys, when looking only at the first n_prefix columns; also estimate @@ -1617,6 +1620,7 @@ dict_stats_analyze_index_below_cur( __func__, page_no, n_diff); #endif + mtr_commit(&mtr); mem_heap_free(heap); } @@ -1826,8 +1830,7 @@ dict_stats_analyze_index_for_n_prefix( dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur), n_prefix, &n_diff_on_leaf_page, - &n_external_pages, - mtr); + &n_external_pages); /* We adjust n_diff_on_leaf_page here to avoid counting one record twice - once as the last on some page and once diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 2da234ad094..b0695e2f260 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2016, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -719,16 +719,23 @@ fil_node_open_file( } if (UNIV_UNLIKELY(space->flags != flags)) { - fprintf(stderr, - "InnoDB: Error: table flags are 0x%lx" - " in the data dictionary\n" - "InnoDB: but the flags in file %s are 0x%lx!\n", - space->flags, node->name, flags); + ulint sflags = (space->flags & ~FSP_FLAGS_MASK_DATA_DIR); + ulint fflags = (flags & ~FSP_FLAGS_MASK_DATA_DIR_ORACLE); - ut_error; - } + /* DATA_DIR option is on different place on MariaDB + compared to MySQL. If this is the difference. Fix + it. */ + + if (sflags == fflags) { + fprintf(stderr, + "InnoDB: Warning: Table flags 0x%lx" + " in the data dictionary but in file %s are 0x%lx!\n" + " Temporally corrected because DATA_DIR option to 0x%lx.\n", + space->flags, node->name, flags, space->flags); + + flags = space->flags; + } - if (UNIV_UNLIKELY(space->flags != flags)) { if (!dict_tf_verify_flags(space->flags, flags)) { fprintf(stderr, "InnoDB: Error: table flags are 0x%lx" @@ -3218,8 +3225,6 @@ fil_create_link_file( const char* tablename, /*!< in: tablename */ const char* filepath) /*!< in: pathname of tablespace */ { - os_file_t file; - ibool success; dberr_t err = DB_SUCCESS; char* link_filepath; char* prev_filepath = fil_read_link_file(tablename); @@ -3238,16 +3243,24 @@ fil_create_link_file( link_filepath = fil_make_isl_name(tablename); - /* Note that OS_FILE_READ_WRITE_CACHED used here to avoid - unnecessary errors on O_DIRECT, link files are not really - a data files. */ - file = os_file_create_simple_no_error_handling( - innodb_file_data_key, link_filepath, - OS_FILE_CREATE, OS_FILE_READ_WRITE_CACHED, &success, 0); + /** Check if the file already exists. */ + FILE* file = NULL; + ibool exists; + os_file_type_t ftype; - if (!success) { - /* The following call will print an error message */ - ulint error = os_file_get_last_error(true); + bool success = os_file_status(link_filepath, &exists, &ftype); + + ulint error = 0; + if (success && !exists) { + file = fopen(link_filepath, "w"); + if (file == NULL) { + /* This call will print its own error message */ + error = os_file_get_last_error(true); + } + } else { + error = OS_FILE_ALREADY_EXISTS; + } + if (error != 0) { ut_print_timestamp(stderr); fputs(" InnoDB: Cannot create file ", stderr); @@ -3259,10 +3272,8 @@ fil_create_link_file( ut_print_filename(stderr, filepath); fputs(" already exists.\n", stderr); err = DB_TABLESPACE_EXISTS; - } else if (error == OS_FILE_DISK_FULL) { err = DB_OUT_OF_FILE_SPACE; - } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { err = DB_UNSUPPORTED; } else { @@ -3274,13 +3285,17 @@ fil_create_link_file( return(err); } - if (!os_file_write(link_filepath, file, filepath, 0, - strlen(filepath))) { + ulint rbytes = fwrite(filepath, 1, strlen(filepath), file); + if (rbytes != strlen(filepath)) { + os_file_get_last_error(true); + ib_logf(IB_LOG_LEVEL_ERROR, + "cannot write link file " + "%s",filepath); err = DB_ERROR; } /* Close the file, we only need it at startup */ - os_file_close(file); + fclose(file); mem_free(link_filepath); @@ -3829,8 +3844,18 @@ fil_open_single_table_tablespace( /* Validate this single-table-tablespace with SYS_TABLES, but do not compare the DATA_DIR flag, in case the tablespace was relocated. */ + + ulint newf = def.flags; + if (newf != mod_flags) { + if (FSP_FLAGS_HAS_DATA_DIR(newf)) { + newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR); + } else if(FSP_FLAGS_HAS_DATA_DIR_ORACLE(newf)) { + newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR_ORACLE); + } + } + if (def.valid && def.id == id - && (def.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) { + && newf == mod_flags) { valid_tablespaces_found++; } else { def.valid = false; @@ -3855,8 +3880,17 @@ fil_open_single_table_tablespace( /* Validate this single-table-tablespace with SYS_TABLES, but do not compare the DATA_DIR flag, in case the tablespace was relocated. */ + ulint newf = remote.flags; + if (newf != mod_flags) { + if (FSP_FLAGS_HAS_DATA_DIR(newf)) { + newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR); + } else if(FSP_FLAGS_HAS_DATA_DIR_ORACLE(newf)) { + newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR_ORACLE); + } + } + if (remote.valid && remote.id == id - && (remote.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) { + && newf == mod_flags) { valid_tablespaces_found++; } else { remote.valid = false; @@ -3882,8 +3916,17 @@ fil_open_single_table_tablespace( /* Validate this single-table-tablespace with SYS_TABLES, but do not compare the DATA_DIR flag, in case the tablespace was relocated. */ + ulint newf = dict.flags; + if (newf != mod_flags) { + if (FSP_FLAGS_HAS_DATA_DIR(newf)) { + newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR); + } else if(FSP_FLAGS_HAS_DATA_DIR_ORACLE(newf)) { + newf = (newf & ~FSP_FLAGS_MASK_DATA_DIR_ORACLE); + } + } + if (dict.valid && dict.id == id - && (dict.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) { + && newf == mod_flags) { valid_tablespaces_found++; } else { dict.valid = false; @@ -5249,7 +5292,7 @@ retry: if (posix_fallocate(node->handle, start_offset, len) == -1) { ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " "space for file \'%s\' failed. Current size " - INT64PF ", desired size " INT64PF "\n", + INT64PF ", desired size " INT64PF, node->name, start_offset, len+start_offset); os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__); success = FALSE; @@ -5293,12 +5336,15 @@ retry: os_offset_t offset = ((os_offset_t) (start_page_no - file_start_page_no)) * page_size; + + const char* name = node->name == NULL ? space->name : node->name; + #ifdef UNIV_HOTBACKUP - success = os_file_write(node->name, node->handle, buf, + success = os_file_write(name, node->handle, buf, offset, page_size * n_pages); #else success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, - node->name, node->handle, buf, + name, node->handle, buf, offset, page_size * n_pages, page_size, node, NULL, space_id, NULL, 0); #endif /* UNIV_HOTBACKUP */ @@ -5962,22 +6008,12 @@ _fil_io( } } + const char* name = node->name == NULL ? space->name : node->name; + /* Queue the aio request */ - ret = os_aio( - type, - is_log, - mode | wake_later, - node->name, - node->handle, - buf, - offset, - len, - zip_size ? zip_size : UNIV_PAGE_SIZE, - node, - message, - space_id, - trx, - write_size); + ret = os_aio(type, is_log, mode | wake_later, name, node->handle, buf, + offset, len, zip_size ? zip_size : UNIV_PAGE_SIZE, node, + message, space_id, trx, write_size); #else /* In mysqlbackup do normal i/o, not aio */ @@ -5985,7 +6021,7 @@ _fil_io( ret = os_file_read(node->handle, buf, offset, len); } else { ut_ad(!srv_read_only_mode); - ret = os_file_write(node->name, node->handle, buf, + ret = os_file_write(name, node->handle, buf, offset, len); } #endif /* !UNIV_HOTBACKUP */ @@ -6418,10 +6454,7 @@ fil_close(void) { fil_space_crypt_cleanup(); -#ifndef UNIV_HOTBACKUP - /* The mutex should already have been freed. */ - ut_ad(fil_system->mutex.magic_n == 0); -#endif /* !UNIV_HOTBACKUP */ + mutex_free(&fil_system->mutex); hash_table_free(fil_system->spaces); @@ -7070,27 +7103,6 @@ fil_mtr_rename_log( /************************************************************************* functions to access is_corrupt flag of fil_space_t*/ -ibool -fil_space_is_corrupt( -/*=================*/ - ulint space_id) -{ - fil_space_t* space; - ibool ret = FALSE; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(space_id); - - if (UNIV_UNLIKELY(space && space->is_corrupt)) { - ret = TRUE; - } - - mutex_exit(&fil_system->mutex); - - return(ret); -} - void fil_space_set_corrupt( /*==================*/ diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index 4451116337e..fe4f64d88ca 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013, 2015, MariaDB Corporation. All Rights Reserved. +Copyright (C) 2013, 2016, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -446,8 +446,11 @@ fil_decompress_page( byte* buf, /*!< out: buffer from which to read; in aio this must be appropriately aligned */ ulong len, /*!< in: length of output buffer.*/ - ulint* write_size) /*!< in/out: Actual payload size of + ulint* write_size, /*!< in/out: Actual payload size of the compressed data. */ + bool return_error) /*!< in: true if only an error should + be produced when decompression fails. + By default this parameter is false. */ { int err = 0; ulint actual_size = 0; @@ -492,6 +495,9 @@ fil_decompress_page( mach_read_from_2(buf+FIL_PAGE_TYPE), len); fflush(stderr); + if (return_error) { + goto error_return; + } ut_error; } @@ -511,6 +517,9 @@ fil_decompress_page( " actual size %lu compression %s.", actual_size, fil_get_compression_alg_name(compression_alg)); fflush(stderr); + if (return_error) { + goto error_return; + } ut_error; } @@ -542,6 +551,9 @@ fil_decompress_page( fflush(stderr); + if (return_error) { + goto error_return; + } ut_error; } break; @@ -558,6 +570,9 @@ fil_decompress_page( err, actual_size, len); fflush(stderr); + if (return_error) { + goto error_return; + } ut_error; } break; @@ -576,6 +591,9 @@ fil_decompress_page( olen, actual_size, len); fflush(stderr); + if (return_error) { + goto error_return; + } ut_error; } break; @@ -609,6 +627,9 @@ fil_decompress_page( dst_pos, actual_size, len); fflush(stderr); + if (return_error) { + goto error_return; + } ut_error; } @@ -635,6 +656,9 @@ fil_decompress_page( dst_pos, actual_size, len, err); fflush(stderr); + if (return_error) { + goto error_return; + } ut_error; } break; @@ -660,6 +684,9 @@ fil_decompress_page( olen, actual_size, len, (int)cstatus); fflush(stderr); + if (return_error) { + goto error_return; + } ut_error; } break; @@ -673,6 +700,9 @@ fil_decompress_page( ,fil_get_compression_alg_name(compression_alg)); fflush(stderr); + if (return_error) { + goto error_return; + } ut_error; break; } @@ -683,6 +713,7 @@ fil_decompress_page( really any other options. */ memcpy(buf, in_buf, len); +error_return: // Need to free temporal buffer if no buffer was given if (page_buf == NULL) { ut_free(in_buf); diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc index 712dfa264d4..a2e8a3d90ac 100644 --- a/storage/xtradb/fts/fts0fts.cc +++ b/storage/xtradb/fts/fts0fts.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -260,16 +260,18 @@ static const char* fts_config_table_insert_values_sql = "INSERT INTO \"%s\" VALUES ('" FTS_TABLE_STATE "', '0');\n"; -/****************************************************************//** -Run SYNC on the table, i.e., write out data from the cache to the +/** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. -@return DB_SUCCESS if all OK */ +@param[in,out] sync sync state +@param[in] unlock_cache whether unlock cache lock when write node +@param[in] wait whether wait when a sync is in progress +@return DB_SUCCESS if all OK */ static dberr_t fts_sync( -/*=====*/ - fts_sync_t* sync) /*!< in: sync state */ - __attribute__((nonnull)); + fts_sync_t* sync, + bool unlock_cache, + bool wait); /****************************************************************//** Release all resources help by the words rb tree e.g., the node ilist. */ @@ -653,6 +655,7 @@ fts_cache_create( mem_heap_zalloc(heap, sizeof(fts_sync_t))); cache->sync->table = table; + cache->sync->event = os_event_create(); /* Create the index cache vector that will hold the inverted indexes. */ cache->indexes = ib_vector_create( @@ -1207,6 +1210,7 @@ fts_cache_destroy( mutex_free(&cache->optimize_lock); mutex_free(&cache->deleted_lock); mutex_free(&cache->doc_id_lock); + os_event_free(cache->sync->event); if (cache->stopword_info.cached_stopword) { rbt_free(cache->stopword_info.cached_stopword); @@ -1435,7 +1439,7 @@ fts_cache_add_doc( ib_vector_last(word->nodes)); } - if (fts_node == NULL + if (fts_node == NULL || fts_node->synced || fts_node->ilist_size > FTS_ILIST_MAX_SIZE || doc_id < fts_node->last_doc_id) { @@ -1915,7 +1919,7 @@ func_exit: trx_rollback_to_savepoint(trx, NULL); - row_drop_table_for_mysql(table->name, trx, FALSE); + row_drop_table_for_mysql(table->name, trx, FALSE, TRUE); trx->error_state = DB_SUCCESS; } @@ -2067,7 +2071,7 @@ fts_create_index_tables_low( trx_rollback_to_savepoint(trx, NULL); - row_drop_table_for_mysql(table_name, trx, FALSE); + row_drop_table_for_mysql(table_name, trx, FALSE, TRUE); trx->error_state = DB_SUCCESS; } @@ -2886,35 +2890,28 @@ fts_doc_ids_free( } /*********************************************************************//** -Do commit-phase steps necessary for the insertion of a new row. -@return DB_SUCCESS or error code */ -static __attribute__((nonnull, warn_unused_result)) -dberr_t +Do commit-phase steps necessary for the insertion of a new row. */ +void fts_add( /*====*/ fts_trx_table_t*ftt, /*!< in: FTS trx table */ fts_trx_row_t* row) /*!< in: row */ { dict_table_t* table = ftt->table; - dberr_t error = DB_SUCCESS; doc_id_t doc_id = row->doc_id; ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY); fts_add_doc_by_id(ftt, doc_id, row->fts_indexes); - if (error == DB_SUCCESS) { - mutex_enter(&table->fts->cache->deleted_lock); - ++table->fts->cache->added; - mutex_exit(&table->fts->cache->deleted_lock); + mutex_enter(&table->fts->cache->deleted_lock); + ++table->fts->cache->added; + mutex_exit(&table->fts->cache->deleted_lock); - if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) - && doc_id >= table->fts->cache->next_doc_id) { - table->fts->cache->next_doc_id = doc_id + 1; - } + if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) + && doc_id >= table->fts->cache->next_doc_id) { + table->fts->cache->next_doc_id = doc_id + 1; } - - return(error); } /*********************************************************************//** @@ -3025,7 +3022,7 @@ fts_modify( error = fts_delete(ftt, row); if (error == DB_SUCCESS) { - error = fts_add(ftt, row); + fts_add(ftt, row); } return(error); @@ -3114,7 +3111,7 @@ fts_commit_table( switch (row->state) { case FTS_INSERT: - error = fts_add(ftt, row); + fts_add(ftt, row); break; case FTS_MODIFY: @@ -3554,16 +3551,34 @@ fts_add_doc_by_id( get_doc->index_cache, doc_id, doc.tokens); + bool need_sync = false; + if ((cache->total_size > fts_max_cache_size / 10 + || fts_need_sync) + && !cache->sync->in_progress) { + need_sync = true; + } + rw_lock_x_unlock(&table->fts->cache->lock); DBUG_EXECUTE_IF( "fts_instrument_sync", - fts_sync(cache->sync); + fts_optimize_request_sync_table(table); + os_event_wait(cache->sync->event); + ); + + DBUG_EXECUTE_IF( + "fts_instrument_sync_debug", + fts_sync(cache->sync, true, true); ); - if (cache->total_size > fts_max_cache_size - || fts_need_sync) { - fts_sync(cache->sync); + DEBUG_SYNC_C("fts_instrument_sync_request"); + DBUG_EXECUTE_IF( + "fts_instrument_sync_request", + fts_optimize_request_sync_table(table); + ); + + if (need_sync) { + fts_optimize_request_sync_table(table); } mtr_start(&mtr); @@ -3934,16 +3949,17 @@ fts_sync_add_deleted_cache( return(error); } -/*********************************************************************//** -Write the words and ilist to disk. +/** Write the words and ilist to disk. +@param[in,out] trx transaction +@param[in] index_cache index cache +@param[in] unlock_cache whether unlock cache when write node @return DB_SUCCESS if all went well else error code */ static __attribute__((nonnull, warn_unused_result)) dberr_t fts_sync_write_words( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - fts_index_cache_t* - index_cache) /*!< in: index cache */ + trx_t* trx, + fts_index_cache_t* index_cache, + bool unlock_cache) { fts_table_t fts_table; ulint n_nodes = 0; @@ -3951,8 +3967,8 @@ fts_sync_write_words( const ib_rbt_node_t* rbt_node; dberr_t error = DB_SUCCESS; ibool print_error = FALSE; -#ifdef FTS_DOC_STATS_DEBUG dict_table_t* table = index_cache->index->table; +#ifdef FTS_DOC_STATS_DEBUG ulint n_new_words = 0; #endif /* FTS_DOC_STATS_DEBUG */ @@ -3965,7 +3981,7 @@ fts_sync_write_words( since we want to free the memory used during caching. */ for (rbt_node = rbt_first(index_cache->words); rbt_node; - rbt_node = rbt_first(index_cache->words)) { + rbt_node = rbt_next(index_cache->words, rbt_node)) { ulint i; ulint selected; @@ -3998,27 +4014,47 @@ fts_sync_write_words( } #endif /* FTS_DOC_STATS_DEBUG */ - n_nodes += ib_vector_size(word->nodes); - - /* We iterate over all the nodes even if there was an error, - this is to free the memory of the fts_node_t elements. */ + /* We iterate over all the nodes even if there was an error */ for (i = 0; i < ib_vector_size(word->nodes); ++i) { fts_node_t* fts_node = static_cast<fts_node_t*>( ib_vector_get(word->nodes, i)); + if (fts_node->synced) { + continue; + } else { + fts_node->synced = true; + } + + /*FIXME: we need to handle the error properly. */ if (error == DB_SUCCESS) { + if (unlock_cache) { + rw_lock_x_unlock( + &table->fts->cache->lock); + } error = fts_write_node( trx, &index_cache->ins_graph[selected], &fts_table, &word->text, fts_node); - } - ut_free(fts_node->ilist); - fts_node->ilist = NULL; + DEBUG_SYNC_C("fts_write_node"); + DBUG_EXECUTE_IF("fts_write_node_crash", + DBUG_SUICIDE();); + + DBUG_EXECUTE_IF("fts_instrument_sync_sleep", + os_thread_sleep(1000000); + ); + + if (unlock_cache) { + rw_lock_x_lock( + &table->fts->cache->lock); + } + } } + n_nodes += ib_vector_size(word->nodes); + if (error != DB_SUCCESS && !print_error) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Error (%s) writing " @@ -4027,9 +4063,6 @@ fts_sync_write_words( print_error = TRUE; } - - /* NOTE: We are responsible for free'ing the node */ - ut_free(rbt_remove_node(index_cache->words, rbt_node)); } #ifdef FTS_DOC_STATS_DEBUG @@ -4330,7 +4363,7 @@ fts_sync_index( ut_ad(rbt_validate(index_cache->words)); - error = fts_sync_write_words(trx, index_cache); + error = fts_sync_write_words(sync->trx, index_cache, sync->unlock_cache); #ifdef FTS_DOC_STATS_DEBUG /* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID" @@ -4346,6 +4379,36 @@ fts_sync_index( return(error); } +/** Check if index cache has been synced completely +@param[in,out] sync sync state +@param[in,out] index_cache index cache +@return true if index is synced, otherwise false. */ +static +bool +fts_sync_index_check( + fts_sync_t* sync, + fts_index_cache_t* index_cache) +{ + const ib_rbt_node_t* rbt_node; + + for (rbt_node = rbt_first(index_cache->words); + rbt_node != NULL; + rbt_node = rbt_next(index_cache->words, rbt_node)) { + + fts_tokenizer_word_t* word; + word = rbt_value(fts_tokenizer_word_t, rbt_node); + + fts_node_t* fts_node; + fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes)); + + if (!fts_node->synced) { + return(false); + } + } + + return(true); +} + /*********************************************************************//** Commit the SYNC, change state of processed doc ids etc. @return DB_SUCCESS if all OK */ @@ -4422,21 +4485,53 @@ fts_sync_rollback( trx_t* trx = sync->trx; fts_cache_t* cache = sync->table->fts->cache; + for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) { + ulint j; + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + for (j = 0; fts_index_selector[j].value; ++j) { + + if (index_cache->ins_graph[j] != NULL) { + + fts_que_graph_free_check_lock( + NULL, index_cache, + index_cache->ins_graph[j]); + + index_cache->ins_graph[j] = NULL; + } + + if (index_cache->sel_graph[j] != NULL) { + + fts_que_graph_free_check_lock( + NULL, index_cache, + index_cache->sel_graph[j]); + + index_cache->sel_graph[j] = NULL; + } + } + } + rw_lock_x_unlock(&cache->lock); fts_sql_rollback(trx); trx_free_for_background(trx); } -/****************************************************************//** -Run SYNC on the table, i.e., write out data from the cache to the +/** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. +@param[in,out] sync sync state +@param[in] unlock_cache whether unlock cache lock when write node +@param[in] wait whether wait when a sync is in progress @return DB_SUCCESS if all OK */ static dberr_t fts_sync( -/*=====*/ - fts_sync_t* sync) /*!< in: sync state */ + fts_sync_t* sync, + bool unlock_cache, + bool wait) { ulint i; dberr_t error = DB_SUCCESS; @@ -4444,8 +4539,35 @@ fts_sync( rw_lock_x_lock(&cache->lock); + /* Check if cache is being synced. + Note: we release cache lock in fts_sync_write_words() to + avoid long wait for the lock by other threads. */ + while (sync->in_progress) { + rw_lock_x_unlock(&cache->lock); + + if (wait) { + os_event_wait(sync->event); + } else { + return(DB_SUCCESS); + } + + rw_lock_x_lock(&cache->lock); + } + + sync->unlock_cache = unlock_cache; + sync->in_progress = true; + + DEBUG_SYNC_C("fts_sync_begin"); fts_sync_begin(sync); +begin_sync: + if (cache->total_size > fts_max_cache_size) { + /* Avoid the case: sync never finish when + insert/update keeps comming. */ + ut_ad(sync->unlock_cache); + sync->unlock_cache = false; + } + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { fts_index_cache_t* index_cache; @@ -4460,21 +4582,43 @@ fts_sync( if (error != DB_SUCCESS && !sync->interrupted) { - break; + goto end_sync; } } DBUG_EXECUTE_IF("fts_instrument_sync_interrupted", sync->interrupted = true; error = DB_INTERRUPTED; + goto end_sync; ); + /* Make sure all the caches are synced. */ + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + if (index_cache->index->to_be_dropped + || fts_sync_index_check(sync, index_cache)) { + continue; + } + + goto begin_sync; + } + +end_sync: if (error == DB_SUCCESS && !sync->interrupted) { error = fts_sync_commit(sync); } else { fts_sync_rollback(sync); } + rw_lock_x_lock(&cache->lock); + sync->in_progress = false; + os_event_set(sync->event); + rw_lock_x_unlock(&cache->lock); + /* We need to check whether an optimize is required, for that we make copies of the two variables that control the trigger. These variables can change behind our back and we don't want to hold the @@ -4489,21 +4633,25 @@ fts_sync( return(error); } -/****************************************************************//** -Run SYNC on the table, i.e., write out data from the cache to the -FTS auxiliary INDEX table and clear the cache at the end. */ +/** Run SYNC on the table, i.e., write out data from the cache to the +FTS auxiliary INDEX table and clear the cache at the end. +@param[in,out] table fts table +@param[in] unlock_cache whether unlock cache when write node +@param[in] wait whether wait for existing sync to finish +@return DB_SUCCESS on success, error code on failure. */ UNIV_INTERN dberr_t fts_sync_table( -/*===========*/ - dict_table_t* table) /*!< in: table */ + dict_table_t* table, + bool unlock_cache, + bool wait) { dberr_t err = DB_SUCCESS; ut_ad(table->fts); if (!dict_table_is_discarded(table) && table->fts->cache) { - err = fts_sync(table->fts->cache->sync); + err = fts_sync(table->fts->cache->sync, unlock_cache, wait); } return(err); diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc index 0703b050848..ccb7090c61d 100644 --- a/storage/xtradb/fts/fts0opt.cc +++ b/storage/xtradb/fts/fts0opt.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -87,6 +87,7 @@ enum fts_msg_type_t { FTS_MSG_DEL_TABLE, /*!< Remove a table from the optimize threads work queue */ + FTS_MSG_SYNC_TABLE /*!< Sync fts cache of a table */ }; /** Compressed list of words that have been read from FTS INDEX @@ -580,7 +581,7 @@ fts_zip_read_word( #ifdef UNIV_DEBUG ulint i; #endif - byte len = 0; + short len = 0; void* null = NULL; byte* ptr = word->f_str; int flush = Z_NO_FLUSH; @@ -590,7 +591,7 @@ fts_zip_read_word( return(NULL); } - zip->zp->next_out = &len; + zip->zp->next_out = reinterpret_cast<byte*>(&len); zip->zp->avail_out = sizeof(len); while (zip->status == Z_OK && zip->zp->avail_out > 0) { @@ -688,11 +689,12 @@ fts_fetch_index_words( fts_zip_t* zip = static_cast<fts_zip_t*>(user_arg); que_node_t* exp = sel_node->select_list; dfield_t* dfield = que_node_get_val(exp); - byte len = (byte) dfield_get_len(dfield); + short len = static_cast<short>(dfield_get_len(dfield)); void* data = dfield_get_data(dfield); /* Skip the duplicate words. */ - if (zip->word.f_len == len && !memcmp(zip->word.f_str, data, len)) { + if (zip->word.f_len == static_cast<ulint>(len) + && !memcmp(zip->word.f_str, data, len)) { return(TRUE); } @@ -706,7 +708,7 @@ fts_fetch_index_words( ut_a(zip->zp->next_in == NULL); /* The string is prefixed by len. */ - zip->zp->next_in = &len; + zip->zp->next_in = reinterpret_cast<byte*>(&len); zip->zp->avail_in = sizeof(len); /* Compress the word, create output blocks as necessary. */ @@ -2651,6 +2653,39 @@ fts_optimize_remove_table( os_event_free(event); } +/** Send sync fts cache for the table. +@param[in] table table to sync */ +UNIV_INTERN +void +fts_optimize_request_sync_table( + dict_table_t* table) +{ + fts_msg_t* msg; + table_id_t* table_id; + + /* if the optimize system not yet initialized, return */ + if (!fts_optimize_wq) { + return; + } + + /* FTS optimizer thread is already exited */ + if (fts_opt_start_shutdown) { + ib_logf(IB_LOG_LEVEL_INFO, + "Try to sync table %s after FTS optimize" + " thread exiting.", table->name); + return; + } + + msg = fts_optimize_create_msg(FTS_MSG_SYNC_TABLE, NULL); + + table_id = static_cast<table_id_t*>( + mem_heap_alloc(msg->heap, sizeof(table_id_t))); + *table_id = table->id; + msg->ptr = table_id; + + ib_wqueue_add(fts_optimize_wq, msg, msg->heap); +} + /**********************************************************************//** Find the slot for a particular table. @return slot if found else NULL. */ @@ -2931,6 +2966,25 @@ fts_optimize_need_sync( } #endif +/** Sync fts cache of a table +@param[in] table_id table id */ +void +fts_optimize_sync_table( + table_id_t table_id) +{ + dict_table_t* table = NULL; + + table = dict_table_open_on_id(table_id, FALSE, DICT_TABLE_OP_NORMAL); + + if (table) { + if (dict_table_has_fts_index(table) && table->fts->cache) { + fts_sync_table(table, true, false); + } + + dict_table_close(table, FALSE, FALSE); + } +} + /**********************************************************************//** Optimize all FTS tables. @return Dummy return */ @@ -3052,6 +3106,11 @@ fts_optimize_thread( ((fts_msg_del_t*) msg->ptr)->event); break; + case FTS_MSG_SYNC_TABLE: + fts_optimize_sync_table( + *static_cast<table_id_t*>(msg->ptr)); + break; + default: ut_error; } @@ -3078,26 +3137,7 @@ fts_optimize_thread( ib_vector_get(tables, i)); if (slot->state != FTS_STATE_EMPTY) { - dict_table_t* table = NULL; - - /*slot->table may be freed, so we try to open - table by slot->table_id.*/ - table = dict_table_open_on_id( - slot->table_id, FALSE, - DICT_TABLE_OP_NORMAL); - - if (table) { - - if (dict_table_has_fts_index(table)) { - fts_sync_table(table); - } - - if (table->fts) { - fts_free(table); - } - - dict_table_close(table, FALSE, FALSE); - } + fts_optimize_sync_table(slot->table_id); } } } diff --git a/storage/xtradb/ha/ha0ha.cc b/storage/xtradb/ha/ha0ha.cc index b79ae922045..3674260f173 100644 --- a/storage/xtradb/ha/ha0ha.cc +++ b/storage/xtradb/ha/ha0ha.cc @@ -155,11 +155,15 @@ ha_clear( switch (table->type) { case HASH_TABLE_SYNC_MUTEX: + for (ulint i = 0; i < table->n_sync_obj; i++) + mutex_free(table->sync_obj.mutexes + i); mem_free(table->sync_obj.mutexes); table->sync_obj.mutexes = NULL; break; case HASH_TABLE_SYNC_RW_LOCK: + for (ulint i = 0; i < table->n_sync_obj; i++) + rw_lock_free(table->sync_obj.rw_locks + i); mem_free(table->sync_obj.rw_locks); table->sync_obj.rw_locks = NULL; break; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 3a5e98fcb32..2c71488c18b 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2015, Oracle and/or its affiliates. +Copyright (c) 2000, 2016, Oracle and/or its affiliates. Copyright (c) 2013, 2016, MariaDB Corporation. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. @@ -617,7 +617,6 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_trx_read_only }; - static void innodb_remember_check_sysvar_funcs(); mysql_var_check_func check_sysvar_enum; @@ -652,6 +651,67 @@ ha_create_table_option innodb_table_option_list[]= HA_TOPTION_END }; +/** + Test a file path whether it is same as mysql data directory path. + + @param path null terminated character string + + @return + @retval TRUE The path is different from mysql data directory. + @retval FALSE The path is same as mysql data directory. +*/ +static bool is_mysql_datadir_path(const char *path) +{ + if (path == NULL) + return false; + + char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN]; + convert_dirname(path_dir, path, NullS); + convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS); + size_t mysql_data_home_len= dirname_length(mysql_data_dir); + size_t path_len = dirname_length(path_dir); + + if (path_len < mysql_data_home_len) + return true; + + if (!lower_case_file_system) + return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len)); + + return(files_charset_info->coll->strnncoll(files_charset_info, + (uchar *) path_dir, path_len, + (uchar *) mysql_data_dir, + mysql_data_home_len, + TRUE)); +} + + +static int mysql_tmpfile_path(const char *path, const char *prefix) +{ + DBUG_ASSERT(path != NULL); + DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN); + + char filename[FN_REFLEN]; + File fd = create_temp_file(filename, path, prefix, +#ifdef __WIN__ + O_BINARY | O_TRUNC | O_SEQUENTIAL | + O_SHORT_LIVED | +#endif /* __WIN__ */ + O_CREAT | O_EXCL | O_RDWR | O_TEMPORARY, + MYF(MY_WME)); + if (fd >= 0) { +#ifndef __WIN__ + /* + This can be removed once the following bug is fixed: + Bug #28903 create_temp_file() doesn't honor O_TEMPORARY option + (file not removed) (Unix) + */ + unlink(filename); +#endif /* !__WIN__ */ + } + + return fd; +} + /*************************************************************//** Check whether valid argument given to innodb_ft_*_stopword_table. This function is registered as a callback with MySQL. @@ -667,6 +727,108 @@ innodb_stopword_table_validate( for update function */ struct st_mysql_value* value); /*!< in: incoming string */ +/** Validate passed-in "value" is a valid directory name. +This function is registered as a callback with MySQL. +@param[in,out] thd thread handle +@param[in] var pointer to system variable +@param[out] save immediate result for update +@param[in] value incoming string +@return 0 for valid name */ +static +int +innodb_tmpdir_validate( + THD* thd, + struct st_mysql_sys_var* var, + void* save, + struct st_mysql_value* value) +{ + + char* alter_tmp_dir; + char* innodb_tmp_dir; + char buff[OS_FILE_MAX_PATH]; + int len = sizeof(buff); + char tmp_abs_path[FN_REFLEN + 2]; + + ut_ad(save != NULL); + ut_ad(value != NULL); + + if (check_global_access(thd, FILE_ACL)) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "InnoDB: FILE Permissions required"); + *static_cast<const char**>(save) = NULL; + return(1); + } + + alter_tmp_dir = (char*) value->val_str(value, buff, &len); + + if (!alter_tmp_dir) { + *static_cast<const char**>(save) = alter_tmp_dir; + return(0); + } + + if (strlen(alter_tmp_dir) > FN_REFLEN) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Path length should not exceed %d bytes", FN_REFLEN); + *static_cast<const char**>(save) = NULL; + return(1); + } + + my_realpath(tmp_abs_path, alter_tmp_dir, 0); + size_t tmp_abs_len = strlen(tmp_abs_path); + + if (my_access(tmp_abs_path, F_OK)) { + + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "InnoDB: Path doesn't exist."); + *static_cast<const char**>(save) = NULL; + return(1); + } else if (my_access(tmp_abs_path, R_OK | W_OK)) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "InnoDB: Server doesn't have permission in " + "the given location."); + *static_cast<const char**>(save) = NULL; + return(1); + } + + MY_STAT stat_info_dir; + + if (my_stat(tmp_abs_path, &stat_info_dir, MYF(0))) { + if ((stat_info_dir.st_mode & S_IFDIR) != S_IFDIR) { + + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Given path is not a directory. "); + *static_cast<const char**>(save) = NULL; + return(1); + } + } + + if (!is_mysql_datadir_path(tmp_abs_path)) { + + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "InnoDB: Path Location should not be same as " + "mysql data directory location."); + *static_cast<const char**>(save) = NULL; + return(1); + } + + innodb_tmp_dir = static_cast<char*>( + thd_memdup(thd, tmp_abs_path, tmp_abs_len + 1)); + *static_cast<const char**>(save) = innodb_tmp_dir; + return(0); +} + /** "GEN_CLUST_INDEX" is the name reserved for InnoDB default system clustered index when there is no primary key. */ const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX"; @@ -796,6 +958,11 @@ static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG, "This is to cause replication prefetch IO. ATTENTION: the transaction started after enabled is affected.", NULL, NULL, FALSE); +static MYSQL_THDVAR_STR(tmpdir, + PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC, + "Directory for temporary non-tablespace files.", + innodb_tmpdir_validate, NULL, NULL); + static ibool innodb_have_lzo=IF_LZO(1, 0); static ibool innodb_have_lz4=IF_LZ4(1, 0); static ibool innodb_have_lzma=IF_LZMA(1, 0); @@ -1492,6 +1659,28 @@ normalize_table_name_low( ibool set_lower_case); /* in: TRUE if we want to set name to lower case */ +/*************************************************************//** +Checks if buffer pool is big enough to enable backoff algorithm. +InnoDB empty free list algorithm backoff requires free pages +from LRU for the best performance. +buf_LRU_buf_pool_running_out cancels query if 1/4 of +buffer pool belongs to LRU or freelist. +At the same time buf_flush_LRU_list_batch +keeps up to BUF_LRU_MIN_LEN in LRU. +In order to avoid deadlock baclkoff requires buffer pool +to be at least 4*BUF_LRU_MIN_LEN, +but flush peformance is bad because of trashing +and additional BUF_LRU_MIN_LEN pages are requested. +@return true if it's possible to enable backoff. */ +static +bool +innodb_empty_free_list_algorithm_backoff_allowed( + srv_empty_free_list_t + algorithm, /*!< in: desired algorithm + from srv_empty_free_list_t */ + long long buf_pool_pages); /*!< in: total number + of pages inside buffer pool */ + #ifdef NOT_USED /*************************************************************//** Removes old archived transaction log files. @@ -1780,6 +1969,26 @@ thd_supports_xa( return(THDVAR(thd, support_xa)); } +/** Get the value of innodb_tmpdir. +@param[in] thd thread handle, or NULL to query + the global innodb_tmpdir. +@retval NULL if innodb_tmpdir="" */ +UNIV_INTERN +const char* +thd_innodb_tmpdir( + THD* thd) +{ +#ifdef UNIV_SYNC_DEBUG + ut_ad(!sync_thread_levels_nonempty_trx(false)); +#endif /* UNIV_SYNC_DEBUG */ + + const char* tmp_dir = THDVAR(thd, tmpdir); + if (tmp_dir != NULL && *tmp_dir == '\0') { + tmp_dir = NULL; + } + + return(tmp_dir); +} /******************************************************************//** Check the status of fake changes mode (innodb_fake_changes) @return true if fake change mode is enabled. */ @@ -2360,13 +2569,14 @@ innobase_get_lower_case_table_names(void) return(lower_case_table_names); } -/*********************************************************************//** -Creates a temporary file. +/** Create a temporary file in the location specified by the parameter +path. If the path is null, then it will be created in tmpdir. +@param[in] path location for creating temporary file @return temporary file descriptor, or < 0 on error */ UNIV_INTERN int -innobase_mysql_tmpfile(void) -/*========================*/ +innobase_mysql_tmpfile( + const char* path) { #ifdef WITH_INNODB_DISALLOW_WRITES os_event_wait(srv_allow_writes_event); @@ -2379,7 +2589,11 @@ innobase_mysql_tmpfile(void) return(-1); ); - fd = mysql_tmpfile("ib"); + if (path == NULL) { + fd = mysql_tmpfile("ib"); + } else { + fd = mysql_tmpfile_path(path, "ib"); + } if (fd >= 0) { /* Copy the file descriptor, so that the additional resources @@ -2922,7 +3136,7 @@ ha_innobase::ha_innobase( :handler(hton, table_arg), int_table_flags(HA_REC_NOT_IN_SEQ | HA_NULL_IN_KEY | HA_CAN_VIRTUAL_COLUMNS | - HA_CAN_INDEX_BLOBS | + HA_CAN_INDEX_BLOBS | HA_CONCURRENT_OPTIMIZE | HA_CAN_SQL_HANDLER | HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | HA_PRIMARY_KEY_IN_READ_INDEX | @@ -3408,6 +3622,13 @@ ha_innobase::reset_template(void) ut_ad(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); ut_ad(prebuilt->magic_n2 == prebuilt->magic_n); + /* Force table to be freed in close_thread_table(). */ + DBUG_EXECUTE_IF("free_table_in_fts_query", + if (prebuilt->in_fts_query) { + table->m_needs_reopen = true; + } + ); + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; prebuilt->in_fts_query = 0; @@ -4035,6 +4256,16 @@ innobase_change_buffering_inited_ok: innobase_open_files = tc_size; } } + + if (innobase_open_files > (long) open_files_limit) { + fprintf(stderr, + "innodb_open_files should not be greater" + " than the open_files_limit.\n"); + if (innobase_open_files > (long) tc_size) { + innobase_open_files = tc_size; + } + } + srv_max_n_open_files = (ulint) innobase_open_files; srv_innodb_status = (ibool) innobase_create_status_file; @@ -4076,6 +4307,22 @@ innobase_change_buffering_inited_ok: #ifdef HAVE_POSIX_FALLOCATE srv_use_posix_fallocate = (ibool) innobase_use_fallocate; #endif + + /* Do not enable backoff algorithm for small buffer pool. */ + if (!innodb_empty_free_list_algorithm_backoff_allowed( + static_cast<srv_empty_free_list_t>( + srv_empty_free_list_algorithm), + innobase_buffer_pool_size / srv_page_size)) { + sql_print_information( + "InnoDB: innodb_empty_free_list_algorithm " + "has been changed to legacy " + "because of small buffer pool size. " + "In order to use backoff, " + "increase buffer pool at least up to 20MB.\n"); + srv_empty_free_list_algorithm + = SRV_EMPTY_FREE_LIST_LEGACY; + } + srv_use_atomic_writes = (ibool) innobase_use_atomic_writes; if (innobase_use_atomic_writes) { ib_logf(IB_LOG_LEVEL_INFO, "using atomic writes."); @@ -5718,7 +5965,7 @@ building based on the assumption that there is no concurrent index creation/drop and DMLs that requires index lookup. All table handle will be closed before the index creation/drop. @return TRUE if index translation table built successfully */ -static +UNIV_INTERN ibool innobase_build_index_translation( /*=============================*/ @@ -6270,20 +6517,14 @@ table_opened: prebuilt->clust_index_was_generated = FALSE; if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) { - sql_print_error("Table %s has a primary key in " - "InnoDB data dictionary, but not " - "in MySQL!", name); + ib_table->dict_frm_mismatch = DICT_FRM_NO_PK; /* This mismatch could cause further problems if not attended, bring this to the user's attention by printing a warning in addition to log a message in the errorlog */ - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_INDEX, - "InnoDB: Table %s has a " - "primary key in InnoDB data " - "dictionary, but not in " - "MySQL!", name); + + ib_push_frm_error(thd, ib_table, table, 0, true); /* If primary_key >= MAX_KEY, its (primary_key) value could be out of bound if continue to index @@ -6330,27 +6571,14 @@ table_opened: } } else { if (primary_key != MAX_KEY) { - sql_print_error( - "Table %s has no primary key in InnoDB data " - "dictionary, but has one in MySQL! If you " - "created the table with a MySQL version < " - "3.23.54 and did not define a primary key, " - "but defined a unique key with all non-NULL " - "columns, then MySQL internally treats that " - "key as the primary key. You can fix this " - "error by dump + DROP + CREATE + reimport " - "of the table.", name); + + ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS; /* This mismatch could cause further problems if not attended, bring this to the user attention by printing a warning in addition to log a message in the errorlog */ - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_INDEX, - "InnoDB: Table %s has no " - "primary key in InnoDB data " - "dictionary, but has one in " - "MySQL!", name); + ib_push_frm_error(thd, ib_table, table, 0, true); } prebuilt->clust_index_was_generated = TRUE; @@ -12862,7 +13090,8 @@ ha_innobase::delete_table( /* Drop the table in InnoDB */ err = row_drop_table_for_mysql( - norm_name, trx, thd_sql_command(thd) == SQLCOM_DROP_DB); + norm_name, trx, thd_sql_command(thd) == SQLCOM_DROP_DB, + FALSE); if (err == DB_TABLE_NOT_FOUND @@ -12893,7 +13122,8 @@ ha_innobase::delete_table( #endif err = row_drop_table_for_mysql( par_case_name, trx, - thd_sql_command(thd) == SQLCOM_DROP_DB); + thd_sql_command(thd) == SQLCOM_DROP_DB, + FALSE); } } @@ -13960,12 +14190,8 @@ ha_innobase::info_low( } if (table->s->keys != num_innodb_index) { - sql_print_error("InnoDB: Table %s contains %lu " - "indexes inside InnoDB, which " - "is different from the number of " - "indexes %u defined in the MySQL ", - ib_table->name, num_innodb_index, - table->s->keys); + ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; + ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true); } if (!(flag & HA_STATUS_NO_LOCK)) { @@ -13985,15 +14211,8 @@ ha_innobase::info_low( dict_index_t* index = innobase_get_index(i); if (index == NULL) { - sql_print_error("Table %s contains fewer " - "indexes inside InnoDB than " - "are defined in the MySQL " - ".frm file. Have you mixed up " - ".frm files from different " - "installations? See " - REFMAN - "innodb-troubleshooting.html\n", - ib_table->name); + ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; + ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true); break; } @@ -14229,7 +14448,7 @@ ha_innobase::optimize( if (innodb_optimize_fulltext_only) { if (prebuilt->table->fts && prebuilt->table->fts->cache && !dict_table_is_discarded(prebuilt->table)) { - fts_sync_table(prebuilt->table); + fts_sync_table(prebuilt->table, false, true); fts_optimize_table(prebuilt->table); } return(HA_ADMIN_OK); @@ -14291,6 +14510,34 @@ ha_innobase::check( DBUG_RETURN(HA_ADMIN_CORRUPT); } + if (prebuilt->table->corrupted) { + char index_name[MAX_FULL_NAME_LEN + 1]; + /* If some previous operation has marked the table as + corrupted in memory, and has not propagated such to + clustered index, we will do so here */ + index = dict_table_get_first_index(prebuilt->table); + + if (!dict_index_is_corrupted(index)) { + row_mysql_lock_data_dictionary(prebuilt->trx); + dict_set_corrupted(index, prebuilt->trx, "CHECK TABLE"); + row_mysql_unlock_data_dictionary(prebuilt->trx); + } + + innobase_format_name(index_name, sizeof index_name, + index->name, TRUE); + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + HA_ERR_INDEX_CORRUPT, + "InnoDB: Index %s is marked as" + " corrupted", index_name); + + /* Now that the table is already marked as corrupted, + there is no need to check any index of this table */ + prebuilt->trx->op_info = ""; + + DBUG_RETURN(HA_ADMIN_CORRUPT); + } + prebuilt->trx->op_info = "checking table"; old_isolation_level = prebuilt->trx->isolation_level; @@ -14374,6 +14621,15 @@ ha_innobase::check( prebuilt->index_usable = row_merge_is_index_usable( prebuilt->trx, prebuilt->index); + DBUG_EXECUTE_IF( + "dict_set_index_corrupted", + if (!dict_index_is_clust(index)) { + prebuilt->index_usable = FALSE; + row_mysql_lock_data_dictionary(prebuilt->trx); + dict_set_corrupted(index, prebuilt->trx, "dict_set_index_corrupted");; + row_mysql_unlock_data_dictionary(prebuilt->trx); + }); + if (UNIV_UNLIKELY(!prebuilt->index_usable)) { innobase_format_name( index_name, sizeof index_name, @@ -18762,8 +19018,6 @@ innobase_fts_close_ranking( { fts_result_t* result; - ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt->in_fts_query = false; - result = ((NEW_FT_INFO*) fts_hdl)->ft_result; fts_query_free_result(result); @@ -19476,6 +19730,7 @@ static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid) return 0; } + static void wsrep_fake_trx_id( /*==================*/ @@ -19491,6 +19746,88 @@ wsrep_fake_trx_id( #endif /* WITH_WSREP */ + +/*************************************************************//** +Empty free list algorithm. +Checks if buffer pool is big enough to enable backoff algorithm. +InnoDB empty free list algorithm backoff requires free pages +from LRU for the best performance. +buf_LRU_buf_pool_running_out cancels query if 1/4 of +buffer pool belongs to LRU or freelist. +At the same time buf_flush_LRU_list_batch +keeps up to BUF_LRU_MIN_LEN in LRU. +In order to avoid deadlock baclkoff requires buffer pool +to be at least 4*BUF_LRU_MIN_LEN, +but flush peformance is bad because of trashing +and additional BUF_LRU_MIN_LEN pages are requested. +@return true if it's possible to enable backoff. */ +static +bool +innodb_empty_free_list_algorithm_backoff_allowed( + srv_empty_free_list_t algorithm, /*!< in: desired algorithm + from srv_empty_free_list_t */ + long long buf_pool_pages) /*!< in: total number + of pages inside buffer pool */ +{ + return(buf_pool_pages >= BUF_LRU_MIN_LEN * (4 + 1) + || algorithm != SRV_EMPTY_FREE_LIST_BACKOFF); +} + +/*************************************************************//** +Empty free list algorithm. This function is registered as +a callback with MySQL. +@return 0 for valid algorithm */ +static +int +innodb_srv_empty_free_list_algorithm_validate( +/*===========================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to system + variable */ + void* save, /*!< out: immediate result + for update function */ + struct st_mysql_value* value) /*!< in: incoming string */ +{ + const char* algorithm_name; + char buff[STRING_BUFFER_USUAL_SIZE]; + int len = sizeof(buff); + ulint algo; + srv_empty_free_list_t algorithm; + + algorithm_name = value->val_str(value, buff, &len); + + if (!algorithm_name) { + return(1); + } + + for (algo = 0; algo < array_elements( + innodb_empty_free_list_algorithm_names + ) - 1; + algo++) { + if (!innobase_strcasecmp( + algorithm_name, + innodb_empty_free_list_algorithm_names[algo])) + break; + } + + if (algo == array_elements( innodb_empty_free_list_algorithm_names) - 1) + return(1); + + algorithm = static_cast<srv_empty_free_list_t>(algo); + if (!innodb_empty_free_list_algorithm_backoff_allowed( + algorithm, + innobase_buffer_pool_size / srv_page_size)) { + sql_print_warning( + "InnoDB: innodb_empty_free_list_algorithm " + "= 'backoff' requires at least" + " 20MB buffer pool.\n"); + return(1); + } + + *reinterpret_cast<ulong*>(save) = static_cast<ulong>(algorithm); + return(0); +} + /* plugin options */ static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm, @@ -20041,7 +20378,7 @@ static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm, "The algorithm to use for empty free list handling. Allowed values: " "LEGACY: Original Oracle MySQL 5.6 handling with single page flushes; " "BACKOFF: (default) Wait until cleaner produces a free page.", - NULL, NULL, SRV_EMPTY_FREE_LIST_BACKOFF, + innodb_srv_empty_free_list_algorithm_validate, NULL, SRV_EMPTY_FREE_LIST_BACKOFF, &innodb_empty_free_list_algorithm_typelib); static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances, @@ -21081,6 +21418,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(corrupt_table_action), MYSQL_SYSVAR(fake_changes), MYSQL_SYSVAR(locking_fake_changes), + MYSQL_SYSVAR(tmpdir), MYSQL_SYSVAR(use_stacktrace), MYSQL_SYSVAR(force_primary_key), MYSQL_SYSVAR(fatal_semaphore_wait_threshold), @@ -21825,3 +22163,96 @@ ib_push_warning( my_free(buf); va_end(args); } + +/********************************************************************//** +Helper function to push frm mismatch error to error log and +if needed to sql-layer. */ +UNIV_INTERN +void +ib_push_frm_error( +/*==============*/ + THD* thd, /*!< in: MySQL thd */ + dict_table_t* ib_table, /*!< in: InnoDB table */ + TABLE* table, /*!< in: MySQL table */ + ulint n_keys, /*!< in: InnoDB #keys */ + bool push_warning) /*!< in: print warning ? */ +{ + switch (ib_table->dict_frm_mismatch) { + case DICT_FRM_NO_PK: + sql_print_error("Table %s has a primary key in " + "InnoDB data dictionary, but not " + "in MySQL!" + " Have you mixed up " + ".frm files from different " + "installations? See " + REFMAN + "innodb-troubleshooting.html\n", + ib_table->name); + + if (push_warning) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NO_SUCH_INDEX, + "InnoDB: Table %s has a " + "primary key in InnoDB data " + "dictionary, but not in " + "MySQL!", ib_table->name); + } + break; + case DICT_NO_PK_FRM_HAS: + sql_print_error( + "Table %s has no primary key in InnoDB data " + "dictionary, but has one in MySQL! If you " + "created the table with a MySQL version < " + "3.23.54 and did not define a primary key, " + "but defined a unique key with all non-NULL " + "columns, then MySQL internally treats that " + "key as the primary key. You can fix this " + "error by dump + DROP + CREATE + reimport " + "of the table.", ib_table->name); + + if (push_warning) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NO_SUCH_INDEX, + "InnoDB: Table %s has no " + "primary key in InnoDB data " + "dictionary, but has one in " + "MySQL!", + ib_table->name); + } + break; + + case DICT_FRM_INCONSISTENT_KEYS: + sql_print_error("InnoDB: Table %s contains %lu " + "indexes inside InnoDB, which " + "is different from the number of " + "indexes %u defined in the MySQL " + " Have you mixed up " + ".frm files from different " + "installations? See " + REFMAN + "innodb-troubleshooting.html\n", + ib_table->name, n_keys, + table->s->keys); + + if (push_warning) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NO_SUCH_INDEX, + "InnoDB: Table %s contains %lu " + "indexes inside InnoDB, which " + "is different from the number of " + "indexes %u defined in the MySQL ", + ib_table->name, n_keys, + table->s->keys); + } + break; + + case DICT_FRM_CONSISTENT: + default: + sql_print_error("InnoDB: Table %s is consistent " + "on InnoDB data dictionary and MySQL " + " FRM file.", + ib_table->name); + ut_error; + break; + } +} diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 2027a593140..ea4be1fb2e0 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -696,3 +696,39 @@ innobase_copy_frm_flags_from_table_share( /*=====================================*/ dict_table_t* innodb_table, /*!< in/out: InnoDB table */ const TABLE_SHARE* table_share); /*!< in: table share */ + +/*******************************************************************//** +This function builds a translation table in INNOBASE_SHARE +structure for fast index location with mysql array number from its +table->key_info structure. This also provides the necessary translation +between the key order in mysql key_info and Innodb ib_table->indexes if +they are not fully matched with each other. +Note we do not have any mutex protecting the translation table +building based on the assumption that there is no concurrent +index creation/drop and DMLs that requires index lookup. All table +handle will be closed before the index creation/drop. +@return TRUE if index translation table built successfully */ +UNIV_INTERN +ibool +innobase_build_index_translation( +/*=============================*/ + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + dict_table_t* ib_table, /*!< in: table in Innodb data + dictionary */ + INNOBASE_SHARE* share); /*!< in/out: share structure + where index translation table + will be constructed in. */ + +/********************************************************************//** +Helper function to push frm mismatch error to error log and +if needed to sql-layer. */ +UNIV_INTERN +void +ib_push_frm_error( +/*==============*/ + THD* thd, /*!< in: MySQL thd */ + dict_table_t* ib_table, /*!< in: InnoDB table */ + TABLE* table, /*!< in: MySQL table */ + ulint n_keys, /*!< in: InnoDB #keys */ + bool push_warning); /*!< in: print warning ? */ diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 76ae9517500..68d80d2bff0 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -463,6 +463,20 @@ ha_innobase::check_if_supported_inplace_alter( } } + ulint n_indexes = UT_LIST_GET_LEN((prebuilt->table)->indexes); + + /* If InnoDB dictionary and MySQL frm file are not consistent + use "Copy" method. */ + if (prebuilt->table->dict_frm_mismatch) { + + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_NO_SUCH_INDEX); + ib_push_frm_error(user_thd, prebuilt->table, altered_table, + n_indexes, true); + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + /* We should be able to do the operation in-place. See if we can do it online (LOCK=NONE). */ bool online = true; @@ -2777,6 +2791,10 @@ prepare_inplace_alter_table_dict( ctx->num_to_add_index = ha_alter_info->index_add_count; + ut_ad(ctx->prebuilt->trx->mysql_thd != NULL); + const char* path = thd_innodb_tmpdir( + ctx->prebuilt->trx->mysql_thd); + index_defs = innobase_create_key_defs( ctx->heap, ha_alter_info, altered_table, ctx->num_to_add_index, num_fts_index, @@ -3130,8 +3148,10 @@ prepare_inplace_alter_table_dict( error = DB_OUT_OF_MEMORY; goto error_handling;); rw_lock_x_lock(&ctx->add_index[a]->lock); + bool ok = row_log_allocate(ctx->add_index[a], - NULL, true, NULL, NULL); + NULL, true, NULL, + NULL, path); rw_lock_x_unlock(&ctx->add_index[a]->lock); if (!ok) { @@ -3157,7 +3177,7 @@ prepare_inplace_alter_table_dict( clust_index, ctx->new_table, !(ha_alter_info->handler_flags & Alter_inplace_info::ADD_PK_INDEX), - ctx->add_cols, ctx->col_map); + ctx->add_cols, ctx->col_map, path); rw_lock_x_unlock(&clust_index->lock); if (!ok) { @@ -3920,6 +3940,24 @@ check_if_can_drop_indexes: drop_index = NULL; } + /* Check if any of the existing indexes are marked as corruption + and if they are, refuse adding more indexes. */ + if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_INDEX) { + for (dict_index_t* index = dict_table_get_first_index(indexed_table); + index != NULL; index = dict_table_get_next_index(index)) { + + if (!index->to_be_dropped && dict_index_is_corrupted(index)) { + char index_name[MAX_FULL_NAME_LEN + 1]; + + innobase_format_name(index_name, sizeof index_name, + index->name, TRUE); + + my_error(ER_INDEX_CORRUPT, MYF(0), index_name); + DBUG_RETURN(true); + } + } + } + n_add_fk = 0; if (ha_alter_info->handler_flags @@ -4159,6 +4197,7 @@ ok_exit: files and merge sort. */ DBUG_EXECUTE_IF("innodb_OOM_inplace_alter", error = DB_OUT_OF_MEMORY; goto oom;); + error = row_merge_build_indexes( prebuilt->trx, prebuilt->table, ctx->new_table, @@ -6194,6 +6233,21 @@ foreign_fail: row_mysql_unlock_data_dictionary(trx); trx_free_for_mysql(trx); + /* Rebuild index translation table now for temporary tables if we are + restoring secondary keys, as ha_innobase::open will not be called for + the next access. */ + if (dict_table_is_temporary(ctx0->new_table) + && ctx0->num_to_add_index > 0) { + ut_ad(!ctx0->num_to_drop_index); + ut_ad(!ctx0->num_to_drop_fk); + if (!innobase_build_index_translation(altered_table, + ctx0->new_table, + share)) { + MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); + DBUG_RETURN(true); + } + } + /* TODO: The following code could be executed while allowing concurrent access to the table (MDL downgrade). */ diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc index bac2a92dd0b..4467b713c88 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.cc +++ b/storage/xtradb/ibuf/ibuf0ibuf.cc @@ -2917,7 +2917,7 @@ ibuf_contract_in_background( sum_bytes += n_bytes; sum_pages += n_pag2; - srv_inc_activity_count(); + srv_inc_activity_count(true); } return(sum_bytes); diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index f7e51231471..7b1c66f2a05 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -305,6 +305,20 @@ buf_page_set_state( ut_a(state == BUF_BLOCK_NOT_USED); break; case BUF_BLOCK_FILE_PAGE: + if (!(state == BUF_BLOCK_NOT_USED + || state == BUF_BLOCK_REMOVE_HASH)) { + const char *old_state_name = buf_get_state_name((buf_block_t*)bpage); + bpage->state = state; + + fprintf(stderr, + "InnoDB: Error: block old state %d (%s) " + " new state %d (%s) not correct\n", + old_state, + old_state_name, + state, + buf_get_state_name((buf_block_t*)bpage)); + } + ut_a(state == BUF_BLOCK_NOT_USED || state == BUF_BLOCK_REMOVE_HASH); break; diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h index 87ce7d88bdf..c8a329251fa 100644 --- a/storage/xtradb/include/buf0flu.h +++ b/storage/xtradb/include/buf0flu.h @@ -312,6 +312,12 @@ buf_flush_flush_list_in_progress(void) /*==================================*/ __attribute__((warn_unused_result)); +/** If LRU list of a buf_pool is less than this size then LRU eviction +should not happen. This is because when we do LRU flushing we also put +the blocks on free list. If LRU list is very small then we can end up +in thrashing. */ +#define BUF_LRU_MIN_LEN 256 + /******************************************************************//** Start a buffer flush batch for LRU or flush list */ ibool diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index 9c43829cecf..a55d5316969 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -2,7 +2,7 @@ Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2015, MariaDB Corporation. +Copyright (c) 2013, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1025,6 +1025,18 @@ if table->memcached_sync_count == DICT_TABLE_IN_DDL means there's DDL running on the table, DML from memcached will be blocked. */ #define DICT_TABLE_IN_DDL -1 +/** These are used when MySQL FRM and InnoDB data dictionary are +in inconsistent state. */ +typedef enum { + DICT_FRM_CONSISTENT = 0, /*!< Consistent state */ + DICT_FRM_NO_PK = 1, /*!< MySQL has no primary key + but InnoDB dictionary has + non-generated one. */ + DICT_NO_PK_FRM_HAS = 2, /*!< MySQL has primary key but + InnoDB dictionary has not. */ + DICT_FRM_INCONSISTENT_KEYS = 3 /*!< Key count mismatch */ +} dict_frm_t; + /** Data structure for a database table. Most fields will be initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_t{ @@ -1085,6 +1097,10 @@ struct dict_table_t{ /*!< True if the table belongs to a system database (mysql, information_schema or performance_schema) */ + dict_frm_t dict_frm_mismatch; + /*!< !DICT_FRM_CONSISTENT==0 if data + dictionary information and + MySQL FRM information mismatch. */ #ifndef UNIV_HOTBACKUP hash_node_t name_hash; /*!< hash chain node */ hash_node_t id_hash; /*!< hash chain node */ diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 973882a50b4..fb9c79a3e5a 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1281,11 +1281,6 @@ fil_system_hash_nodes(void); /************************************************************************* functions to access is_corrupt flag of fil_space_t*/ -ibool -fil_space_is_corrupt( -/*=================*/ - ulint space_id); - void fil_space_set_corrupt( /*==================*/ diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h index 8316083d52d..99a05f14ffe 100644 --- a/storage/xtradb/include/fil0pagecompress.h +++ b/storage/xtradb/include/fil0pagecompress.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013, 2015 MariaDB Corporation. All Rights Reserved. +Copyright (C) 2013, 2016 MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -117,8 +117,12 @@ fil_decompress_page( byte* buf, /*!< out: buffer from which to read; in aio this must be appropriately aligned */ ulong len, /*!< in: length of output buffer.*/ - ulint* write_size); /*!< in/out: Actual payload size of + ulint* write_size, /*!< in/out: Actual payload size of the compressed data. */ + bool return_error=false); + /*!< in: true if only an error should + be produced when decompression fails. + By default this parameter is false. */ /****************************************************************//** Get space id from fil node diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h index 8fdacc51277..839aed80418 100644 --- a/storage/xtradb/include/fsp0fsp.h +++ b/storage/xtradb/include/fsp0fsp.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2016, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -99,6 +99,9 @@ dictionary */ /** Zero relative shift position of the start of the DATA DIR bits */ #define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \ + FSP_FLAGS_WIDTH_PAGE_SSIZE) +#define FSP_FLAGS_POS_DATA_DIR_ORACLE (FSP_FLAGS_POS_ATOMIC_BLOBS \ + + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \ + + FSP_FLAGS_WIDTH_PAGE_SSIZE) /** Zero relative shift position of the start of the UNUSED bits */ #define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_DATA_DIR\ + FSP_FLAGS_WIDTH_DATA_DIR) @@ -123,6 +126,10 @@ dictionary */ #define FSP_FLAGS_MASK_DATA_DIR \ ((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \ << FSP_FLAGS_POS_DATA_DIR) +/** Bit mask of the DATA_DIR field */ +#define FSP_FLAGS_MASK_DATA_DIR_ORACLE \ + ((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \ + << FSP_FLAGS_POS_DATA_DIR_ORACLE) /** Bit mask of the PAGE_COMPRESSION field */ #define FSP_FLAGS_MASK_PAGE_COMPRESSION \ ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \ @@ -156,6 +163,9 @@ dictionary */ #define FSP_FLAGS_HAS_DATA_DIR(flags) \ ((flags & FSP_FLAGS_MASK_DATA_DIR) \ >> FSP_FLAGS_POS_DATA_DIR) +#define FSP_FLAGS_HAS_DATA_DIR_ORACLE(flags) \ + ((flags & FSP_FLAGS_MASK_DATA_DIR_ORACLE) \ + >> FSP_FLAGS_POS_DATA_DIR_ORACLE) /** Return the contents of the UNUSED bits */ #define FSP_FLAGS_GET_UNUSED(flags) \ (flags >> FSP_FLAGS_POS_UNUSED) diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h index d54ed281d9a..9f7b0216d9b 100644 --- a/storage/xtradb/include/fts0fts.h +++ b/storage/xtradb/include/fts0fts.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -724,6 +724,13 @@ fts_optimize_remove_table( /*======================*/ dict_table_t* table); /*!< in: table to remove */ +/** Send sync fts cache for the table. +@param[in] table table to sync */ +UNIV_INTERN +void +fts_optimize_request_sync_table( + dict_table_t* table); + /**********************************************************************//** Signal the optimize thread to prepare for shutdown. */ UNIV_INTERN @@ -826,15 +833,18 @@ fts_drop_index_split_tables( dict_index_t* index) /*!< in: fts instance */ __attribute__((nonnull, warn_unused_result)); -/****************************************************************//** -Run SYNC on the table, i.e., write out data from the cache to the -FTS auxiliary INDEX table and clear the cache at the end. */ +/** Run SYNC on the table, i.e., write out data from the cache to the +FTS auxiliary INDEX table and clear the cache at the end. +@param[in,out] table fts table +@param[in] unlock_cache whether unlock cache when write node +@param[in] wait whether wait for existing sync to finish +@return DB_SUCCESS on success, error code on failure. */ UNIV_INTERN dberr_t fts_sync_table( -/*===========*/ - dict_table_t* table) /*!< in: table */ - __attribute__((nonnull)); + dict_table_t* table, + bool unlock_cache, + bool wait); /****************************************************************//** Free the query graph but check whether dict_sys->mutex is already diff --git a/storage/xtradb/include/fts0types.h b/storage/xtradb/include/fts0types.h index 64677428331..e495fe72a60 100644 --- a/storage/xtradb/include/fts0types.h +++ b/storage/xtradb/include/fts0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -122,7 +122,11 @@ struct fts_sync_t { doc_id_t max_doc_id; /*!< The doc id at which the cache was noted as being full, we use this to set the upper_limit field */ - ib_time_t start_time; /*!< SYNC start time */ + ib_time_t start_time; /*!< SYNC start time */ + bool in_progress; /*!< flag whether sync is in progress.*/ + bool unlock_cache; /*!< flag whether unlock cache when + write fts node */ + os_event_t event; /*!< sync finish event */ }; /** The cache for the FTS system. It is a memory-based inverted index @@ -165,7 +169,6 @@ struct fts_cache_t { objects, they are recreated after a SYNC is completed */ - ib_alloc_t* self_heap; /*!< This heap is the heap out of which an instance of the cache itself was created. Objects created using @@ -212,6 +215,7 @@ struct fts_node_t { ulint ilist_size_alloc; /*!< Allocated size of ilist in bytes */ + bool synced; /*!< flag whether the node is synced */ }; /** A tokenizer word. Contains information about one word. */ diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index a9c003d5bb1..077c00d8eb0 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2006, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -295,8 +295,10 @@ innobase_casedn_str( #ifdef WITH_WSREP UNIV_INTERN int -wsrep_innobase_kill_one_trx(void *thd_ptr, - const trx_t *bf_trx, trx_t *victim_trx, ibool signal); +wsrep_innobase_kill_one_trx(void * const thd_ptr, + const trx_t * const bf_trx, + trx_t *victim_trx, + ibool signal); int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, unsigned char* str, unsigned int str_length, unsigned int buf_length); @@ -356,6 +358,15 @@ thd_supports_xa( THD* thd); /*!< in: thread handle, or NULL to query the global innodb_supports_xa */ +/** Get status of innodb_tmpdir. +@param[in] thd thread handle, or NULL to query + the global innodb_tmpdir. +@retval NULL if innodb_tmpdir="" */ +UNIV_INTERN +const char* +thd_innodb_tmpdir( + THD* thd); + /******************************************************************//** Check the status of fake changes mode (innodb_fake_changes) @return true if fake change mode is enabled. */ diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 4ebda650ca0..f5655e98a13 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -352,17 +352,6 @@ log_archive_do( ulint* n_bytes);/*!< out: archive log buffer size, 0 if nothing to archive */ /****************************************************************//** -Writes the log contents to the archive up to the lsn when this function was -called, and stops the archiving. When archiving is started again, the archived -log file numbers start from a number one higher, so that the archiving will -not write again to the archived log files which exist when this function -returns. -@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint -log_archive_stop(void); -/*==================*/ -/****************************************************************//** Starts again archiving which has been stopped. @return DB_SUCCESS or DB_ERROR */ UNIV_INTERN @@ -624,25 +613,14 @@ log_mem_free(void); /*==============*/ /****************************************************************//** -Safely reads the log_sys->tracked_lsn value. Uses atomic operations -if available, otherwise this field is protected with the log system -mutex. The writer counterpart function is log_set_tracked_lsn() in -log0online.c. +Safely reads the log_sys->tracked_lsn value. The writer counterpart function +is log_set_tracked_lsn() in log0online.c. @return log_sys->tracked_lsn value. */ UNIV_INLINE lsn_t log_get_tracked_lsn(void); /*=====================*/ -/****************************************************************//** -Unsafely reads the log_sys->tracked_lsn value. Uses atomic operations -if available, or use dirty read. Use for printing only. - -@return log_sys->tracked_lsn value. */ -UNIV_INLINE -lsn_t -log_get_tracked_lsn_peek(void); -/*==========================*/ extern log_t* log_sys; @@ -951,7 +929,7 @@ struct log_t{ pending flushes or writes */ /* NOTE on the 'flush' in names of the fields below: starting from 4.0.14, we separate the write of the log file and the actual fsync() - or other method to flush it to disk. The names below shhould really + or other method to flush it to disk. The names below should really be 'flush_or_write'! */ os_event_t no_flush_event; /*!< this event is in the reset state when a flush or a write is running; diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic index ff5a83be249..70458fa546b 100644 --- a/storage/xtradb/include/log0log.ic +++ b/storage/xtradb/include/log0log.ic @@ -553,37 +553,15 @@ log_free_check(void) #endif /* !UNIV_HOTBACKUP */ /****************************************************************//** -Unsafely reads the log_sys->tracked_lsn value. Uses atomic operations -if available, or use dirty read. Use for printing only. +Safely reads the log_sys->tracked_lsn value. The writer counterpart function +is log_set_tracked_lsn() in log0online.c. @return log_sys->tracked_lsn value. */ UNIV_INLINE lsn_t -log_get_tracked_lsn_peek(void) -/*==========================*/ -{ -#ifdef HAVE_ATOMIC_BUILTINS_64 - return os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); -#else - return log_sys->tracked_lsn; -#endif -} - -/****************************************************************//** -Safely reads the log_sys->tracked_lsn value. Uses atomic operations -if available, otherwise this field is protected with the log system -mutex. The writer counterpart function is log_set_tracked_lsn() in -log0online.c. -@return log_sys->tracked_lsn value. */ -UNIV_INLINE -lsn_t log_get_tracked_lsn(void) /*=====================*/ { -#ifdef HAVE_ATOMIC_BUILTINS_64 - return os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); -#else - ut_ad(mutex_own(&(log_sys->mutex))); + os_rmb; return log_sys->tracked_lsn; -#endif } diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h index 1ef4df7d6da..67dc0d72b4b 100644 --- a/storage/xtradb/include/log0online.h +++ b/storage/xtradb/include/log0online.h @@ -73,20 +73,7 @@ UNIV_INTERN ibool log_online_purge_changed_page_bitmaps( /*==================================*/ - ib_uint64_t lsn); /*!<in: LSN to purge files up to */ - -/************************************************************//** -Delete all the bitmap files for data less than the specified LSN. -If called with lsn == 0 (i.e. set by RESET request) or -IB_ULONGLONG_MAX, restart the bitmap file sequence, otherwise -continue it. - -@return FALSE to indicate success, TRUE for failure. */ -UNIV_INTERN -ibool -log_online_purge_changed_page_bitmaps( -/*==================================*/ - ib_uint64_t lsn); /*!<in: LSN to purge files up to */ + lsn_t lsn); /*!<in: LSN to purge files up to */ #define LOG_BITMAP_ITERATOR_START_LSN(i) \ ((i).start_lsn) diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h index 8fc5daaef1d..1019f43f70c 100644 --- a/storage/xtradb/include/log0recv.h +++ b/storage/xtradb/include/log0recv.h @@ -101,15 +101,6 @@ UNIV_INLINE ibool recv_recovery_is_on(void); /*=====================*/ -#ifdef UNIV_LOG_ARCHIVE -/*******************************************************************//** -Returns TRUE if recovery from backup is currently running. -@return recv_recovery_from_backup_on */ -UNIV_INLINE -ibool -recv_recovery_from_backup_is_on(void); -/*=================================*/ -#endif /* UNIV_LOG_ARCHIVE */ /************************************************************************//** Applies the hashed log records to the page, if the page lsn is less than the lsn of a log record. This can be called when a buffer page has just been @@ -331,30 +322,6 @@ void recv_apply_log_recs_for_backup(void); /*================================*/ #endif -#ifdef UNIV_LOG_ARCHIVE -/********************************************************//** -Recovers from archived log files, and also from log files, if they exist. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -recv_recovery_from_archive_start( -/*=============================*/ - lsn_t min_flushed_lsn,/*!< in: min flushed lsn field from the - data files */ - lsn_t limit_lsn, /*!< in: recover up to this lsn if - possible */ - lsn_t first_log_no); /*!< in: number of the first archived - log file to use in the recovery; the - file will be searched from - INNOBASE_LOG_ARCH_DIR specified in - server config file */ -/********************************************************//** -Completes recovery from archive. */ -UNIV_INTERN -void -recv_recovery_from_archive_finish(void); -/*===================================*/ -#endif /* UNIV_LOG_ARCHIVE */ /** Block of log record data */ struct recv_data_t{ diff --git a/storage/xtradb/include/log0recv.ic b/storage/xtradb/include/log0recv.ic index 32c28dd03e6..b29272f4672 100644 --- a/storage/xtradb/include/log0recv.ic +++ b/storage/xtradb/include/log0recv.ic @@ -35,19 +35,3 @@ recv_recovery_is_on(void) { return(recv_recovery_on); } - -#ifdef UNIV_LOG_ARCHIVE -/** TRUE when applying redo log records from an archived log file */ -extern ibool recv_recovery_from_backup_on; - -/*******************************************************************//** -Returns TRUE if recovery from backup is currently running. -@return recv_recovery_from_backup_on */ -UNIV_INLINE -ibool -recv_recovery_from_backup_is_on(void) -/*=================================*/ -{ - return(recv_recovery_from_backup_on); -} -#endif /* UNIV_LOG_ARCHIVE */ diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index c15d896caa6..453536beba4 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -451,20 +451,19 @@ os_get_os_version(void); /*===================*/ #endif /* __WIN__ */ #ifndef UNIV_HOTBACKUP -/****************************************************************//** -Creates the seek mutexes used in positioned reads and writes. */ -UNIV_INTERN -void -os_io_init_simple(void); -/*===================*/ -/***********************************************************************//** -Creates a temporary file. This function is like tmpfile(3), but -the temporary file is created in the MySQL temporary directory. -@return temporary file handle, or NULL on error */ + +/** Create a temporary file. This function is like tmpfile(3), but +the temporary file is created in the given parameter path. If the path +is null then it will create the file in the mysql server configuration +parameter (--tmpdir). +@param[in] path location for creating temporary file +@return temporary file handle, or NULL on error */ +UNIV_INTERN FILE* -os_file_create_tmpfile(void); -/*========================*/ +os_file_create_tmpfile( + const char* path); + #endif /* !UNIV_HOTBACKUP */ /***********************************************************************//** The os_file_opendir() function opens a directory stream corresponding to the @@ -1314,14 +1313,14 @@ os_file_get_status( file can be opened in RW mode */ #if !defined(UNIV_HOTBACKUP) -/*********************************************************************//** -Creates a temporary file that will be deleted on close. -This function is defined in ha_innodb.cc. -@return temporary file descriptor, or < 0 on error */ +/** Create a temporary file in the location specified by the parameter +path. If the path is null, then it will be created in tmpdir. +@param[in] path location for creating temporary file +@return temporary file descriptor, or < 0 on error */ UNIV_INTERN int -innobase_mysql_tmpfile(void); -/*========================*/ +innobase_mysql_tmpfile( + const char* path); #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index db996b096bb..0f93f3ff074 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -42,7 +42,6 @@ Created 9/6/1995 Heikki Tuuri || defined _M_X64 || defined __WIN__ #define IB_STRONG_MEMORY_MODEL -#undef HAVE_IB_GCC_ATOMIC_TEST_AND_SET // Quick-and-dirty fix for bug 1519094 #endif /* __i386__ || __x86_64__ || _M_IX86 || _M_X64 || __WIN__ */ @@ -94,16 +93,62 @@ struct os_event { #endif os_fast_mutex_t os_mutex; /*!< this mutex protects the next fields */ - ibool is_set; /*!< this is TRUE when the event is - in the signaled state, i.e., a thread - does not stop if it tries to wait for - this event */ - ib_int64_t signal_count; /*!< this is incremented each time - the event becomes signaled */ +private: + /** Masks for the event signal count and set flag in the count_and_set + field */ + static const ib_uint64_t count_mask = 0x7fffffffffffffffULL; + static const ib_uint64_t set_mask = 0x8000000000000000ULL; + + /** The MSB is set whenever when the event is in the signaled state, + i.e. a thread does not stop if it tries to wait for this event. Lower + bits are incremented each time the event becomes signaled. */ + ib_uint64_t count_and_set; +public: os_cond_t cond_var; /*!< condition variable is used in waiting for the event */ - UT_LIST_NODE_T(os_event_t) os_event_list; - /*!< list of all created events */ + + /** Initialise count_and_set field */ + void init_count_and_set(void) + { + /* We return this value in os_event_reset(), which can then be + be used to pass to the os_event_wait_low(). The value of zero + is reserved in os_event_wait_low() for the case when the + caller does not want to pass any signal_count value. To + distinguish between the two cases we initialize signal_count + to 1 here. */ + count_and_set = 1; + } + + /** Mark this event as set */ + void set(void) + { + count_and_set |= set_mask; + } + + /** Unmark this event as set */ + void reset(void) + { + count_and_set &= count_mask; + } + + /** Return true if this event is set */ + bool is_set(void) const + { + return count_and_set & set_mask; + } + + /** Bump signal count for this event */ + void inc_signal_count(void) + { + ut_ad(static_cast<ib_uint64_t>(signal_count()) < count_mask); + count_and_set++; + } + + /** Return how many times this event has been signalled */ + ib_int64_t signal_count(void) const + { + return (count_and_set & count_mask); + } }; /** Denotes an infinite delay for os_event_wait_time() */ @@ -115,8 +160,7 @@ struct os_event { /** Operating system mutex handle */ typedef struct os_mutex_t* os_ib_mutex_t; -/** Mutex protecting counts and the event and OS 'slow' mutex lists */ -extern os_ib_mutex_t os_sync_mutex; +// All the os_*_count variables are accessed atomically /** This is incremented by 1 in os_thread_create and decremented by 1 in os_thread_exit */ @@ -132,12 +176,15 @@ UNIV_INTERN void os_sync_init(void); /*==============*/ -/*********************************************************//** -Frees created events and OS 'slow' mutexes. */ + +/** Create an event semaphore, i.e., a semaphore which may just have two +states: signaled and nonsignaled. The created event is manual reset: it must be +reset explicitly by calling sync_os_reset_event. +@param[in,out] event memory block where to create the event */ UNIV_INTERN void -os_sync_free(void); -/*==============*/ +os_event_create(os_event_t event); + /*********************************************************//** Creates an event semaphore, i.e., a semaphore which may just have two states: signaled and nonsignaled. The created event is manual reset: it must be reset @@ -173,7 +220,10 @@ UNIV_INTERN void os_event_free( /*==========*/ - os_event_t event); /*!< in: event to free */ + os_event_t event, /*!< in: event to free */ + bool free_memory = true); + /*!< in: if true, deallocate the event memory + block too */ /**********************************************************//** Waits for an event object until it is in the signaled state. @@ -467,28 +517,7 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_decrement(ptr, amount) -# if defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) - -/** Do an atomic test-and-set. -@param[in,out] ptr Memory location to set to non-zero -@return the previous value */ -inline -lock_word_t -os_atomic_test_and_set(volatile lock_word_t* ptr) -{ - return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE)); -} - -/** Do an atomic clear. -@param[in,out] ptr Memory location to set to zero */ -inline -void -os_atomic_clear(volatile lock_word_t* ptr) -{ - __atomic_clear(ptr, __ATOMIC_RELEASE); -} - -# elif defined(HAVE_ATOMIC_BUILTINS) +# if defined(HAVE_ATOMIC_BUILTINS) /** Do an atomic test and set. @param[in,out] ptr Memory location to set to non-zero @@ -517,6 +546,27 @@ os_atomic_clear(volatile lock_word_t* ptr) return(__sync_lock_test_and_set(ptr, 0)); } +# elif defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + +/** Do an atomic test-and-set. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE)); +} + +/** Do an atomic clear. +@param[in,out] ptr Memory location to set to zero */ +inline +void +os_atomic_clear(volatile lock_word_t* ptr) +{ + __atomic_clear(ptr, __ATOMIC_RELEASE); +} + # else # error "Unsupported platform" diff --git a/storage/xtradb/include/row0log.h b/storage/xtradb/include/row0log.h index f105838eece..e127504c484 100644 --- a/storage/xtradb/include/row0log.h +++ b/storage/xtradb/include/row0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -55,8 +55,9 @@ row_log_allocate( const dtuple_t* add_cols, /*!< in: default values of added columns, or NULL */ - const ulint* col_map)/*!< in: mapping of old column + const ulint* col_map,/*!< in: mapping of old column numbers to new ones, or NULL if !table */ + const char* path) /*!< in: where to create temporary file */ __attribute__((nonnull(1), warn_unused_result)); /******************************************************//** diff --git a/storage/xtradb/include/row0merge.h b/storage/xtradb/include/row0merge.h index 3e3459b8703..53164b5197f 100644 --- a/storage/xtradb/include/row0merge.h +++ b/storage/xtradb/include/row0merge.h @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, MariaDB Corporation. +Copyright (c) 2005, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2015, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -187,14 +187,14 @@ void row_merge_drop_temp_indexes(void); /*=============================*/ -/*********************************************************************//** -Creates temporary merge files, and if UNIV_PFS_IO defined, register -the file descriptor with Performance Schema. +/** Create temporary merge files in the given paramater path, and if +UNIV_PFS_IO defined, register the file descriptor with Performance Schema. +@param[in] path location for creating temporary merge files. @return File descriptor */ UNIV_INTERN int -row_merge_file_create_low(void) -/*===========================*/ +row_merge_file_create_low( + const char* path) __attribute__((warn_unused_result)); /*********************************************************************//** Destroy a merge file. And de-register the file from Performance Schema @@ -372,15 +372,17 @@ row_merge_buf_empty( /*================*/ row_merge_buf_t* buf) /*!< in,own: sort buffer */ __attribute__((warn_unused_result, nonnull)); -/*********************************************************************//** -Create a merge file. + +/** Create a merge file in the given location. +@param[out] merge_file merge file structure +@param[in] path location for creating temporary file @return file descriptor, or -1 on failure */ UNIV_INTERN int row_merge_file_create( -/*==================*/ - merge_file_t* merge_file) /*!< out: merge file structure */ - __attribute__((nonnull)); + merge_file_t* merge_file, + const char* path); + /*********************************************************************//** Merge disk files. @return DB_SUCCESS or error code */ diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index a0ff6cdad15..e6a201be7a5 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -491,6 +491,9 @@ row_drop_table_for_mysql( const char* name, /*!< in: table name */ trx_t* trx, /*!< in: dictionary transaction handle */ bool drop_db,/*!< in: true=dropping whole database */ + ibool create_failed,/*!<in: TRUE=create table failed + because e.g. foreign key column + type mismatch. */ bool nonatomic = true) /*!< in: whether it is permitted to release and reacquire dict_operation_lock */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 4a3a2da733f..018852a7f92 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -477,8 +477,6 @@ extern ulong srv_innodb_stats_method; #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; -extern ibool srv_archive_recovery; -extern ib_uint64_t srv_archive_recovery_limit_lsn; #endif /* UNIV_LOG_ARCHIVE */ extern char* srv_file_flush_method_str; @@ -951,19 +949,29 @@ ulint srv_get_activity_count(void); /*========================*/ /*******************************************************************//** -Check if there has been any activity. +Check if there has been any activity. Considers background change buffer +merge as regular server activity unless a non-default +old_ibuf_merge_activity_count value is passed, in which case the merge will be +treated as keeping server idle. @return FALSE if no change in activity counter. */ UNIV_INTERN ibool srv_check_activity( /*===============*/ - ulint old_activity_count); /*!< old activity count */ + ulint old_activity_count, /*!< old activity count */ + /*!< old change buffer merge + activity count, or + ULINT_UNDEFINED */ + ulint old_ibuf_merge_activity_count = ULINT_UNDEFINED); /******************************************************************//** Increment the server activity counter. */ UNIV_INTERN void -srv_inc_activity_count(void); -/*=========================*/ +srv_inc_activity_count( +/*===================*/ + bool ibuf_merge_activity = false); /*!< whether this activity bump + is caused by the background + change buffer merge */ /**********************************************************************//** Enqueues a task to server task queue and releases a worker thread, if there diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h index d59613e0abb..cef2008c4ea 100644 --- a/storage/xtradb/include/sync0rw.h +++ b/storage/xtradb/include/sync0rw.h @@ -734,8 +734,8 @@ struct rw_lock_t { /*!< Thread id of writer thread. Is only guaranteed to have sane and non-stale value iff recursive flag is set. */ - os_event_t event; /*!< Used by sync0arr.cc for thread queueing */ - os_event_t wait_ex_event; + struct os_event event; /*!< Used by sync0arr.cc for thread queueing */ + struct os_event wait_ex_event; /*!< Event for next-writer to wait on. A thread must decrement lock_word before waiting. */ #ifndef INNODB_RW_LOCKS_USE_ATOMICS @@ -788,12 +788,12 @@ struct prio_rw_lock_t { volatile ulint high_priority_s_waiters; /* Number of high priority S waiters */ - os_event_t high_priority_s_event; /* High priority wait + struct os_event high_priority_s_event; /* High priority wait array event for S waiters */ volatile ulint high_priority_x_waiters; /* Number of high priority X waiters */ - os_event_t high_priority_x_event; + struct os_event high_priority_x_event; /* High priority wait arraay event for X waiters */ volatile ulint high_priority_wait_ex_waiter; diff --git a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic index f66d435b8fc..b65e48e0881 100644 --- a/storage/xtradb/include/sync0rw.ic +++ b/storage/xtradb/include/sync0rw.ic @@ -602,7 +602,7 @@ rw_lock_s_unlock_func( /* wait_ex waiter exists. It may not be asleep, but we signal anyway. We do not wake other waiters, because they can't exist without wait_ex waiter and wait_ex waiter goes first.*/ - os_event_set(lock->wait_ex_event); + os_event_set(&lock->wait_ex_event); sync_array_object_signalled(); } @@ -642,7 +642,7 @@ rw_lock_s_unlock_func( /* A waiting next-writer exists, either high priority or regular, sharing the same wait event. */ - os_event_set(lock->base_lock.wait_ex_event); + os_event_set(&lock->base_lock.wait_ex_event); sync_array_object_signalled(); } else if (lock_word == X_LOCK_DECR) { @@ -653,7 +653,7 @@ rw_lock_s_unlock_func( if (lock->base_lock.waiters) { rw_lock_reset_waiter_flag(&lock->base_lock); - os_event_set(lock->base_lock.event); + os_event_set(&lock->base_lock.event); sync_array_object_signalled(); } } @@ -735,7 +735,7 @@ rw_lock_x_unlock_func( if (lock->waiters) { rw_lock_reset_waiter_flag(lock); - os_event_set(lock->event); + os_event_set(&lock->event); sync_array_object_signalled(); } } @@ -778,16 +778,16 @@ rw_lock_x_unlock_func( if (lock->high_priority_x_waiters) { - os_event_set(lock->high_priority_x_event); + os_event_set(&lock->high_priority_x_event); sync_array_object_signalled(); } else if (lock->high_priority_s_waiters) { - os_event_set(lock->high_priority_s_event); + os_event_set(&lock->high_priority_s_event); sync_array_object_signalled(); } else if (lock->base_lock.waiters) { rw_lock_reset_waiter_flag(&lock->base_lock); - os_event_set(lock->base_lock.event); + os_event_set(&lock->base_lock.event); sync_array_object_signalled(); } } diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index 88fe4644a07..2b794059399 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -926,7 +926,7 @@ implementation of a mutual exclusion semaphore. */ /** InnoDB mutex */ struct ib_mutex_t { - os_event_t event; /*!< Used by sync0arr.cc for the wait queue */ + struct os_event event; /*!< Used by sync0arr.cc for the wait queue */ volatile lock_word_t lock_word; /*!< lock_word is the target of the atomic test-and-set instruction when atomic operations are enabled. */ @@ -974,14 +974,13 @@ struct ib_mutex_t { struct ib_prio_mutex_t { ib_mutex_t base_mutex; /* The regular mutex provides the lock word etc. for the priority mutex */ - os_event_t high_priority_event; /* High priority wait array + struct os_event high_priority_event; /* High priority wait array event */ volatile ulint high_priority_waiters; /* Number of threads that asked for this mutex to be acquired with high priority in the global wait array waiting for this mutex to be released. */ - UT_LIST_NODE_T(ib_prio_mutex_t) list; }; /** Constant determining how long spin wait is continued before suspending diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic index fa0724d7996..83f28bfeded 100644 --- a/storage/xtradb/include/sync0sync.ic +++ b/storage/xtradb/include/sync0sync.ic @@ -224,7 +224,7 @@ mutex_exit_func( /* Wake up any high priority waiters first. */ if (mutex->high_priority_waiters != 0) { - os_event_set(mutex->high_priority_event); + os_event_set(&mutex->high_priority_event); sync_array_object_signalled(); } else if (mutex_get_waiters(&mutex->base_mutex) != 0) { diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index f4a0da12476..528abe183c7 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -45,10 +45,10 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 28 +#define INNODB_VERSION_BUGFIX 30 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 76.1 +#define PERCONA_INNODB_VERSION 76.3 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ diff --git a/storage/xtradb/include/ut0byte.ic b/storage/xtradb/include/ut0byte.ic index 873d98c727e..1a7af5ae33d 100644 --- a/storage/xtradb/include/ut0byte.ic +++ b/storage/xtradb/include/ut0byte.ic @@ -110,7 +110,7 @@ ut_align_down( ut_ad(sizeof(void*) == sizeof(ulint)); - return((void*)((((ulint) ptr)) & ~(align_no - 1))); + return((void*)(((ulint) ptr) & ~(align_no - 1))); } /*********************************************************//** diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc index 4f7a60c1c7a..6aef3e666aa 100644 --- a/storage/xtradb/lock/lock0lock.cc +++ b/storage/xtradb/lock/lock0lock.cc @@ -639,7 +639,7 @@ lock_sys_create( lock_sys->rec_num = 0; if (!srv_read_only_mode) { - lock_latest_err_file = os_file_create_tmpfile(); + lock_latest_err_file = os_file_create_tmpfile(NULL); ut_a(lock_latest_err_file); } } @@ -661,6 +661,17 @@ lock_sys_close(void) mutex_free(&lock_sys->mutex); mutex_free(&lock_sys->wait_mutex); + os_event_free(lock_sys->timeout_event); + + for (srv_slot_t* slot = lock_sys->waiting_threads; + slot < lock_sys->waiting_threads + OS_THREAD_MAX_N; slot++) { + + ut_ad(!slot->in_use); + ut_ad(!slot->thr); + if (slot->event != NULL) + os_event_free(slot->event); + } + mem_free(lock_stack); mem_free(lock_sys); diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 36531f3c6f4..a19cb70e747 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -1086,8 +1086,7 @@ log_group_init( ulint space_id, /*!< in: space id of the file space which contains the log files of this group */ - ulint archive_space_id __attribute__((unused))) - /*!< in: space id of the file space + ulint archive_space_id) /*!< in: space id of the file space which contains some archived log files for this group; currently, only for the first log group this is @@ -3304,10 +3303,9 @@ log_archive_close_groups( Writes the log contents to the archive up to the lsn when this function was called, and stops the archiving. When archiving is started again, the archived log file numbers start from 2 higher, so that the archiving will not write -again to the archived log files which exist when this function returns. -@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint +again to the archived log files which exist when this function returns. */ +static +void log_archive_stop(void) /*==================*/ { @@ -3315,13 +3313,7 @@ log_archive_stop(void) mutex_enter(&(log_sys->mutex)); - if (log_sys->archiving_state != LOG_ARCH_ON) { - - mutex_exit(&(log_sys->mutex)); - - return(DB_ERROR); - } - + ut_ad(log_sys->archiving_state == LOG_ARCH_ON); log_sys->archiving_state = LOG_ARCH_STOPPING; mutex_exit(&(log_sys->mutex)); @@ -3363,8 +3355,6 @@ log_archive_stop(void) log_sys->archiving_state = LOG_ARCH_STOPPED; mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); } /****************************************************************//** @@ -4013,7 +4003,7 @@ log_print( "Log tracking enabled\n" "Log tracked up to " LSN_PF "\n" "Max tracked LSN age " LSN_PF "\n", - log_get_tracked_lsn_peek(), + log_get_tracked_lsn(), log_sys->max_checkpoint_age); } @@ -4110,6 +4100,7 @@ log_shutdown(void) rw_lock_free(&log_sys->checkpoint_lock); mutex_free(&log_sys->mutex); + mutex_free(&log_sys->log_flush_order_mutex); #ifdef UNIV_LOG_ARCHIVE rw_lock_free(&log_sys->archive_lock); diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 51a9fa8f6c5..3f53791ed4c 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -290,7 +290,7 @@ log_online_read_bitmap_page( /* The following call prints an error message */ os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_WARN, - "failed reading changed page bitmap file \'%s\'\n", + "failed reading changed page bitmap file \'%s\'", bitmap_file->name); return FALSE; } @@ -350,7 +350,7 @@ log_online_read_last_tracked_lsn(void) ib_logf(IB_LOG_LEVEL_WARN, "corruption detected in \'%s\' at offset " - UINT64PF "\n", + UINT64PF, log_bmp_sys->out.name, read_offset); } }; @@ -364,7 +364,7 @@ log_online_read_last_tracked_lsn(void) log_bmp_sys->out.offset)) { ib_logf(IB_LOG_LEVEL_WARN, "failed truncating changed page bitmap file \'%s\' to " - UINT64PF " bytes\n", + UINT64PF " bytes", log_bmp_sys->out.name, log_bmp_sys->out.offset); result = 0; } @@ -382,16 +382,8 @@ log_set_tracked_lsn( /*================*/ lsn_t tracked_lsn) /*!<in: new value */ { -#ifdef HAVE_ATOMIC_BUILTINS_64 - /* Single writer, no data race here */ - lsn_t old_value = os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); - (void) os_atomic_increment_uint64(&log_sys->tracked_lsn, - tracked_lsn - old_value); -#else - mutex_enter(&log_sys->mutex); log_sys->tracked_lsn = tracked_lsn; - mutex_exit(&log_sys->mutex); -#endif + os_wmb; } /*********************************************************************//** @@ -416,7 +408,7 @@ log_online_can_track_missing( ib_logf(IB_LOG_LEVEL_ERROR, "last tracked LSN " LSN_PF " is ahead of tracking " "start LSN " LSN_PF ". This can be caused by " - "mismatched bitmap files.\n", + "mismatched bitmap files.", last_tracked_lsn, tracking_start_lsn); exit(1); } @@ -444,7 +436,7 @@ log_online_track_missing_on_startup( ib_logf(IB_LOG_LEVEL_WARN, "last tracked LSN in \'%s\' is " LSN_PF ", but the last checkpoint LSN is " LSN_PF ". This might be " - "due to a server crash or a very fast shutdown. ", + "due to a server crash or a very fast shutdown.", log_bmp_sys->out.name, last_tracked_lsn, tracking_start_lsn); /* See if we can fully recover the missing interval */ @@ -452,7 +444,7 @@ log_online_track_missing_on_startup( tracking_start_lsn)) { ib_logf(IB_LOG_LEVEL_INFO, - "reading the log to advance the last tracked LSN.\n"); + "reading the log to advance the last tracked LSN."); log_bmp_sys->start_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN); @@ -463,22 +455,22 @@ log_online_track_missing_on_startup( ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn); ib_logf(IB_LOG_LEVEL_INFO, - "continuing tracking changed pages from LSN " LSN_PF - "\n", log_bmp_sys->end_lsn); + "continuing tracking changed pages from LSN " LSN_PF, + log_bmp_sys->end_lsn); } else { ib_logf(IB_LOG_LEVEL_WARN, "the age of last tracked LSN exceeds log capacity, " "tracking-based incremental backups will work only " - "from the higher LSN!\n"); + "from the higher LSN!"); log_bmp_sys->end_lsn = log_bmp_sys->start_lsn = tracking_start_lsn; log_set_tracked_lsn(log_bmp_sys->start_lsn); ib_logf(IB_LOG_LEVEL_INFO, - "starting tracking changed pages from LSN " LSN_PF - "\n", log_bmp_sys->end_lsn); + "starting tracking changed pages from LSN " LSN_PF, + log_bmp_sys->end_lsn); } } @@ -546,7 +538,7 @@ log_online_start_bitmap_file(void) /* The following call prints an error message */ os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_ERROR, - "cannot create \'%s\'\n", log_bmp_sys->out.name); + "cannot create \'%s\'", log_bmp_sys->out.name); return FALSE; } @@ -682,7 +674,7 @@ log_online_read_init(void) if (os_file_closedir(bitmap_dir)) { os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'\n", + ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'", log_bmp_sys->bmp_file_home); exit(1); } @@ -722,7 +714,7 @@ log_online_read_init(void) ib_logf(IB_LOG_LEVEL_WARN, "truncated block detected in \'%s\' at offset " - UINT64PF "\n", + UINT64PF, log_bmp_sys->out.name, log_bmp_sys->out.offset); log_bmp_sys->out.offset -= @@ -760,14 +752,14 @@ log_online_read_init(void) "last tracked LSN is " LSN_PF ", but the last " "checkpoint LSN is " LSN_PF ". The " "tracking-based incremental backups will work " - "only from the latter LSN!\n", + "only from the latter LSN!", last_tracked_lsn, tracking_start_lsn); } } ib_logf(IB_LOG_LEVEL_INFO, "starting tracking changed pages from LSN " - LSN_PF "\n", tracking_start_lsn); + LSN_PF, tracking_start_lsn); log_bmp_sys->start_lsn = tracking_start_lsn; log_set_tracked_lsn(tracking_start_lsn); } @@ -911,7 +903,7 @@ log_online_is_valid_log_seg( ib_logf(IB_LOG_LEVEL_ERROR, "log block checksum mismatch: expected " ULINTPF ", " - "calculated checksum " ULINTPF "\n", + "calculated checksum " ULINTPF, log_block_get_checksum(log_block), log_block_calc_checksum(log_block)); } @@ -1110,7 +1102,7 @@ log_online_write_bitmap_page( /* The following call prints an error message */ os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_ERROR, "failed writing changed page " - "bitmap file \'%s\'\n", log_bmp_sys->out.name); + "bitmap file \'%s\'", log_bmp_sys->out.name); return FALSE; } @@ -1120,7 +1112,7 @@ log_online_write_bitmap_page( /* The following call prints an error message */ os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_ERROR, "failed flushing changed page " - "bitmap file \'%s\'\n", log_bmp_sys->out.name); + "bitmap file \'%s\'", log_bmp_sys->out.name); return FALSE; } @@ -1267,8 +1259,7 @@ log_online_diagnose_inconsistent_dir( ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Warning: inconsistent bitmap file " "directory for a " - "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES query" - "\n"); + "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES query"); free(bitmap_files->files); } @@ -1310,7 +1301,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(!bitmap_dir)) { ib_logf(IB_LOG_LEVEL_ERROR, - "failed to open bitmap directory \'%s\'\n", + "failed to open bitmap directory \'%s\'", srv_data_home); return FALSE; } @@ -1360,7 +1351,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(os_file_closedir(bitmap_dir))) { os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'\n", + ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'", srv_data_home); return FALSE; } @@ -1381,7 +1372,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(!bitmap_dir)) { ib_logf(IB_LOG_LEVEL_ERROR, - "failed to open bitmap directory \'%s\'\n", + "failed to open bitmap directory \'%s\'", srv_data_home); return FALSE; } @@ -1432,7 +1423,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(os_file_closedir(bitmap_dir))) { os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'\n", + ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'", srv_data_home); free(bitmap_files->files); return FALSE; @@ -1507,7 +1498,7 @@ log_online_open_bitmap_file_read_only( /* Here and below assume that bitmap file names do not contain apostrophes, thus no need for ut_print_filename(). */ ib_logf(IB_LOG_LEVEL_WARN, - "error opening the changed page bitmap \'%s\'\n", + "error opening the changed page bitmap \'%s\'", bitmap_file->name); return FALSE; } @@ -1553,7 +1544,7 @@ log_online_diagnose_bitmap_eof( ib_logf(IB_LOG_LEVEL_WARN, "junk at the end of changed page bitmap file " - "\'%s\'.\n", bitmap_file->name); + "\'%s\'.", bitmap_file->name); } if (UNIV_UNLIKELY(!last_page_in_run)) { @@ -1564,7 +1555,7 @@ log_online_diagnose_bitmap_eof( for the whole server */ ib_logf(IB_LOG_LEVEL_WARN, "changed page bitmap file \'%s\' does not " - "contain a complete run at the end.\n", + "contain a complete run at the end.", bitmap_file->name); return FALSE; } @@ -1757,7 +1748,7 @@ log_online_bitmap_iterator_next( os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_WARN, "failed reading changed page bitmap file " - "\'%s\'\n", i->in_files.files[i->in_i].name); + "\'%s\'", i->in_files.files[i->in_i].name); i->failed = TRUE; return FALSE; } diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index 23ca8b1381f..67f4050a1f1 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -90,10 +90,6 @@ UNIV_INTERN recv_sys_t* recv_sys = NULL; otherwise. Note that this is FALSE while a background thread is rolling back incomplete transactions. */ UNIV_INTERN ibool recv_recovery_on; -#ifdef UNIV_LOG_ARCHIVE -/** TRUE when applying redo log records from an archived log file */ -UNIV_INTERN ibool recv_recovery_from_backup_on; -#endif /* UNIV_LOG_ARCHIVE */ #ifndef UNIV_HOTBACKUP /** TRUE when recv_init_crash_recovery() has been called. */ @@ -306,10 +302,6 @@ recv_sys_var_init(void) recv_recovery_on = FALSE; -#ifdef UNIV_LOG_ARCHIVE - recv_recovery_from_backup_on = FALSE; -#endif /* UNIV_LOG_ARCHIVE */ - recv_needed_recovery = FALSE; recv_lsn_checks_on = FALSE; @@ -3779,332 +3771,6 @@ recv_reset_log_files_for_backup( } #endif /* UNIV_HOTBACKUP */ -#ifdef UNIV_LOG_ARCHIVE -/******************************************************//** -Reads from the archive of a log group and performs recovery. -@return TRUE if no more complete consistent archive files */ -static -ibool -log_group_recover_from_archive_file( -/*================================*/ - log_group_t* group) /*!< in: log group */ -{ - os_file_t file_handle; - ib_uint64_t start_lsn; - ib_uint64_t file_end_lsn; - ib_uint64_t dummy_lsn; - ib_uint64_t scanned_lsn; - ulint len; - ibool ret; - byte* buf; - os_offset_t read_offset; - os_offset_t file_size; - int input_char; - char name[OS_FILE_MAX_PATH]; - dberr_t err; - - ut_a(0); - -try_open_again: - buf = log_sys->buf; - - /* Add the file to the archive file space; open the file */ - - log_archived_file_name_gen(name, sizeof(name), - group->id, group->archived_file_no); - - file_handle = os_file_create(innodb_file_log_key, - name, OS_FILE_OPEN, - OS_FILE_LOG, OS_FILE_AIO, &ret, FALSE); - - if (ret == FALSE) { -ask_again: - fprintf(stderr, - "InnoDB: Do you want to copy additional" - " archived log files\n" - "InnoDB: to the directory\n"); - fprintf(stderr, - "InnoDB: or were these all the files needed" - " in recovery?\n"); - fprintf(stderr, - "InnoDB: (Y == copy more files; N == this is all)?"); - - input_char = getchar(); - - if (input_char == (int) 'N') { - - return(TRUE); - } else if (input_char == (int) 'Y') { - - goto try_open_again; - } else { - goto ask_again; - } - } - - file_size = os_file_get_size(file_handle); - ut_a(file_size != (os_offset_t) -1); - - fprintf(stderr, "InnoDB: Opened archived log file %s\n", name); - - ret = os_file_close(file_handle); - - if (file_size < LOG_FILE_HDR_SIZE) { - fprintf(stderr, - "InnoDB: Archive file header incomplete %s\n", name); - - return(TRUE); - } - - ut_a(ret); - - /* Add the archive file as a node to the space */ - - ut_a(fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE, - group->archive_space_id, FALSE)); - ut_a(RECV_SCAN_SIZE >= LOG_FILE_HDR_SIZE); - - /* Read the archive file header */ - fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, - 0, 0, - LOG_FILE_HDR_SIZE, buf, NULL, 0); - - /* Check if the archive file header is consistent */ - - if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id - || mach_read_from_8(buf + LOG_FILE_START_LSN) - != group->archived_file_no) { - fprintf(stderr, - "InnoDB: Archive file header inconsistent %s\n", name); - - return(TRUE); - } - - if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) { - fprintf(stderr, - "InnoDB: Archive file not completely written %s\n", - name); - - return(TRUE); - } - - start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN); - file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN); - - if (!recv_sys->scanned_lsn) { - - if (recv_sys->parse_start_lsn < start_lsn) { - fprintf(stderr, - "InnoDB: Archive log file %s" - " starts from too big a lsn\n", - name); - return(TRUE); - } - - recv_sys->scanned_lsn = start_lsn; - } - - if (recv_sys->scanned_lsn != start_lsn) { - - fprintf(stderr, - "InnoDB: Archive log file %s starts from" - " a wrong lsn\n", - name); - return(TRUE); - } - - read_offset = LOG_FILE_HDR_SIZE; - - for (;;) { - len = RECV_SCAN_SIZE; - - if (read_offset + len > file_size) { - len = ut_calc_align_down(file_size - read_offset, - OS_FILE_LOG_BLOCK_SIZE); - } - - if (len == 0) { - - break; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Archive read starting at" - " lsn " LSN_PF ", len %lu from file %s\n", - start_lsn, - (ulong) len, name); - } -#endif /* UNIV_DEBUG */ - - fil_io(OS_FILE_READ | OS_FILE_LOG, true, - group->archive_space_id, 0, - read_offset / UNIV_PAGE_SIZE, - read_offset % UNIV_PAGE_SIZE, len, buf, NULL, 0); - - ret = recv_scan_log_recs( - (buf_pool_get_n_pages() - - (recv_n_pool_free_frames * srv_buf_pool_instances)) - * UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn, - &dummy_lsn, &scanned_lsn, &err); - - if (err != DB_SUCCESS) { - return(FALSE); - } - - if (scanned_lsn == file_end_lsn) { - - return(FALSE); - } - - if (ret) { - fprintf(stderr, - "InnoDB: Archive log file %s" - " does not scan right\n", - name); - return(TRUE); - } - - read_offset += len; - start_lsn += len; - - ut_ad(start_lsn == scanned_lsn); - } - - return(FALSE); -} - -/********************************************************//** -Recovers from archived log files, and also from log files, if they exist. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -recv_recovery_from_archive_start( -/*=============================*/ - ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the - data files */ - ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if - possible */ - lsn_t first_log_no) /*!< in: number of the first archived - log file to use in the recovery; the - file will be searched from - INNOBASE_LOG_ARCH_DIR specified in - server config file */ -{ - log_group_t* group; - ulint group_id; - ulint trunc_len; - ibool ret; - dberr_t err; - - ut_a(0); - - recv_sys_create(); - recv_sys_init(buf_pool_get_curr_size()); - - recv_recovery_on = TRUE; - recv_recovery_from_backup_on = TRUE; - - recv_sys->limit_lsn = limit_lsn; - - group_id = 0; - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group) { - if (group->id == group_id) { - - break; - } - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - if (!group) { - fprintf(stderr, - "InnoDB: There is no log group defined with id %lu!\n", - (ulong) group_id); - return(DB_ERROR); - } - - group->archived_file_no = first_log_no; - - recv_sys->parse_start_lsn = min_flushed_lsn; - - recv_sys->scanned_lsn = 0; - recv_sys->scanned_checkpoint_no = 0; - recv_sys->recovered_lsn = recv_sys->parse_start_lsn; - - recv_sys->archive_group = group; - - ret = FALSE; - - mutex_enter(&(log_sys->mutex)); - - while (!ret) { - ret = log_group_recover_from_archive_file(group); - - /* Close and truncate a possible processed archive file - from the file space */ - - trunc_len = UNIV_PAGE_SIZE - * fil_space_get_size(group->archive_space_id); - if (trunc_len > 0) { - fil_space_truncate_start(group->archive_space_id, - trunc_len); - } - - group->archived_file_no += group->file_size - LOG_FILE_HDR_SIZE; - } - - if (recv_sys->recovered_lsn < limit_lsn) { - - if (!recv_sys->scanned_lsn) { - - recv_sys->scanned_lsn = recv_sys->parse_start_lsn; - } - - mutex_exit(&(log_sys->mutex)); - - err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE, - limit_lsn, - LSN_MAX, - LSN_MAX); - if (err != DB_SUCCESS) { - - return(err); - } - - mutex_enter(&(log_sys->mutex)); - } - - if (limit_lsn != LSN_MAX) { - - recv_apply_hashed_log_recs(FALSE); - - recv_reset_logs(0, FALSE, recv_sys->recovered_lsn); - } - - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); -} - -/********************************************************//** -Completes recovery from archive. */ -UNIV_INTERN -void -recv_recovery_from_archive_finish(void) -/*===================================*/ -{ - recv_recovery_from_checkpoint_finish(); - - recv_recovery_from_backup_on = FALSE; -} -#endif /* UNIV_LOG_ARCHIVE */ - - void recv_dblwr_t::add(byte* page) { pages.push_back(page); diff --git a/storage/xtradb/mem/mem0pool.cc b/storage/xtradb/mem/mem0pool.cc index fe9a84d21fa..42d0417c768 100644 --- a/storage/xtradb/mem/mem0pool.cc +++ b/storage/xtradb/mem/mem0pool.cc @@ -280,6 +280,7 @@ mem_pool_free( /*==========*/ mem_pool_t* pool) /*!< in, own: memory pool */ { + mutex_free(&pool->mutex); ut_free(pool->buf); ut_free(pool); } diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 7cb283dba75..ed16a18e548 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -554,6 +554,42 @@ PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit; #endif /*_WIN32 */ /***********************************************************************//** +For an EINVAL I/O error, prints a diagnostic message if innodb_flush_method +== ALL_O_DIRECT. +@return true if the diagnostic message was printed +@return false if the diagnostic message does not apply */ +static +bool +os_diagnose_all_o_direct_einval( +/*============================*/ + ulint err) /*!< in: C error code */ +{ + if ((err == EINVAL) + && (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)) { + ib_logf(IB_LOG_LEVEL_INFO, + "The error might be caused by redo log I/O not " + "satisfying innodb_flush_method=ALL_O_DIRECT " + "requirements by the underlying file system."); + if (srv_log_block_size != 512) + ib_logf(IB_LOG_LEVEL_INFO, + "This might be caused by an incompatible " + "non-default innodb_log_block_size value %lu.", + srv_log_block_size); + ib_logf(IB_LOG_LEVEL_INFO, + "Please file a bug at https://bugs.percona.com and " + "include this error message, my.cnf settings, and " + "information about the file system where the redo log " + "resides."); + ib_logf(IB_LOG_LEVEL_INFO, + "A possible workaround is to change " + "innodb_flush_method value to something else " + "than ALL_O_DIRECT."); + return(true); + } + return(false); +} + +/***********************************************************************//** Retrieves the last error number if an error occurs in a file io function. The number should be retrieved before any other OS calls (because they may overwrite the error number). If the number is not known to this program, @@ -717,7 +753,7 @@ os_file_get_last_error_low( "InnoDB: Error trying to enable atomic writes on " "non-supported destination!\n"); } - } else { + } else if (!os_diagnose_all_o_direct_einval(err)) { if (strerror(err) != NULL) { fprintf(stderr, "InnoDB: Error number %d" @@ -974,7 +1010,7 @@ os_file_lock( #ifndef UNIV_HOTBACKUP /****************************************************************//** Creates the seek mutexes used in positioned reads and writes. */ -UNIV_INTERN +static void os_io_init_simple(void) /*===================*/ @@ -991,19 +1027,21 @@ os_io_init_simple(void) #endif } -/***********************************************************************//** -Creates a temporary file. This function is like tmpfile(3), but -the temporary file is created in the MySQL temporary directory. -@return temporary file handle, or NULL on error */ +/** Create a temporary file. This function is like tmpfile(3), but +the temporary file is created in the given parameter path. If the path +is null then it will create the file in the mysql server configuration +parameter (--tmpdir). +@param[in] path location for creating temporary file +@return temporary file handle, or NULL on error */ UNIV_INTERN FILE* -os_file_create_tmpfile(void) -/*========================*/ +os_file_create_tmpfile( + const char* path) { - FILE* file = NULL; - int fd; WAIT_ALLOW_WRITES(); - fd = innobase_mysql_tmpfile(); + + FILE* file = NULL; + int fd = innobase_mysql_tmpfile(path); ut_ad(!srv_read_only_mode); @@ -2526,7 +2564,7 @@ os_file_set_size( ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " "space for file \'%s\' failed. Current size " - INT64PF ", desired size " INT64PF "\n", + INT64PF ", desired size " INT64PF, name, current_size, size); os_file_handle_error_no_exit (name, "posix_fallocate", FALSE, __FILE__, __LINE__); @@ -2996,6 +3034,9 @@ os_file_pwrite( /* Handle partial writes and signal interruptions correctly */ for (ret = 0; ret < (ssize_t) n; ) { n_written = pwrite(file, buf, (ssize_t)n - ret, offs); + DBUG_EXECUTE_IF("xb_simulate_all_o_direct_write_failure", + n_written = -1; + errno = EINVAL;); if (n_written >= 0) { ret += n_written; offs += n_written; @@ -3141,6 +3182,10 @@ try_again: try_again: ret = os_file_pread(file, buf, n, offset, trx); + DBUG_EXECUTE_IF("xb_simulate_all_o_direct_read_failure", + ret = -1; + errno = EINVAL;); + if ((ulint) ret == n) { return(TRUE); } else if (ret == -1) { @@ -3470,6 +3515,8 @@ retry: "InnoDB: " REFMAN "operating-system-error-codes.html\n"); + os_diagnose_all_o_direct_einval(errno); + os_has_said_disk_full = TRUE; } @@ -4067,7 +4114,7 @@ os_aio_native_aio_supported(void) return(FALSE); } else if (!srv_read_only_mode) { /* Now check if tmpdir supports native aio ops. */ - fd = innobase_mysql_tmpfile(); + fd = innobase_mysql_tmpfile(NULL); if (fd < 0) { ib_logf(IB_LOG_LEVEL_WARN, @@ -4437,6 +4484,14 @@ os_aio_free(void) os_event_free(os_aio_segment_wait_events[i]); } +#if !defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8 + os_mutex_free(os_file_count_mutex); +#endif /* !HAVE_ATOMIC_BUILTINS || UNIV_WORD_SIZE < 8 */ + + for (ulint i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { + os_mutex_free(os_file_seek_mutexes[i]); + } + ut_free(os_aio_segment_wait_events); os_aio_segment_wait_events = 0; os_aio_n_segments = 0; @@ -6124,7 +6179,7 @@ os_aio_print( srv_io_thread_function[i]); #ifndef __WIN__ - if (os_aio_segment_wait_events[i]->is_set) { + if (os_aio_segment_wait_events[i]->is_set()) { fprintf(file, " ev set"); } #endif /* __WIN__ */ diff --git a/storage/xtradb/os/os0sync.cc b/storage/xtradb/os/os0sync.cc index e5b0765b3fd..423b94c6db0 100644 --- a/storage/xtradb/os/os0sync.cc +++ b/storage/xtradb/os/os0sync.cc @@ -47,27 +47,14 @@ struct os_mutex_t{ do not assume that the OS mutex supports recursive locking, though NT seems to do that */ - UT_LIST_NODE_T(os_mutex_t) os_mutex_list; - /* list of all 'slow' OS mutexes created */ }; -/** Mutex protecting counts and the lists of OS mutexes and events */ -UNIV_INTERN os_ib_mutex_t os_sync_mutex; -/** TRUE if os_sync_mutex has been initialized */ -static ibool os_sync_mutex_inited = FALSE; -/** TRUE when os_sync_free() is being executed */ -static ibool os_sync_free_called = FALSE; +// All the os_*_count variables are accessed atomically /** This is incremented by 1 in os_thread_create and decremented by 1 in -os_thread_exit */ +os_thread_exit. */ UNIV_INTERN ulint os_thread_count = 0; -/** The list of all events created */ -static UT_LIST_BASE_NODE_T(os_event) os_event_list; - -/** The list of all OS 'slow' mutexes */ -static UT_LIST_BASE_NODE_T(os_mutex_t) os_mutex_list; - UNIV_INTERN ulint os_event_count = 0; UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0; @@ -80,12 +67,6 @@ UNIV_INTERN mysql_pfs_key_t event_os_mutex_key; UNIV_INTERN mysql_pfs_key_t os_mutex_key; #endif -/* Because a mutex is embedded inside an event and there is an -event embedded inside a mutex, on free, this generates a recursive call. -This version of the free event function doesn't acquire the global lock */ -static void os_event_free_internal(os_event_t event); - - /*********************************************************//** Initialitze condition variable */ UNIV_INLINE @@ -226,52 +207,27 @@ void os_sync_init(void) /*==============*/ { - UT_LIST_INIT(os_event_list); - UT_LIST_INIT(os_mutex_list); - - os_sync_mutex = NULL; - os_sync_mutex_inited = FALSE; - - os_sync_mutex = os_mutex_create(); - - os_sync_mutex_inited = TRUE; } -/*********************************************************//** -Frees created events and OS 'slow' mutexes. */ +/** Create an event semaphore, i.e., a semaphore which may just have two +states: signaled and nonsignaled. The created event is manual reset: it must be +reset explicitly by calling sync_os_reset_event. +@param[in,out] event memory block where to create the event */ UNIV_INTERN void -os_sync_free(void) -/*==============*/ +os_event_create(os_event_t event) { - os_event_t event; - os_ib_mutex_t mutex; - - os_sync_free_called = TRUE; - event = UT_LIST_GET_FIRST(os_event_list); - - while (event) { - - os_event_free(event); - - event = UT_LIST_GET_FIRST(os_event_list); - } - - mutex = UT_LIST_GET_FIRST(os_mutex_list); +#ifndef PFS_SKIP_EVENT_MUTEX + os_fast_mutex_init(event_os_mutex_key, &event->os_mutex); +#else + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &event->os_mutex); +#endif - while (mutex) { - if (mutex == os_sync_mutex) { - /* Set the flag to FALSE so that we do not try to - reserve os_sync_mutex any more in remaining freeing - operations in shutdown */ - os_sync_mutex_inited = FALSE; - } + os_cond_init(&(event->cond_var)); - os_mutex_free(mutex); + event->init_count_and_set(); - mutex = UT_LIST_GET_FIRST(os_mutex_list); - } - os_sync_free_called = FALSE; + os_atomic_increment_ulint(&os_event_count, 1); } /*********************************************************//** @@ -284,43 +240,9 @@ os_event_t os_event_create(void) /*==================*/ { - os_event_t event; - - event = static_cast<os_event_t>(ut_malloc(sizeof *event)); - -#ifndef PFS_SKIP_EVENT_MUTEX - os_fast_mutex_init(event_os_mutex_key, &event->os_mutex); -#else - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &event->os_mutex); -#endif + os_event_t event = static_cast<os_event_t>(ut_malloc(sizeof(*event)));; - os_cond_init(&(event->cond_var)); - - event->is_set = FALSE; - - /* We return this value in os_event_reset(), which can then be - be used to pass to the os_event_wait_low(). The value of zero - is reserved in os_event_wait_low() for the case when the - caller does not want to pass any signal_count value. To - distinguish between the two cases we initialize signal_count - to 1 here. */ - event->signal_count = 1; - - /* The os_sync_mutex can be NULL because during startup an event - can be created [ because it's embedded in the mutex/rwlock ] before - this module has been initialized */ - if (os_sync_mutex != NULL) { - os_mutex_enter(os_sync_mutex); - } - - /* Put to the list of events */ - UT_LIST_ADD_FIRST(os_event_list, os_event_list, event); - - os_event_count++; - - if (os_sync_mutex != NULL) { - os_mutex_exit(os_sync_mutex); - } + os_event_create(event); return(event); } @@ -338,11 +260,11 @@ os_event_set( os_fast_mutex_lock(&(event->os_mutex)); - if (event->is_set) { + if (UNIV_UNLIKELY(event->is_set())) { /* Do nothing */ } else { - event->is_set = TRUE; - event->signal_count += 1; + event->set(); + event->inc_signal_count(); os_cond_broadcast(&(event->cond_var)); } @@ -369,48 +291,26 @@ os_event_reset( os_fast_mutex_lock(&(event->os_mutex)); - if (!event->is_set) { + if (UNIV_UNLIKELY(!event->is_set())) { /* Do nothing */ } else { - event->is_set = FALSE; + event->reset(); } - ret = event->signal_count; + ret = event->signal_count(); os_fast_mutex_unlock(&(event->os_mutex)); return(ret); } /**********************************************************//** -Frees an event object, without acquiring the global lock. */ -static -void -os_event_free_internal( -/*===================*/ - os_event_t event) /*!< in: event to free */ -{ - - ut_a(event); - - /* This is to avoid freeing the mutex twice */ - os_fast_mutex_free(&(event->os_mutex)); - - os_cond_destroy(&(event->cond_var)); - - /* Remove from the list of events */ - UT_LIST_REMOVE(os_event_list, os_event_list, event); - - os_event_count--; - - ut_free(event); -} - -/**********************************************************//** Frees an event object. */ UNIV_INTERN void os_event_free( /*==========*/ - os_event_t event) /*!< in: event to free */ + os_event_t event, /*!< in: event to free */ + bool free_memory)/*!< in: if true, deallocate the event + memory block too */ { ut_a(event); @@ -419,16 +319,10 @@ os_event_free( os_cond_destroy(&(event->cond_var)); - /* Remove from the list of events */ - os_mutex_enter(os_sync_mutex); - - UT_LIST_REMOVE(os_event_list, os_event_list, event); + os_atomic_decrement_ulint(&os_event_count, 1); - os_event_count--; - - os_mutex_exit(os_sync_mutex); - - ut_free(event); + if (free_memory) + ut_free(event); } /**********************************************************//** @@ -461,10 +355,10 @@ os_event_wait_low( os_fast_mutex_lock(&event->os_mutex); if (!reset_sig_count) { - reset_sig_count = event->signal_count; + reset_sig_count = event->signal_count(); } - while (!event->is_set && event->signal_count == reset_sig_count) { + while (!event->is_set() && event->signal_count() == reset_sig_count) { os_cond_wait(&(event->cond_var), &(event->os_mutex)); /* Solaris manual said that spurious wakeups may occur: we @@ -536,11 +430,12 @@ os_event_wait_time_low( os_fast_mutex_lock(&event->os_mutex); if (!reset_sig_count) { - reset_sig_count = event->signal_count; + reset_sig_count = event->signal_count(); } do { - if (event->is_set || event->signal_count != reset_sig_count) { + if (event->is_set() + || event->signal_count() != reset_sig_count) { break; } @@ -584,18 +479,7 @@ os_mutex_create(void) mutex_str->count = 0; mutex_str->event = os_event_create(); - if (UNIV_LIKELY(os_sync_mutex_inited)) { - /* When creating os_sync_mutex itself we cannot reserve it */ - os_mutex_enter(os_sync_mutex); - } - - UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str); - - os_mutex_count++; - - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_exit(os_sync_mutex); - } + os_atomic_increment_ulint(&os_mutex_count, 1); return(mutex_str); } @@ -641,21 +525,9 @@ os_mutex_free( { ut_a(mutex); - if (UNIV_LIKELY(!os_sync_free_called)) { - os_event_free_internal(mutex->event); - } + os_event_free(mutex->event); - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_enter(os_sync_mutex); - } - - UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex); - - os_mutex_count--; - - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_exit(os_sync_mutex); - } + os_atomic_decrement_ulint(&os_mutex_count, 1); os_fast_mutex_free(static_cast<os_fast_mutex_t*>(mutex->handle)); ut_free(mutex->handle); @@ -677,18 +549,7 @@ os_fast_mutex_init_func( #else ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST)); #endif - if (UNIV_LIKELY(os_sync_mutex_inited)) { - /* When creating os_sync_mutex itself (in Unix) we cannot - reserve it */ - - os_mutex_enter(os_sync_mutex); - } - - os_fast_mutex_count++; - - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_exit(os_sync_mutex); - } + os_atomic_increment_ulint(&os_fast_mutex_count, 1); } /**********************************************************//** @@ -769,17 +630,6 @@ os_fast_mutex_free_func( putc('\n', stderr); } #endif - if (UNIV_LIKELY(os_sync_mutex_inited)) { - /* When freeing the last mutexes, we have - already freed os_sync_mutex */ - os_mutex_enter(os_sync_mutex); - } - - ut_ad(os_fast_mutex_count > 0); - os_fast_mutex_count--; - - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_exit(os_sync_mutex); - } + os_atomic_decrement_ulint(&os_fast_mutex_count, 1); } diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc index 8420a94787b..aabdd06d76b 100644 --- a/storage/xtradb/os/os0thread.cc +++ b/storage/xtradb/os/os0thread.cc @@ -145,9 +145,7 @@ os_thread_create_func( os_thread_t thread; DWORD win_thread_id; - os_mutex_enter(os_sync_mutex); - os_thread_count++; - os_mutex_exit(os_sync_mutex); + os_atomic_increment_ulint(&os_thread_count, 1); thread = CreateThread(NULL, /* no security attributes */ 0, /* default size stack */ @@ -186,9 +184,8 @@ os_thread_create_func( exit(1); } #endif - os_mutex_enter(os_sync_mutex); - os_thread_count++; - os_mutex_exit(os_sync_mutex); + ulint new_count = os_atomic_increment_ulint(&os_thread_count, 1); + ut_a(new_count <= OS_THREAD_MAX_N); #ifdef UNIV_HPUX10 ret = pthread_create(&pthread, pthread_attr_default, func, arg); @@ -205,8 +202,6 @@ os_thread_create_func( pthread_attr_destroy(&attr); #endif - ut_a(os_thread_count <= OS_THREAD_MAX_N); - if (thread_id) { *thread_id = pthread; } @@ -233,9 +228,7 @@ os_thread_exit( pfs_delete_thread(); #endif - os_mutex_enter(os_sync_mutex); - os_thread_count--; - os_mutex_exit(os_sync_mutex); + os_atomic_decrement_ulint(&os_thread_count, 1); #ifdef __WIN__ ExitThread((DWORD) exit_value); diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc index c7a5b51ef55..ac0dc844b1b 100644 --- a/storage/xtradb/row/row0ftsort.cc +++ b/storage/xtradb/row/row0ftsort.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, MariaDB Corporation. +Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2015, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -240,6 +240,9 @@ row_fts_psort_info_init( crypt_data = NULL; } + ut_ad(trx->mysql_thd != NULL); + const char* path = thd_innodb_tmpdir(trx->mysql_thd); + /* There will be FTS_NUM_AUX_INDEX number of "sort buckets" for each parallel sort thread. Each "sort bucket" holds records for a particular "FTS index partition" */ @@ -261,8 +264,8 @@ row_fts_psort_info_init( psort_info[j].merge_buf[i] = row_merge_buf_create( dup->index); - if (row_merge_file_create(psort_info[j].merge_file[i]) - < 0) { + if (row_merge_file_create(psort_info[j].merge_file[i], + path) < 0) { goto func_exit; } @@ -662,6 +665,11 @@ fts_parallel_tokenization( dberr_t error = DB_SUCCESS; fil_space_crypt_t* crypt_data = NULL; + ut_ad(psort_info->psort_common->trx->mysql_thd != NULL); + + const char* path = thd_innodb_tmpdir( + psort_info->psort_common->trx->mysql_thd); + ut_ad(psort_info); buf = psort_info->merge_buf; @@ -905,7 +913,7 @@ exit: continue; } - tmpfd[i] = row_merge_file_create_low(); + tmpfd[i] = row_merge_file_create_low(path); if (tmpfd[i] < 0) { error = DB_OUT_OF_MEMORY; goto func_exit; diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index b0c6f881f81..f0b200459dd 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -198,8 +198,25 @@ struct row_log_t { or by index->lock X-latch only */ row_log_buf_t head; /*!< reader context; protected by MDL only; modifiable by row_log_apply_ops() */ + const char* path; /*!< where to create temporary file during + log operation */ }; +/** Create the file or online log if it does not exist. +@param[in,out] log online rebuild log +@return file descriptor. */ +static __attribute__((warn_unused_result)) +int +row_log_tmpfile( + row_log_t* log) +{ + DBUG_ENTER("row_log_tmpfile"); + if (log->fd < 0) { + log->fd = row_merge_file_create_low(log->path); + } + + DBUG_RETURN(log->fd); +} /** Allocate the memory for the log buffer. @param[in,out] log_buf Buffer used for log operation @@ -344,6 +361,12 @@ row_log_online_op( log->tail.buf, avail_size); } UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size); + + if (row_log_tmpfile(log) < 0) { + log->error = DB_OUT_OF_MEMORY; + goto err_exit; + } + ret = os_file_write( "(modification log)", OS_FILE_FROM_FD(log->fd), @@ -454,6 +477,12 @@ row_log_table_close_func( log->tail.buf, avail); } UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size); + + if (row_log_tmpfile(log) < 0) { + log->error = DB_OUT_OF_MEMORY; + goto err_exit; + } + ret = os_file_write( "(modification log)", OS_FILE_FROM_FD(log->fd), @@ -473,6 +502,7 @@ write_failed: log->tail.total += size; UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); +err_exit: mutex_exit(&log->mutex); os_atomic_increment_ulint(&onlineddl_rowlog_rows, 1); @@ -2544,7 +2574,8 @@ corruption: if (index->online_log->head.blocks) { #ifdef HAVE_FTRUNCATE /* Truncate the file in order to save space. */ - if (ftruncate(index->online_log->fd, 0) == -1) { + if (index->online_log->fd != -1 + && ftruncate(index->online_log->fd, 0) == -1) { perror("ftruncate"); } #endif /* HAVE_FTRUNCATE */ @@ -2860,8 +2891,9 @@ row_log_allocate( const dtuple_t* add_cols, /*!< in: default values of added columns, or NULL */ - const ulint* col_map)/*!< in: mapping of old column + const ulint* col_map,/*!< in: mapping of old column numbers to new ones, or NULL if !table */ + const char* path) /*!< in: where to create temporary file */ { row_log_t* log; DBUG_ENTER("row_log_allocate"); @@ -2880,11 +2912,7 @@ row_log_allocate( DBUG_RETURN(false); } - log->fd = row_merge_file_create_low(); - if (log->fd < 0) { - ut_free(log); - DBUG_RETURN(false); - } + log->fd = -1; mutex_create(index_online_log_key, &log->mutex, SYNC_INDEX_ONLINE_LOG); log->blobs = NULL; @@ -2899,6 +2927,7 @@ row_log_allocate( log->tail.block = log->head.block = NULL; log->head.blocks = log->head.bytes = 0; log->head.total = 0; + log->path = path; dict_index_set_online_status(index, ONLINE_INDEX_CREATION); index->online_log = log; @@ -3376,7 +3405,8 @@ corruption: if (index->online_log->head.blocks) { #ifdef HAVE_FTRUNCATE /* Truncate the file in order to save space. */ - if (ftruncate(index->online_log->fd, 0) == -1) { + if (index->online_log->fd != -1 + && ftruncate(index->online_log->fd, 0) == -1) { perror("ftruncate"); } #endif /* HAVE_FTRUNCATE */ diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index 49dd12ae96f..9c3e9dfc885 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1389,47 +1389,95 @@ row_merge_write_eof( return(&block[0]); } -/********************************************************************//** -Reads clustered index of the table and create temporary files +/** Create a temporary file if it has not been created already. +@param[in,out] tmpfd temporary file handle +@param[in] path path to create temporary file +@return file descriptor, or -1 on failure */ +static __attribute__((warn_unused_result)) +int +row_merge_tmpfile_if_needed( + int* tmpfd, + const char* path) +{ + if (*tmpfd < 0) { + *tmpfd = row_merge_file_create_low(path); + } + + return(*tmpfd); +} + +/** Create a temporary file for merge sort if it was not created already. +@param[in,out] file merge file structure +@param[in,out] tmpfd temporary file structure +@param[in] nrec number of records in the file +@param[in] path path to create temporary files +@return file descriptor, or -1 on failure */ +static __attribute__((warn_unused_result)) +int +row_merge_file_create_if_needed( + merge_file_t* file, + int* tmpfd, + ulint nrec, + const char* path) +{ + ut_ad(file->fd < 0 || *tmpfd >=0); + if (file->fd < 0 && row_merge_file_create(file, path) >= 0) { + if (row_merge_tmpfile_if_needed(tmpfd, path) < 0) { + return(-1); + } + + file->n_rec = nrec; + } + + ut_ad(file->fd < 0 || *tmpfd >=0); + return(file->fd); +} + +/** Reads clustered index of the table and create temporary files containing the index entries for the indexes to be built. -@return DB_SUCCESS or error */ +@param[in] trx transaction +@param[in,out] table MySQL table object, for reporting erroneous + records +@param[in] old_table table where rows are read from +@param[in] new_table table where indexes are created; identical to + old_table unless creating a PRIMARY KEY +@param[in] online true if creating indexes online +@param[in] index indexes to be created +@param[in] fts_sort_idx full-text index to be created, or NULL +@param[in] psort_info parallel sort info for fts_sort_idx creation, + or NULL +@param[in] files temporary files +@param[in] key_numbers MySQL key numbers to create +@param[in] n_index number of indexes to create +@param[in] add_cols default values of added columns, or NULL +@param[in] col_map mapping of old column numbers to new ones, or + NULL if old_table == new_table +@param[in] add_autoinc number of added AUTO_INCREMENT columns, or + ULINT_UNDEFINED if none is added +@param[in,out] sequence autoinc sequence +@param[in,out] block file buffer +@param[in,out] tmpfd temporary file handle +return DB_SUCCESS or error */ static __attribute__((nonnull(1,2,3,4,6,9,10,16), warn_unused_result)) dberr_t row_merge_read_clustered_index( -/*===========================*/ - trx_t* trx, /*!< in: transaction */ - struct TABLE* table, /*!< in/out: MySQL table object, - for reporting erroneous records */ - const dict_table_t* old_table,/*!< in: table where rows are - read from */ - const dict_table_t* new_table,/*!< in: table where indexes are - created; identical to old_table - unless creating a PRIMARY KEY */ - bool online, /*!< in: true if creating indexes - online */ - dict_index_t** index, /*!< in: indexes to be created */ + trx_t* trx, + struct TABLE* table, + const dict_table_t* old_table, + const dict_table_t* new_table, + bool online, + dict_index_t** index, dict_index_t* fts_sort_idx, - /*!< in: full-text index to be created, - or NULL */ fts_psort_t* psort_info, - /*!< in: parallel sort info for - fts_sort_idx creation, or NULL */ - merge_file_t* files, /*!< in: temporary files */ + merge_file_t* files, const ulint* key_numbers, - /*!< in: MySQL key numbers to create */ - ulint n_index,/*!< in: number of indexes to create */ + ulint n_index, const dtuple_t* add_cols, - /*!< in: default values of - added columns, or NULL */ - const ulint* col_map,/*!< in: mapping of old column - numbers to new ones, or NULL - if old_table == new_table */ + const ulint* col_map, ulint add_autoinc, - /*!< in: number of added - AUTO_INCREMENT column, or - ULINT_UNDEFINED if none is added */ - ib_sequence_t& sequence,/*!< in/out: autoinc sequence */ - row_merge_block_t* block, /*!< in/out: file buffer */ + ib_sequence_t& sequence, + row_merge_block_t* block, + int* tmpfd, float pct_cost, /*!< in: percent of task weight out of total alter job */ fil_space_crypt_t* crypt_data,/*!< in: crypt data or NULL */ @@ -1477,6 +1525,9 @@ row_merge_read_clustered_index( DEBUG_FTS_SORT_PRINT("FTS_SORT: Start Create Index\n"); #endif + ut_ad(trx->mysql_thd != NULL); + const char* path = thd_innodb_tmpdir(trx->mysql_thd); + /* Create and initialize memory for record buffers */ merge_buf = static_cast<row_merge_buf_t**>( @@ -1954,13 +2005,26 @@ write_buffers: dict_index_get_lock(buf->index)); } - row_merge_buf_write(buf, file, block); + if (buf->n_tuples > 0) { - if (!row_merge_write(file->fd, file->offset++, block, - crypt_data, crypt_block, new_table->space)) { - err = DB_TEMP_FILE_WRITE_FAILURE; - trx->error_key_num = i; - break; + if (row_merge_file_create_if_needed( + file, tmpfd, buf->n_tuples, path) < 0) { + err = DB_OUT_OF_MEMORY; + trx->error_key_num = i; + break; + } + + ut_ad(file->n_rec > 0); + + row_merge_buf_write(buf, file, block); + + if (!row_merge_write(file->fd, file->offset++, + block, crypt_data, crypt_block, + new_table->space)) { + err = DB_TEMP_FILE_WRITE_FAILURE; + trx->error_key_num = i; + break; + } } UNIV_MEM_INVALID(&block[0], srv_sort_buf_size); @@ -2016,6 +2080,7 @@ write_buffers: func_exit: mtr_commit(&mtr); + mem_heap_free(row_heap); if (nonnull) { @@ -2111,7 +2176,8 @@ wait_again: if (max_doc_id && err == DB_SUCCESS) { /* Sync fts cache for other fts indexes to keep all fts indexes consistent in sync_doc_id. */ - err = fts_sync_table(const_cast<dict_table_t*>(new_table)); + err = fts_sync_table(const_cast<dict_table_t*>(new_table), + false, true); if (err == DB_SUCCESS) { fts_update_next_doc_id( @@ -3344,14 +3410,15 @@ row_merge_drop_temp_indexes(void) trx_free_for_background(trx); } -/*********************************************************************//** -Creates temporary merge files, and if UNIV_PFS_IO defined, register -the file descriptor with Performance Schema. -@return file descriptor, or -1 on failure */ + +/** Create temporary merge files in the given paramater path, and if +UNIV_PFS_IO defined, register the file descriptor with Performance Schema. +@param[in] path location for creating temporary merge files. +@return File descriptor */ UNIV_INTERN int -row_merge_file_create_low(void) -/*===========================*/ +row_merge_file_create_low( + const char* path) { int fd; #ifdef UNIV_PFS_IO @@ -3365,7 +3432,7 @@ row_merge_file_create_low(void) "Innodb Merge Temp File", __FILE__, __LINE__); #endif - fd = innobase_mysql_tmpfile(); + fd = innobase_mysql_tmpfile(path); #ifdef UNIV_PFS_IO register_pfs_file_open_end(locker, fd); #endif @@ -3378,16 +3445,18 @@ row_merge_file_create_low(void) return(fd); } -/*********************************************************************//** -Create a merge file. + +/** Create a merge file in the given location. +@param[out] merge_file merge file structure +@param[in] path location for creating temporary file @return file descriptor, or -1 on failure */ UNIV_INTERN int row_merge_file_create( -/*==================*/ - merge_file_t* merge_file) /*!< out: merge file structure */ + merge_file_t* merge_file, + const char* path) { - merge_file->fd = row_merge_file_create_low(); + merge_file->fd = row_merge_file_create_low(path); merge_file->offset = 0; merge_file->n_rec = 0; @@ -3845,7 +3914,7 @@ row_merge_drop_table( /* There must be no open transactions on the table. */ ut_a(table->n_ref_count == 0); - return(row_drop_table_for_mysql(table->name, trx, false, false)); + return(row_drop_table_for_mysql(table->name, trx, false, false, false)); } /*********************************************************************//** @@ -3958,10 +4027,6 @@ row_merge_build_indexes( total_dynamic_cost = COST_BUILD_INDEX_DYNAMIC * n_indexes; for (i = 0; i < n_indexes; i++) { - if (row_merge_file_create(&merge_files[i]) < 0) { - error = DB_OUT_OF_MEMORY; - goto func_exit; - } if (indexes[i]->type & DICT_FTS) { ibool opt_doc_id_size = FALSE; @@ -3990,13 +4055,6 @@ row_merge_build_indexes( } } - tmpfd = row_merge_file_create_low(); - - if (tmpfd < 0) { - error = DB_OUT_OF_MEMORY; - goto func_exit; - } - /* Reset the MySQL row buffer that is used when reporting duplicate keys. */ innobase_rec_reset(table); @@ -4025,7 +4083,7 @@ row_merge_build_indexes( trx, table, old_table, new_table, online, indexes, fts_sort_idx, psort_info, merge_files, key_numbers, n_indexes, add_cols, col_map, - add_autoinc, sequence, block, pct_cost, + add_autoinc, sequence, block, &tmpfd, pct_cost, crypt_data, crypt_block); pct_progress += pct_cost; @@ -4119,7 +4177,7 @@ wait_again: #ifdef FTS_INTERNAL_DIAG_PRINT DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n"); #endif - } else if (UNIV_LIKELY(merge_files[i].n_rec)) { + } else if (merge_files[i].fd != -1) { char buf[3 * NAME_LEN]; char *bufend; row_merge_dup_t dup = { diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index 9427b20daf9..ccdfc1332a0 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -2419,7 +2419,7 @@ err_exit: dict_table_close(table, TRUE, FALSE); - row_drop_table_for_mysql(table->name, trx, FALSE); + row_drop_table_for_mysql(table->name, trx, FALSE, TRUE); if (commit) { trx_commit_for_mysql(trx); @@ -2579,7 +2579,7 @@ error_handling: trx_rollback_to_savepoint(trx, NULL); - row_drop_table_for_mysql(table_name, trx, FALSE); + row_drop_table_for_mysql(table_name, trx, FALSE, TRUE); trx_commit_for_mysql(trx); @@ -2656,7 +2656,7 @@ row_table_add_foreign_constraints( trx_rollback_to_savepoint(trx, NULL); - row_drop_table_for_mysql(name, trx, FALSE); + row_drop_table_for_mysql(name, trx, FALSE, TRUE); trx_commit_for_mysql(trx); @@ -2697,7 +2697,7 @@ row_drop_table_for_mysql_in_background( /* Try to drop the table in InnoDB */ - error = row_drop_table_for_mysql(name, trx, FALSE); + error = row_drop_table_for_mysql(name, trx, FALSE, FALSE); /* Flush the log to reduce probability that the .frm files and the InnoDB data dictionary get out-of-sync if the user runs @@ -3844,6 +3844,9 @@ row_drop_table_for_mysql( const char* name, /*!< in: table name */ trx_t* trx, /*!< in: transaction handle */ bool drop_db,/*!< in: true=dropping whole database */ + ibool create_failed,/*!<in: TRUE=create table failed + because e.g. foreign key column + type mismatch. */ bool nonatomic) /*!< in: whether it is permitted to release and reacquire dict_operation_lock */ @@ -4049,7 +4052,12 @@ row_drop_table_for_mysql( name, foreign->foreign_table_name_lookup); - if (foreign->foreign_table != table && !ref_ok) { + /* We should allow dropping a referenced table if creating + that referenced table has failed for some reason. For example + if referenced table is created but it column types that are + referenced do not match. */ + if (foreign->foreign_table != table && + !create_failed && !ref_ok) { FILE* ef = dict_foreign_err_file; @@ -4593,7 +4601,7 @@ row_mysql_drop_temp_tables(void) table = dict_table_get_low(table_name); if (table) { - row_drop_table_for_mysql(table_name, trx, FALSE); + row_drop_table_for_mysql(table_name, trx, FALSE, FALSE); trx_commit_for_mysql(trx); } @@ -4762,7 +4770,7 @@ loop: goto loop; } - err = row_drop_table_for_mysql(table_name, trx, TRUE); + err = row_drop_table_for_mysql(table_name, trx, TRUE, FALSE); trx_commit_for_mysql(trx); if (err != DB_SUCCESS) { diff --git a/storage/xtradb/srv/srv0conc.cc b/storage/xtradb/srv/srv0conc.cc index cd73e5cd891..a8e7e2ab1aa 100644 --- a/storage/xtradb/srv/srv0conc.cc +++ b/storage/xtradb/srv/srv0conc.cc @@ -165,6 +165,10 @@ srv_conc_free(void) { #ifndef HAVE_ATOMIC_BUILTINS os_fast_mutex_free(&srv_conc_mutex); + + for (ulint i = 0; i < OS_THREAD_MAX_N; i++) + os_event_free(srv_conc_slots[i].event); + mem_free(srv_conc_slots); srv_conc_slots = NULL; #endif /* !HAVE_ATOMIC_BUILTINS */ diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 58336924908..85e988e16b2 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. @@ -372,8 +372,6 @@ UNIV_INTERN ulong srv_read_ahead_threshold = 56; #ifdef UNIV_LOG_ARCHIVE UNIV_INTERN ibool srv_log_archive_on = FALSE; -UNIV_INTERN ibool srv_archive_recovery = 0; -UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn; #endif /* UNIV_LOG_ARCHIVE */ /* This parameter is used to throttle the number of insert buffers that are @@ -805,6 +803,10 @@ struct srv_sys_t{ srv_stats_t::ulint_ctr_1_t activity_count; /*!< For tracking server activity */ + srv_stats_t::ulint_ctr_1_t + ibuf_merge_activity_count;/*!< For tracking change + buffer merge activity, a subset + of overall server activity */ }; #ifndef HAVE_ATOMIC_BUILTINS @@ -1195,8 +1197,9 @@ srv_init(void) srv_checkpoint_completed_event = os_event_create(); + srv_redo_log_tracked_event = os_event_create(); + if (srv_track_changed_pages) { - srv_redo_log_tracked_event = os_event_create(); os_event_set(srv_redo_log_tracked_event); } @@ -1246,17 +1249,34 @@ srv_free(void) { srv_conc_free(); - /* The mutexes srv_sys->mutex and srv_sys->tasks_mutex should have - been freed by sync_close() already. */ - mem_free(srv_sys); - srv_sys = NULL; + if (!srv_read_only_mode) { - trx_i_s_cache_free(trx_i_s_cache); + for (ulint i = 0; i < srv_sys->n_sys_threads; i++) + os_event_free(srv_sys->sys_threads[i].event); - if (!srv_read_only_mode) { + os_event_free(srv_error_event); + os_event_free(srv_monitor_event); os_event_free(srv_buf_dump_event); - srv_buf_dump_event = NULL; + os_event_free(srv_checkpoint_completed_event); + os_event_free(srv_redo_log_tracked_event); + mutex_free(&srv_sys->mutex); + mutex_free(&srv_sys->tasks_mutex); } + +#ifdef WITH_INNODB_DISALLOW_WRITES + os_event_free(srv_allow_writes_event); +#endif /* WITH_INNODB_DISALLOW_WRITES */ + +#ifndef HAVE_ATOMIC_BUILTINS + mutex_free(&server_mutex); +#endif + mutex_free(&srv_innodb_monitor_mutex); + mutex_free(&page_zip_stat_per_index_mutex); + + mem_free(srv_sys); + srv_sys = NULL; + + trx_i_s_cache_free(trx_i_s_cache); } /*********************************************************************//** @@ -2238,6 +2258,8 @@ exit_func: /*********************************************************************//** A thread which prints warnings about semaphore waits which have lasted too long. These can be used to track bugs which cause hangs. +Note: In order to make sync_arr_wake_threads_if_sema_free work as expected, +we should avoid waiting any mutexes in this function! @return a dummy parameter */ extern "C" UNIV_INTERN os_thread_ret_t @@ -2277,23 +2299,21 @@ loop: /* Try to track a strange bug reported by Harald Fuchs and others, where the lsn seems to decrease at times */ - /* We have to use nowait to ensure we don't block */ - new_lsn= log_get_lsn_nowait(); - - if (new_lsn && new_lsn < old_lsn) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: old log sequence number " LSN_PF - " was greater\n" - "InnoDB: than the new log sequence number " LSN_PF "!\n" - "InnoDB: Please submit a bug report" - " to http://bugs.mysql.com\n", - old_lsn, new_lsn); - ut_ad(0); - } + if (log_peek_lsn(&new_lsn)) { + if (new_lsn < old_lsn) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: old log sequence number " LSN_PF + " was greater\n" + "InnoDB: than the new log sequence number " LSN_PF "!\n" + "InnoDB: Please submit a bug report" + " to http://bugs.mysql.com\n", + old_lsn, new_lsn); + ut_ad(0); + } - if (new_lsn) old_lsn = new_lsn; + } if (difftime(time(NULL), srv_last_monitor_time) > 60) { /* We referesh InnoDB Monitor values so that averages are @@ -2315,7 +2335,7 @@ loop: if (sync_array_print_long_waits(&waiter, &sema) && sema == old_sema && os_thread_eq(waiter, old_waiter)) { #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) - if (srv_allow_writes_event->is_set) { + if (srv_allow_writes_event->is_set()) { #endif /* WITH_WSREP */ fatal_cnt++; #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) @@ -2403,10 +2423,15 @@ rescan_idle: Increment the server activity count. */ UNIV_INTERN void -srv_inc_activity_count(void) -/*========================*/ +srv_inc_activity_count( +/*===================*/ + bool ibuf_merge_activity) /*!< whether this activity bump + is caused by the background + change buffer merge */ { srv_sys->activity_count.inc(); + if (ibuf_merge_activity) + srv_sys->ibuf_merge_activity_count.inc(); } /**********************************************************************//** @@ -2526,7 +2551,7 @@ DECLARE_THREAD(srv_redo_log_follow_thread)( /* TODO: sync with I_S log tracking status? */ ib_logf(IB_LOG_LEVEL_ERROR, "log tracking bitmap write failed, " - "stopping log tracking thread!\n"); + "stopping log tracking thread!"); break; } os_event_set(srv_redo_log_tracked_event); @@ -2568,7 +2593,7 @@ purge_archived_logs( if (!dir) { ib_logf(IB_LOG_LEVEL_WARN, "opening archived log directory %s failed. " - "Purge archived logs are not available\n", + "Purge archived logs are not available", srv_arch_dir); /* failed to open directory */ return(DB_ERROR); @@ -2656,7 +2681,7 @@ purge_archived_logs( archived_log_filename)) { ib_logf(IB_LOG_LEVEL_WARN, - "can't delete archived log file %s.\n", + "can't delete archived log file %s.", archived_log_filename); mutex_exit(&log_sys->mutex); @@ -2764,16 +2789,49 @@ srv_get_activity_count(void) return(srv_sys->activity_count); } +/** Get current server ibuf merge activity count. +@return ibuf merge activity count */ +static +ulint +srv_get_ibuf_merge_activity_count(void) +{ + return(srv_sys->ibuf_merge_activity_count); +} + /*******************************************************************//** -Check if there has been any activity. +Check if there has been any activity. Considers background change buffer +merge as regular server activity unless a non-default +old_ibuf_merge_activity_count value is passed, in which case the merge will be +treated as keeping server idle. @return FALSE if no change in activity counter. */ UNIV_INTERN ibool srv_check_activity( /*===============*/ - ulint old_activity_count) /*!< in: old activity count */ + ulint old_activity_count, /*!< in: old activity count */ + /*!< old change buffer merge + activity count, or + ULINT_UNDEFINED */ + ulint old_ibuf_merge_activity_count) { - return(srv_sys->activity_count != old_activity_count); + ulint new_activity_count = srv_sys->activity_count; + if (old_ibuf_merge_activity_count == ULINT_UNDEFINED) + return(new_activity_count != old_activity_count); + + /* If we care about ibuf merge activity, then the server is considered + idle if all activity, if any, was due to ibuf merge. */ + ulint new_ibuf_merge_activity_count + = srv_sys->ibuf_merge_activity_count; + + ut_ad(new_ibuf_merge_activity_count <= new_activity_count); + ut_ad(new_ibuf_merge_activity_count >= old_ibuf_merge_activity_count); + ut_ad(new_activity_count >= old_activity_count); + + ulint ibuf_merge_activity_delta = + new_ibuf_merge_activity_count - old_ibuf_merge_activity_count; + ulint activity_delta = new_activity_count - old_activity_count; + + return (activity_delta > ibuf_merge_activity_delta); } /********************************************************************//** @@ -3137,6 +3195,8 @@ DECLARE_THREAD(srv_master_thread)( { srv_slot_t* slot; ulint old_activity_count = srv_get_activity_count(); + ulint old_ibuf_merge_activity_count + = srv_get_ibuf_merge_activity_count(); ib_time_t last_print_time; ut_ad(!srv_read_only_mode); @@ -3174,8 +3234,12 @@ loop: srv_current_thread_priority = srv_master_thread_priority; - if (srv_check_activity(old_activity_count)) { + if (srv_check_activity(old_activity_count, + old_ibuf_merge_activity_count)) { + old_activity_count = srv_get_activity_count(); + old_ibuf_merge_activity_count + = srv_get_ibuf_merge_activity_count(); srv_master_do_active_tasks(); } else { srv_master_do_idle_tasks(); diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 1db90b2eda5..6a3e756cb44 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -1975,7 +1975,7 @@ innobase_start_or_create_for_mysql(void) } } else { srv_monitor_file_name = NULL; - srv_monitor_file = os_file_create_tmpfile(); + srv_monitor_file = os_file_create_tmpfile(NULL); if (!srv_monitor_file) { return(DB_ERROR); @@ -1985,7 +1985,7 @@ innobase_start_or_create_for_mysql(void) mutex_create(srv_dict_tmpfile_mutex_key, &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION); - srv_dict_tmpfile = os_file_create_tmpfile(); + srv_dict_tmpfile = os_file_create_tmpfile(NULL); if (!srv_dict_tmpfile) { return(DB_ERROR); @@ -1994,7 +1994,7 @@ innobase_start_or_create_for_mysql(void) mutex_create(srv_misc_tmpfile_mutex_key, &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH); - srv_misc_tmpfile = os_file_create_tmpfile(); + srv_misc_tmpfile = os_file_create_tmpfile(NULL); if (!srv_misc_tmpfile) { return(DB_ERROR); @@ -2458,40 +2458,6 @@ files_checked: create_log_files_rename(logfilename, dirnamelen, max_flushed_lsn, logfile0); -#ifdef UNIV_LOG_ARCHIVE - } else if (srv_archive_recovery) { - - ib_logf(IB_LOG_LEVEL_INFO, - " Starting archive recovery from a backup..."); - - err = recv_recovery_from_archive_start( - min_flushed_lsn, srv_archive_recovery_limit_lsn, - min_arch_log_no); - if (err != DB_SUCCESS) { - - return(DB_ERROR); - } - /* Since ibuf init is in dict_boot, and ibuf is needed - in any disk i/o, first call dict_boot */ - - err = dict_boot(); - - if (err != DB_SUCCESS) { - return(err); - } - - ib_bh = trx_sys_init_at_db_start(); - n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); - - /* The purge system needs to create the purge view and - therefore requires that the trx_sys is inited. */ - - trx_purge_sys_create(srv_n_purge_threads, ib_bh); - - srv_startup_is_before_trx_rollback_phase = FALSE; - - recv_recovery_from_archive_finish(); -#endif /* UNIV_LOG_ARCHIVE */ } else { /* Check if we support the max format that is stamped @@ -3188,15 +3154,13 @@ innobase_shutdown_for_mysql(void) logs_empty_and_mark_files_at_shutdown() and should have already quit or is quitting right now. */ - if (srv_use_mtflush) { /* g. Exit the multi threaded flush threads */ buf_mtflu_io_thread_exit(); } - os_mutex_enter(os_sync_mutex); - + os_rmb; if (os_thread_count == 0) { /* All the threads have exited or are just exiting; NOTE that the threads may not have completed their @@ -3206,15 +3170,11 @@ innobase_shutdown_for_mysql(void) os_thread_exit(). Now we just sleep 0.1 seconds and hope that is enough! */ - os_mutex_exit(os_sync_mutex); - os_thread_sleep(100000); break; } - os_mutex_exit(os_sync_mutex); - os_thread_sleep(100000); } @@ -3323,26 +3283,23 @@ innobase_shutdown_for_mysql(void) que_close(); row_mysql_close(); srv_mon_free(); - sync_close(); srv_free(); fil_close(); - /* 4. Free the os_conc_mutex and all os_events and os_mutexes */ - - os_sync_free(); - - /* 5. Free all allocated memory */ + /* 4. Free all allocated memory */ pars_lexer_close(); log_mem_free(); buf_pool_free(srv_buf_pool_instances); mem_close(); + sync_close(); /* ut_free_all_mem() frees all allocated memory not freed yet in shutdown, and it will also free the ut_list_mutex, so it should be the last one for all operation */ ut_free_all_mem(); + os_rmb; if (os_thread_count != 0 || os_event_count != 0 || os_mutex_count != 0 diff --git a/storage/xtradb/sync/sync0arr.cc b/storage/xtradb/sync/sync0arr.cc index d881c5de2f5..c311d2cbd7d 100644 --- a/storage/xtradb/sync/sync0arr.cc +++ b/storage/xtradb/sync/sync0arr.cc @@ -336,21 +336,21 @@ sync_cell_get_event( ulint type = cell->request_type; if (type == SYNC_MUTEX) { - return(((ib_mutex_t*) cell->wait_object)->event); + return(&((ib_mutex_t*) cell->wait_object)->event); } else if (type == SYNC_PRIO_MUTEX) { - return(((ib_prio_mutex_t*) cell->wait_object) + return(&((ib_prio_mutex_t*) cell->wait_object) ->high_priority_event); } else if (type == RW_LOCK_WAIT_EX) { - return(((rw_lock_t*) cell->wait_object)->wait_ex_event); + return(&((rw_lock_t*) cell->wait_object)->wait_ex_event); } else if (type == PRIO_RW_LOCK_SHARED) { - return(((prio_rw_lock_t *) cell->wait_object) + return(&((prio_rw_lock_t *) cell->wait_object) ->high_priority_s_event); } else if (type == PRIO_RW_LOCK_EX) { - return(((prio_rw_lock_t *) cell->wait_object) + return(&((prio_rw_lock_t *) cell->wait_object) ->high_priority_x_event); } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */ ut_ad(type == RW_LOCK_SHARED || type == RW_LOCK_EX); - return(((rw_lock_t*) cell->wait_object)->event); + return(&((rw_lock_t*) cell->wait_object)->event); } } diff --git a/storage/xtradb/sync/sync0rw.cc b/storage/xtradb/sync/sync0rw.cc index 00fb5e511a4..729f510013d 100644 --- a/storage/xtradb/sync/sync0rw.cc +++ b/storage/xtradb/sync/sync0rw.cc @@ -261,8 +261,8 @@ rw_lock_create_func( lock->last_x_file_name = "not yet reserved"; lock->last_s_line = 0; lock->last_x_line = 0; - lock->event = os_event_create(); - lock->wait_ex_event = os_event_create(); + os_event_create(&lock->event); + os_event_create(&lock->wait_ex_event); mutex_enter(&rw_lock_list_mutex); @@ -304,9 +304,9 @@ rw_lock_create_func( cline); lock->high_priority_s_waiters = 0; - lock->high_priority_s_event = os_event_create(); + os_event_create(&lock->high_priority_s_event); lock->high_priority_x_waiters = 0; - lock->high_priority_x_event = os_event_create(); + os_event_create(&lock->high_priority_x_event); lock->high_priority_wait_ex_waiter = 0; } @@ -334,9 +334,9 @@ rw_lock_free_func( mutex = rw_lock_get_mutex(lock); #endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ - os_event_free(lock->event); + os_event_free(&lock->event, false); - os_event_free(lock->wait_ex_event); + os_event_free(&lock->wait_ex_event, false); ut_ad(UT_LIST_GET_PREV(list, lock) == NULL || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); @@ -366,8 +366,8 @@ rw_lock_free_func( /*==============*/ prio_rw_lock_t* lock) /*!< in: rw-lock */ { - os_event_free(lock->high_priority_s_event); - os_event_free(lock->high_priority_x_event); + os_event_free(&lock->high_priority_s_event, false); + os_event_free(&lock->high_priority_x_event, false); rw_lock_free_func(&lock->base_lock); } diff --git a/storage/xtradb/sync/sync0sync.cc b/storage/xtradb/sync/sync0sync.cc index 702dd240f16..c699fbf2ded 100644 --- a/storage/xtradb/sync/sync0sync.cc +++ b/storage/xtradb/sync/sync0sync.cc @@ -212,10 +212,7 @@ UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key; /** Global list of database mutexes (not OS mutexes) created. */ UNIV_INTERN ut_list_base_node_t mutex_list; -/** Global list of priority mutexes. A subset of mutex_list */ -UNIV_INTERN UT_LIST_BASE_NODE_T(ib_prio_mutex_t) prio_mutex_list; - -/** Mutex protecting the mutex_list and prio_mutex_list variables */ +/** Mutex protecting the mutex_list variable */ UNIV_INTERN ib_mutex_t mutex_list_mutex; #ifdef UNIV_PFS_MUTEX @@ -280,7 +277,7 @@ mutex_create_func( os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mutex->os_fast_mutex); mutex->lock_word = 0; #endif - mutex->event = os_event_create(); + os_event_create(&mutex->event); mutex_set_waiters(mutex, 0); #ifdef UNIV_DEBUG mutex->magic_n = MUTEX_MAGIC_N; @@ -349,11 +346,7 @@ mutex_create_func( cline, cmutex_name); mutex->high_priority_waiters = 0; - mutex->high_priority_event = os_event_create(); - - mutex_enter(&mutex_list_mutex); - UT_LIST_ADD_FIRST(list, prio_mutex_list, mutex); - mutex_exit(&mutex_list_mutex); + os_event_create(&mutex->high_priority_event); } /******************************************************************//** @@ -400,7 +393,7 @@ mutex_free_func( mutex_exit(&mutex_list_mutex); } - os_event_free(mutex->event); + os_event_free(&mutex->event, false); #ifdef UNIV_MEM_DEBUG func_exit: #endif /* UNIV_MEM_DEBUG */ @@ -427,12 +420,8 @@ mutex_free_func( /*============*/ ib_prio_mutex_t* mutex) /*!< in: mutex */ { - mutex_enter(&mutex_list_mutex); - UT_LIST_REMOVE(list, prio_mutex_list, mutex); - mutex_exit(&mutex_list_mutex); - ut_a(mutex->high_priority_waiters == 0); - os_event_free(mutex->high_priority_event); + os_event_free(&mutex->high_priority_event, false); mutex_free_func(&mutex->base_mutex); } @@ -722,7 +711,7 @@ mutex_signal_object( /* The memory order of resetting the waiters field and signaling the object is important. See LEMMA 1 above. */ - os_event_set(mutex->event); + os_event_set(&mutex->event); sync_array_object_signalled(); } @@ -1555,7 +1544,6 @@ sync_init(void) /* Init the mutex list and create the mutex to protect it. */ UT_LIST_INIT(mutex_list); - UT_LIST_INIT(prio_mutex_list); mutex_create(mutex_list_mutex_key, &mutex_list_mutex, SYNC_NO_ORDER_CHECK); #ifdef UNIV_SYNC_DEBUG @@ -1602,22 +1590,17 @@ sync_thread_level_arrays_free(void) #endif /* UNIV_SYNC_DEBUG */ /******************************************************************//** -Frees the resources in InnoDB's own synchronization data structures. Use -os_sync_free() after calling this. */ +Frees the resources in InnoDB's own synchronization data structures. */ UNIV_INTERN void sync_close(void) /*===========*/ { ib_mutex_t* mutex; - ib_prio_mutex_t* prio_mutex; sync_array_close(); - for (prio_mutex = UT_LIST_GET_FIRST(prio_mutex_list); prio_mutex;) { - mutex_free(prio_mutex); - prio_mutex = UT_LIST_GET_FIRST(prio_mutex_list); - } + mutex_free(&rw_lock_list_mutex); for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL; @@ -1635,7 +1618,6 @@ sync_close(void) mutex = UT_LIST_GET_FIRST(mutex_list); } - mutex_free(&mutex_list_mutex); #ifdef UNIV_SYNC_DEBUG mutex_free(&sync_thread_mutex); @@ -1646,6 +1628,8 @@ sync_close(void) os_fast_mutex_free(&rw_lock_debug_mutex); #endif /* UNIV_SYNC_DEBUG */ + mutex_free(&mutex_list_mutex); + sync_initialized = FALSE; } diff --git a/storage/xtradb/trx/trx0i_s.cc b/storage/xtradb/trx/trx0i_s.cc index 3230ac8308c..eacd9212d2f 100644 --- a/storage/xtradb/trx/trx0i_s.cc +++ b/storage/xtradb/trx/trx0i_s.cc @@ -1466,6 +1466,8 @@ trx_i_s_cache_free( /*===============*/ trx_i_s_cache_t* cache) /*!< in, own: cache to free */ { + rw_lock_free(&cache->rw_lock); + mutex_free(&cache->last_read_mutex); hash_table_free(cache->locks_hash); ha_storage_free(cache->storage); table_cache_free(&cache->innodb_trx); diff --git a/storage/xtradb/trx/trx0roll.cc b/storage/xtradb/trx/trx0roll.cc index 67f63d08303..4e8547c7d64 100644 --- a/storage/xtradb/trx/trx0roll.cc +++ b/storage/xtradb/trx/trx0roll.cc @@ -653,7 +653,7 @@ trx_rollback_active( "in recovery", table->name, trx->table_id); - err = row_drop_table_for_mysql(table->name, trx, TRUE); + err = row_drop_table_for_mysql(table->name, trx, TRUE, FALSE); trx_commit_for_mysql(trx); ut_a(err == DB_SUCCESS); |