diff options
author | Sergei Golubchik <serg@mariadb.org> | 2016-05-04 15:23:26 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2016-05-04 15:23:26 +0200 |
commit | 87e3e67f434628768b5125fbab7e8862fa60da1a (patch) | |
tree | df19b8bcac9988d83270ed1da05f765bfc4e8624 /storage/xtradb | |
parent | 80da57cc4f0c717ee3e01ac5abccc859b88a2fbf (diff) | |
parent | cee9ab9d85a8d75290b0d60bc7af26c8cf179a1d (diff) | |
download | mariadb-git-87e3e67f434628768b5125fbab7e8862fa60da1a.tar.gz |
Merge branch '10.0' into 10.1
Diffstat (limited to 'storage/xtradb')
24 files changed, 831 insertions, 231 deletions
diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc index 7bce480e652..a68bb8a1344 100644 --- a/storage/xtradb/buf/buf0dump.cc +++ b/storage/xtradb/buf/buf0dump.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -163,6 +163,25 @@ buf_load_status( va_end(ap); } +/** Returns the directory path where the buffer pool dump file will be created. +@return directory path */ +static +const char* +get_buf_dump_dir() +{ + const char* dump_dir; + + /* The dump file should be created in the default data directory if + innodb_data_home_dir is set as an empty string. */ + if (strcmp(srv_data_home, "") == 0) { + dump_dir = fil_path_to_mysql_datadir; + } else { + dump_dir = srv_data_home; + } + + return(dump_dir); +} + /*****************************************************************//** Perform a buffer pool dump into the file specified by innodb_buffer_pool_filename. If any errors occur then the value of @@ -186,7 +205,7 @@ buf_dump( int ret; ut_snprintf(full_filename, sizeof(full_filename), - "%s%c%s", srv_data_home, SRV_PATH_SEPARATOR, + "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR, srv_buf_dump_filename); ut_snprintf(tmp_filename, sizeof(tmp_filename), @@ -471,7 +490,7 @@ buf_load() buf_load_abort_flag = FALSE; ut_snprintf(full_filename, sizeof(full_filename), - "%s%c%s", srv_data_home, SRV_PATH_SEPARATOR, + "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR, srv_buf_dump_filename); buf_load_status(STATUS_NOTICE, diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 5d64b75784a..2440637ccca 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -72,12 +72,6 @@ UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key; UNIV_INTERN mysql_pfs_key_t buf_lru_manager_thread_key; #endif /* UNIV_PFS_THREAD */ -/** If LRU list of a buf_pool is less than this size then LRU eviction -should not happen. This is because when we do LRU flushing we also put -the blocks on free list. If LRU list is very small then we can end up -in thrashing. */ -#define BUF_LRU_MIN_LEN 256 - /* @} */ /******************************************************************//** diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc index 59f210fcab9..138d3131e09 100644 --- a/storage/xtradb/dict/dict0boot.cc +++ b/storage/xtradb/dict/dict0boot.cc @@ -474,7 +474,7 @@ dict_boot(void) } else { ib_logf(IB_LOG_LEVEL_WARN, "Change buffer not empty when --innodb-read-only " - "is set! but srv_force_recovery = %d, ignoring.", + "is set! but srv_force_recovery = %lu, ignoring.", srv_force_recovery); } } diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index 206038d36c9..a147fbbc671 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -1116,7 +1116,7 @@ dict_init(void) &dict_operation_lock, SYNC_DICT_OPERATION); if (!srv_read_only_mode) { - dict_foreign_err_file = os_file_create_tmpfile(); + dict_foreign_err_file = os_file_create_tmpfile(NULL); ut_a(dict_foreign_err_file); mutex_create(dict_foreign_err_mutex_key, diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc index 12ead09d829..5c283f693d5 100644 --- a/storage/xtradb/dict/dict0stats.cc +++ b/storage/xtradb/dict/dict0stats.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2009, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2009, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1488,7 +1488,6 @@ on the leaf page. when comparing records @param[out] n_diff number of distinct records @param[out] n_external_pages number of external pages -@param[in,out] mtr mini-transaction @return number of distinct records on the leaf page */ static void @@ -1496,8 +1495,7 @@ dict_stats_analyze_index_below_cur( const btr_cur_t* cur, ulint n_prefix, ib_uint64_t* n_diff, - ib_uint64_t* n_external_pages, - mtr_t* mtr) + ib_uint64_t* n_external_pages) { dict_index_t* index; ulint space; @@ -1511,6 +1509,7 @@ dict_stats_analyze_index_below_cur( ulint* offsets2; ulint* offsets_rec; ulint size; + mtr_t mtr; index = btr_cur_get_index(cur); @@ -1549,12 +1548,14 @@ dict_stats_analyze_index_below_cur( function without analyzing any leaf pages */ *n_external_pages = 0; + mtr_start(&mtr); + /* descend to the leaf level on the B-tree */ for (;;) { block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL /* no guessed block */, - BUF_GET, __FILE__, __LINE__, mtr); + BUF_GET, __FILE__, __LINE__, &mtr); page = buf_block_get_frame(block); @@ -1576,6 +1577,8 @@ dict_stats_analyze_index_below_cur( ut_a(*n_diff > 0); if (*n_diff == 1) { + mtr_commit(&mtr); + /* page has all keys equal and the end of the page was reached by dict_stats_scan_page(), no need to descend to the leaf level */ @@ -1600,7 +1603,7 @@ dict_stats_analyze_index_below_cur( } /* make sure we got a leaf page as a result from the above loop */ - ut_ad(btr_page_get_level(page, mtr) == 0); + ut_ad(btr_page_get_level(page, &mtr) == 0); /* scan the leaf page and find the number of distinct keys, when looking only at the first n_prefix columns; also estimate @@ -1617,6 +1620,7 @@ dict_stats_analyze_index_below_cur( __func__, page_no, n_diff); #endif + mtr_commit(&mtr); mem_heap_free(heap); } @@ -1826,8 +1830,7 @@ dict_stats_analyze_index_for_n_prefix( dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur), n_prefix, &n_diff_on_leaf_page, - &n_external_pages, - mtr); + &n_external_pages); /* We adjust n_diff_on_leaf_page here to avoid counting one record twice - once as the last on some page and once diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 2da234ad094..bc30211032b 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -5293,12 +5293,15 @@ retry: os_offset_t offset = ((os_offset_t) (start_page_no - file_start_page_no)) * page_size; + + const char* name = node->name == NULL ? space->name : node->name; + #ifdef UNIV_HOTBACKUP - success = os_file_write(node->name, node->handle, buf, + success = os_file_write(name, node->handle, buf, offset, page_size * n_pages); #else success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, - node->name, node->handle, buf, + name, node->handle, buf, offset, page_size * n_pages, page_size, node, NULL, space_id, NULL, 0); #endif /* UNIV_HOTBACKUP */ @@ -5962,22 +5965,12 @@ _fil_io( } } + const char* name = node->name == NULL ? space->name : node->name; + /* Queue the aio request */ - ret = os_aio( - type, - is_log, - mode | wake_later, - node->name, - node->handle, - buf, - offset, - len, - zip_size ? zip_size : UNIV_PAGE_SIZE, - node, - message, - space_id, - trx, - write_size); + ret = os_aio(type, is_log, mode | wake_later, name, node->handle, buf, + offset, len, zip_size ? zip_size : UNIV_PAGE_SIZE, node, + message, space_id, trx, write_size); #else /* In mysqlbackup do normal i/o, not aio */ @@ -5985,7 +5978,7 @@ _fil_io( ret = os_file_read(node->handle, buf, offset, len); } else { ut_ad(!srv_read_only_mode); - ret = os_file_write(node->name, node->handle, buf, + ret = os_file_write(name, node->handle, buf, offset, len); } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc index 0703b050848..00b3b4682c3 100644 --- a/storage/xtradb/fts/fts0opt.cc +++ b/storage/xtradb/fts/fts0opt.cc @@ -580,7 +580,7 @@ fts_zip_read_word( #ifdef UNIV_DEBUG ulint i; #endif - byte len = 0; + short len = 0; void* null = NULL; byte* ptr = word->f_str; int flush = Z_NO_FLUSH; @@ -590,7 +590,7 @@ fts_zip_read_word( return(NULL); } - zip->zp->next_out = &len; + zip->zp->next_out = reinterpret_cast<byte*>(&len); zip->zp->avail_out = sizeof(len); while (zip->status == Z_OK && zip->zp->avail_out > 0) { @@ -688,11 +688,12 @@ fts_fetch_index_words( fts_zip_t* zip = static_cast<fts_zip_t*>(user_arg); que_node_t* exp = sel_node->select_list; dfield_t* dfield = que_node_get_val(exp); - byte len = (byte) dfield_get_len(dfield); + short len = static_cast<short>(dfield_get_len(dfield)); void* data = dfield_get_data(dfield); /* Skip the duplicate words. */ - if (zip->word.f_len == len && !memcmp(zip->word.f_str, data, len)) { + if (zip->word.f_len == static_cast<ulint>(len) + && !memcmp(zip->word.f_str, data, len)) { return(TRUE); } @@ -706,7 +707,7 @@ fts_fetch_index_words( ut_a(zip->zp->next_in == NULL); /* The string is prefixed by len. */ - zip->zp->next_in = &len; + zip->zp->next_in = reinterpret_cast<byte*>(&len); zip->zp->avail_in = sizeof(len); /* Compress the word, create output blocks as necessary. */ diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 065f1bd2ca0..5a86f5662cc 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -617,7 +617,6 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_trx_read_only }; - static void innodb_remember_check_sysvar_funcs(); mysql_var_check_func check_sysvar_enum; @@ -652,6 +651,67 @@ ha_create_table_option innodb_table_option_list[]= HA_TOPTION_END }; +/** + Test a file path whether it is same as mysql data directory path. + + @param path null terminated character string + + @return + @retval TRUE The path is different from mysql data directory. + @retval FALSE The path is same as mysql data directory. +*/ +static bool is_mysql_datadir_path(const char *path) +{ + if (path == NULL) + return false; + + char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN]; + convert_dirname(path_dir, path, NullS); + convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS); + size_t mysql_data_home_len= dirname_length(mysql_data_dir); + size_t path_len = dirname_length(path_dir); + + if (path_len < mysql_data_home_len) + return true; + + if (!lower_case_file_system) + return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len)); + + return(files_charset_info->coll->strnncoll(files_charset_info, + (uchar *) path_dir, path_len, + (uchar *) mysql_data_dir, + mysql_data_home_len, + TRUE)); +} + + +static int mysql_tmpfile_path(const char *path, const char *prefix) +{ + DBUG_ASSERT(path != NULL); + DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN); + + char filename[FN_REFLEN]; + File fd = create_temp_file(filename, path, prefix, +#ifdef __WIN__ + O_BINARY | O_TRUNC | O_SEQUENTIAL | + O_SHORT_LIVED | +#endif /* __WIN__ */ + O_CREAT | O_EXCL | O_RDWR | O_TEMPORARY, + MYF(MY_WME)); + if (fd >= 0) { +#ifndef __WIN__ + /* + This can be removed once the following bug is fixed: + Bug #28903 create_temp_file() doesn't honor O_TEMPORARY option + (file not removed) (Unix) + */ + unlink(filename); +#endif /* !__WIN__ */ + } + + return fd; +} + /*************************************************************//** Check whether valid argument given to innodb_ft_*_stopword_table. This function is registered as a callback with MySQL. @@ -667,6 +727,108 @@ innodb_stopword_table_validate( for update function */ struct st_mysql_value* value); /*!< in: incoming string */ +/** Validate passed-in "value" is a valid directory name. +This function is registered as a callback with MySQL. +@param[in,out] thd thread handle +@param[in] var pointer to system variable +@param[out] save immediate result for update +@param[in] value incoming string +@return 0 for valid name */ +static +int +innodb_tmpdir_validate( + THD* thd, + struct st_mysql_sys_var* var, + void* save, + struct st_mysql_value* value) +{ + + char* alter_tmp_dir; + char* innodb_tmp_dir; + char buff[OS_FILE_MAX_PATH]; + int len = sizeof(buff); + char tmp_abs_path[FN_REFLEN + 2]; + + ut_ad(save != NULL); + ut_ad(value != NULL); + + if (check_global_access(thd, FILE_ACL)) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "InnoDB: FILE Permissions required"); + *static_cast<const char**>(save) = NULL; + return(1); + } + + alter_tmp_dir = (char*) value->val_str(value, buff, &len); + + if (!alter_tmp_dir) { + *static_cast<const char**>(save) = alter_tmp_dir; + return(0); + } + + if (strlen(alter_tmp_dir) > FN_REFLEN) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Path length should not exceed %d bytes", FN_REFLEN); + *static_cast<const char**>(save) = NULL; + return(1); + } + + my_realpath(tmp_abs_path, alter_tmp_dir, 0); + size_t tmp_abs_len = strlen(tmp_abs_path); + + if (my_access(tmp_abs_path, F_OK)) { + + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "InnoDB: Path doesn't exist."); + *static_cast<const char**>(save) = NULL; + return(1); + } else if (my_access(tmp_abs_path, R_OK | W_OK)) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "InnoDB: Server doesn't have permission in " + "the given location."); + *static_cast<const char**>(save) = NULL; + return(1); + } + + MY_STAT stat_info_dir; + + if (my_stat(tmp_abs_path, &stat_info_dir, MYF(0))) { + if ((stat_info_dir.st_mode & S_IFDIR) != S_IFDIR) { + + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Given path is not a directory. "); + *static_cast<const char**>(save) = NULL; + return(1); + } + } + + if (!is_mysql_datadir_path(tmp_abs_path)) { + + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "InnoDB: Path Location should not be same as " + "mysql data directory location."); + *static_cast<const char**>(save) = NULL; + return(1); + } + + innodb_tmp_dir = static_cast<char*>( + thd_memdup(thd, tmp_abs_path, tmp_abs_len + 1)); + *static_cast<const char**>(save) = innodb_tmp_dir; + return(0); +} + /** "GEN_CLUST_INDEX" is the name reserved for InnoDB default system clustered index when there is no primary key. */ const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX"; @@ -796,6 +958,11 @@ static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG, "This is to cause replication prefetch IO. ATTENTION: the transaction started after enabled is affected.", NULL, NULL, FALSE); +static MYSQL_THDVAR_STR(tmpdir, + PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC, + "Directory for temporary non-tablespace files.", + innodb_tmpdir_validate, NULL, NULL); + static ibool innodb_have_lzo=IF_LZO(1, 0); static ibool innodb_have_lz4=IF_LZ4(1, 0); static ibool innodb_have_lzma=IF_LZMA(1, 0); @@ -1492,6 +1659,28 @@ normalize_table_name_low( ibool set_lower_case); /* in: TRUE if we want to set name to lower case */ +/*************************************************************//** +Checks if buffer pool is big enough to enable backoff algorithm. +InnoDB empty free list algorithm backoff requires free pages +from LRU for the best performance. +buf_LRU_buf_pool_running_out cancels query if 1/4 of +buffer pool belongs to LRU or freelist. +At the same time buf_flush_LRU_list_batch +keeps up to BUF_LRU_MIN_LEN in LRU. +In order to avoid deadlock baclkoff requires buffer pool +to be at least 4*BUF_LRU_MIN_LEN, +but flush peformance is bad because of trashing +and additional BUF_LRU_MIN_LEN pages are requested. +@return true if it's possible to enable backoff. */ +static +bool +innodb_empty_free_list_algorithm_backoff_allowed( + srv_empty_free_list_t + algorithm, /*!< in: desired algorithm + from srv_empty_free_list_t */ + long long buf_pool_pages); /*!< in: total number + of pages inside buffer pool */ + #ifdef NOT_USED /*************************************************************//** Removes old archived transaction log files. @@ -1780,6 +1969,26 @@ thd_supports_xa( return(THDVAR(thd, support_xa)); } +/** Get the value of innodb_tmpdir. +@param[in] thd thread handle, or NULL to query + the global innodb_tmpdir. +@retval NULL if innodb_tmpdir="" */ +UNIV_INTERN +const char* +thd_innodb_tmpdir( + THD* thd) +{ +#ifdef UNIV_SYNC_DEBUG + ut_ad(!sync_thread_levels_nonempty_trx(false)); +#endif /* UNIV_SYNC_DEBUG */ + + const char* tmp_dir = THDVAR(thd, tmpdir); + if (tmp_dir != NULL && *tmp_dir == '\0') { + tmp_dir = NULL; + } + + return(tmp_dir); +} /******************************************************************//** Check the status of fake changes mode (innodb_fake_changes) @return true if fake change mode is enabled. */ @@ -2360,13 +2569,14 @@ innobase_get_lower_case_table_names(void) return(lower_case_table_names); } -/*********************************************************************//** -Creates a temporary file. +/** Create a temporary file in the location specified by the parameter +path. If the path is null, then it will be created in tmpdir. +@param[in] path location for creating temporary file @return temporary file descriptor, or < 0 on error */ UNIV_INTERN int -innobase_mysql_tmpfile(void) -/*========================*/ +innobase_mysql_tmpfile( + const char* path) { #ifdef WITH_INNODB_DISALLOW_WRITES os_event_wait(srv_allow_writes_event); @@ -2379,7 +2589,11 @@ innobase_mysql_tmpfile(void) return(-1); ); - fd = mysql_tmpfile("ib"); + if (path == NULL) { + fd = mysql_tmpfile("ib"); + } else { + fd = mysql_tmpfile_path(path, "ib"); + } if (fd >= 0) { /* Copy the file descriptor, so that the additional resources @@ -3408,6 +3622,13 @@ ha_innobase::reset_template(void) ut_ad(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); ut_ad(prebuilt->magic_n2 == prebuilt->magic_n); + /* Force table to be freed in close_thread_table(). */ + DBUG_EXECUTE_IF("free_table_in_fts_query", + if (prebuilt->in_fts_query) { + table->m_needs_reopen = true; + } + ); + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; prebuilt->in_fts_query = 0; @@ -4076,6 +4297,22 @@ innobase_change_buffering_inited_ok: #ifdef HAVE_POSIX_FALLOCATE srv_use_posix_fallocate = (ibool) innobase_use_fallocate; #endif + + /* Do not enable backoff algorithm for small buffer pool. */ + if (!innodb_empty_free_list_algorithm_backoff_allowed( + static_cast<srv_empty_free_list_t>( + srv_empty_free_list_algorithm), + innobase_buffer_pool_size / srv_page_size)) { + sql_print_information( + "InnoDB: innodb_empty_free_list_algorithm " + "has been changed to legacy " + "because of small buffer pool size. " + "In order to use backoff, " + "increase buffer pool at least up to 20MB.\n"); + srv_empty_free_list_algorithm + = SRV_EMPTY_FREE_LIST_LEGACY; + } + srv_use_atomic_writes = (ibool) innobase_use_atomic_writes; if (innobase_use_atomic_writes) { ib_logf(IB_LOG_LEVEL_INFO, "using atomic writes."); @@ -5718,7 +5955,7 @@ building based on the assumption that there is no concurrent index creation/drop and DMLs that requires index lookup. All table handle will be closed before the index creation/drop. @return TRUE if index translation table built successfully */ -static +UNIV_INTERN ibool innobase_build_index_translation( /*=============================*/ @@ -6270,20 +6507,14 @@ table_opened: prebuilt->clust_index_was_generated = FALSE; if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) { - sql_print_error("Table %s has a primary key in " - "InnoDB data dictionary, but not " - "in MySQL!", name); + ib_table->dict_frm_mismatch = DICT_FRM_NO_PK; /* This mismatch could cause further problems if not attended, bring this to the user's attention by printing a warning in addition to log a message in the errorlog */ - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_INDEX, - "InnoDB: Table %s has a " - "primary key in InnoDB data " - "dictionary, but not in " - "MySQL!", name); + + ib_push_frm_error(thd, ib_table, table, 0, true); /* If primary_key >= MAX_KEY, its (primary_key) value could be out of bound if continue to index @@ -6330,27 +6561,14 @@ table_opened: } } else { if (primary_key != MAX_KEY) { - sql_print_error( - "Table %s has no primary key in InnoDB data " - "dictionary, but has one in MySQL! If you " - "created the table with a MySQL version < " - "3.23.54 and did not define a primary key, " - "but defined a unique key with all non-NULL " - "columns, then MySQL internally treats that " - "key as the primary key. You can fix this " - "error by dump + DROP + CREATE + reimport " - "of the table.", name); + + ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS; /* This mismatch could cause further problems if not attended, bring this to the user attention by printing a warning in addition to log a message in the errorlog */ - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_INDEX, - "InnoDB: Table %s has no " - "primary key in InnoDB data " - "dictionary, but has one in " - "MySQL!", name); + ib_push_frm_error(thd, ib_table, table, 0, true); } prebuilt->clust_index_was_generated = TRUE; @@ -13960,12 +14178,8 @@ ha_innobase::info_low( } if (table->s->keys != num_innodb_index) { - sql_print_error("InnoDB: Table %s contains %lu " - "indexes inside InnoDB, which " - "is different from the number of " - "indexes %u defined in the MySQL ", - ib_table->name, num_innodb_index, - table->s->keys); + ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; + ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true); } if (!(flag & HA_STATUS_NO_LOCK)) { @@ -13985,15 +14199,8 @@ ha_innobase::info_low( dict_index_t* index = innobase_get_index(i); if (index == NULL) { - sql_print_error("Table %s contains fewer " - "indexes inside InnoDB than " - "are defined in the MySQL " - ".frm file. Have you mixed up " - ".frm files from different " - "installations? See " - REFMAN - "innodb-troubleshooting.html\n", - ib_table->name); + ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; + ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true); break; } @@ -18762,8 +18969,6 @@ innobase_fts_close_ranking( { fts_result_t* result; - ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt->in_fts_query = false; - result = ((NEW_FT_INFO*) fts_hdl)->ft_result; fts_query_free_result(result); @@ -19473,6 +19678,7 @@ static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid) return 0; } + static void wsrep_fake_trx_id( /*==================*/ @@ -19488,6 +19694,88 @@ wsrep_fake_trx_id( #endif /* WITH_WSREP */ + +/*************************************************************//** +Empty free list algorithm. +Checks if buffer pool is big enough to enable backoff algorithm. +InnoDB empty free list algorithm backoff requires free pages +from LRU for the best performance. +buf_LRU_buf_pool_running_out cancels query if 1/4 of +buffer pool belongs to LRU or freelist. +At the same time buf_flush_LRU_list_batch +keeps up to BUF_LRU_MIN_LEN in LRU. +In order to avoid deadlock baclkoff requires buffer pool +to be at least 4*BUF_LRU_MIN_LEN, +but flush peformance is bad because of trashing +and additional BUF_LRU_MIN_LEN pages are requested. +@return true if it's possible to enable backoff. */ +static +bool +innodb_empty_free_list_algorithm_backoff_allowed( + srv_empty_free_list_t algorithm, /*!< in: desired algorithm + from srv_empty_free_list_t */ + long long buf_pool_pages) /*!< in: total number + of pages inside buffer pool */ +{ + return(buf_pool_pages >= BUF_LRU_MIN_LEN * (4 + 1) + || algorithm != SRV_EMPTY_FREE_LIST_BACKOFF); +} + +/*************************************************************//** +Empty free list algorithm. This function is registered as +a callback with MySQL. +@return 0 for valid algorithm */ +static +int +innodb_srv_empty_free_list_algorithm_validate( +/*===========================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to system + variable */ + void* save, /*!< out: immediate result + for update function */ + struct st_mysql_value* value) /*!< in: incoming string */ +{ + const char* algorithm_name; + char buff[STRING_BUFFER_USUAL_SIZE]; + int len = sizeof(buff); + ulint algo; + srv_empty_free_list_t algorithm; + + algorithm_name = value->val_str(value, buff, &len); + + if (!algorithm_name) { + return(1); + } + + for (algo = 0; algo < array_elements( + innodb_empty_free_list_algorithm_names + ) - 1; + algo++) { + if (!innobase_strcasecmp( + algorithm_name, + innodb_empty_free_list_algorithm_names[algo])) + break; + } + + if (algo == array_elements( innodb_empty_free_list_algorithm_names) - 1) + return(1); + + algorithm = static_cast<srv_empty_free_list_t>(algo); + if (!innodb_empty_free_list_algorithm_backoff_allowed( + algorithm, + innobase_buffer_pool_size / srv_page_size)) { + sql_print_warning( + "InnoDB: innodb_empty_free_list_algorithm " + "= 'backoff' requires at least" + " 20MB buffer pool.\n"); + return(1); + } + + *reinterpret_cast<ulong*>(save) = static_cast<ulong>(algorithm); + return(0); +} + /* plugin options */ static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm, @@ -20038,7 +20326,7 @@ static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm, "The algorithm to use for empty free list handling. Allowed values: " "LEGACY: Original Oracle MySQL 5.6 handling with single page flushes; " "BACKOFF: (default) Wait until cleaner produces a free page.", - NULL, NULL, SRV_EMPTY_FREE_LIST_BACKOFF, + innodb_srv_empty_free_list_algorithm_validate, NULL, SRV_EMPTY_FREE_LIST_BACKOFF, &innodb_empty_free_list_algorithm_typelib); static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances, @@ -21078,6 +21366,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(corrupt_table_action), MYSQL_SYSVAR(fake_changes), MYSQL_SYSVAR(locking_fake_changes), + MYSQL_SYSVAR(tmpdir), MYSQL_SYSVAR(use_stacktrace), MYSQL_SYSVAR(force_primary_key), MYSQL_SYSVAR(fatal_semaphore_wait_threshold), @@ -21822,3 +22111,96 @@ ib_push_warning( my_free(buf); va_end(args); } + +/********************************************************************//** +Helper function to push frm mismatch error to error log and +if needed to sql-layer. */ +UNIV_INTERN +void +ib_push_frm_error( +/*==============*/ + THD* thd, /*!< in: MySQL thd */ + dict_table_t* ib_table, /*!< in: InnoDB table */ + TABLE* table, /*!< in: MySQL table */ + ulint n_keys, /*!< in: InnoDB #keys */ + bool push_warning) /*!< in: print warning ? */ +{ + switch (ib_table->dict_frm_mismatch) { + case DICT_FRM_NO_PK: + sql_print_error("Table %s has a primary key in " + "InnoDB data dictionary, but not " + "in MySQL!" + " Have you mixed up " + ".frm files from different " + "installations? See " + REFMAN + "innodb-troubleshooting.html\n", + ib_table->name); + + if (push_warning) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NO_SUCH_INDEX, + "InnoDB: Table %s has a " + "primary key in InnoDB data " + "dictionary, but not in " + "MySQL!", ib_table->name); + } + break; + case DICT_NO_PK_FRM_HAS: + sql_print_error( + "Table %s has no primary key in InnoDB data " + "dictionary, but has one in MySQL! If you " + "created the table with a MySQL version < " + "3.23.54 and did not define a primary key, " + "but defined a unique key with all non-NULL " + "columns, then MySQL internally treats that " + "key as the primary key. You can fix this " + "error by dump + DROP + CREATE + reimport " + "of the table.", ib_table->name); + + if (push_warning) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NO_SUCH_INDEX, + "InnoDB: Table %s has no " + "primary key in InnoDB data " + "dictionary, but has one in " + "MySQL!", + ib_table->name); + } + break; + + case DICT_FRM_INCONSISTENT_KEYS: + sql_print_error("InnoDB: Table %s contains %lu " + "indexes inside InnoDB, which " + "is different from the number of " + "indexes %u defined in the MySQL " + " Have you mixed up " + ".frm files from different " + "installations? See " + REFMAN + "innodb-troubleshooting.html\n", + ib_table->name, n_keys, + table->s->keys); + + if (push_warning) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_NO_SUCH_INDEX, + "InnoDB: Table %s contains %lu " + "indexes inside InnoDB, which " + "is different from the number of " + "indexes %u defined in the MySQL ", + ib_table->name, n_keys, + table->s->keys); + } + break; + + case DICT_FRM_CONSISTENT: + default: + sql_print_error("InnoDB: Table %s is consistent " + "on InnoDB data dictionary and MySQL " + " FRM file.", + ib_table->name); + ut_error; + break; + } +} diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 2027a593140..ea4be1fb2e0 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -696,3 +696,39 @@ innobase_copy_frm_flags_from_table_share( /*=====================================*/ dict_table_t* innodb_table, /*!< in/out: InnoDB table */ const TABLE_SHARE* table_share); /*!< in: table share */ + +/*******************************************************************//** +This function builds a translation table in INNOBASE_SHARE +structure for fast index location with mysql array number from its +table->key_info structure. This also provides the necessary translation +between the key order in mysql key_info and Innodb ib_table->indexes if +they are not fully matched with each other. +Note we do not have any mutex protecting the translation table +building based on the assumption that there is no concurrent +index creation/drop and DMLs that requires index lookup. All table +handle will be closed before the index creation/drop. +@return TRUE if index translation table built successfully */ +UNIV_INTERN +ibool +innobase_build_index_translation( +/*=============================*/ + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + dict_table_t* ib_table, /*!< in: table in Innodb data + dictionary */ + INNOBASE_SHARE* share); /*!< in/out: share structure + where index translation table + will be constructed in. */ + +/********************************************************************//** +Helper function to push frm mismatch error to error log and +if needed to sql-layer. */ +UNIV_INTERN +void +ib_push_frm_error( +/*==============*/ + THD* thd, /*!< in: MySQL thd */ + dict_table_t* ib_table, /*!< in: InnoDB table */ + TABLE* table, /*!< in: MySQL table */ + ulint n_keys, /*!< in: InnoDB #keys */ + bool push_warning); /*!< in: print warning ? */ diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index ea02463010c..1e57f6c5293 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -463,6 +463,20 @@ ha_innobase::check_if_supported_inplace_alter( } } + ulint n_indexes = UT_LIST_GET_LEN((prebuilt->table)->indexes); + + /* If InnoDB dictionary and MySQL frm file are not consistent + use "Copy" method. */ + if (prebuilt->table->dict_frm_mismatch) { + + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_NO_SUCH_INDEX); + ib_push_frm_error(user_thd, prebuilt->table, altered_table, + n_indexes, true); + + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + /* We should be able to do the operation in-place. See if we can do it online (LOCK=NONE). */ bool online = true; @@ -2777,6 +2791,10 @@ prepare_inplace_alter_table_dict( ctx->num_to_add_index = ha_alter_info->index_add_count; + ut_ad(ctx->prebuilt->trx->mysql_thd != NULL); + const char* path = thd_innodb_tmpdir( + ctx->prebuilt->trx->mysql_thd); + index_defs = innobase_create_key_defs( ctx->heap, ha_alter_info, altered_table, ctx->num_to_add_index, num_fts_index, @@ -3130,8 +3148,10 @@ prepare_inplace_alter_table_dict( error = DB_OUT_OF_MEMORY; goto error_handling;); rw_lock_x_lock(&ctx->add_index[a]->lock); + bool ok = row_log_allocate(ctx->add_index[a], - NULL, true, NULL, NULL); + NULL, true, NULL, + NULL, path); rw_lock_x_unlock(&ctx->add_index[a]->lock); if (!ok) { @@ -3157,7 +3177,7 @@ prepare_inplace_alter_table_dict( clust_index, ctx->new_table, !(ha_alter_info->handler_flags & Alter_inplace_info::ADD_PK_INDEX), - ctx->add_cols, ctx->col_map); + ctx->add_cols, ctx->col_map, path); rw_lock_x_unlock(&clust_index->lock); if (!ok) { @@ -4159,6 +4179,7 @@ ok_exit: files and merge sort. */ DBUG_EXECUTE_IF("innodb_OOM_inplace_alter", error = DB_OUT_OF_MEMORY; goto oom;); + error = row_merge_build_indexes( prebuilt->trx, prebuilt->table, ctx->new_table, @@ -6194,6 +6215,21 @@ foreign_fail: row_mysql_unlock_data_dictionary(trx); trx_free_for_mysql(trx); + /* Rebuild index translation table now for temporary tables if we are + restoring secondary keys, as ha_innobase::open will not be called for + the next access. */ + if (dict_table_is_temporary(ctx0->new_table) + && ctx0->num_to_add_index > 0) { + ut_ad(!ctx0->num_to_drop_index); + ut_ad(!ctx0->num_to_drop_fk); + if (!innobase_build_index_translation(altered_table, + ctx0->new_table, + share)) { + MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); + DBUG_RETURN(true); + } + } + /* TODO: The following code could be executed while allowing concurrent access to the table (MDL downgrade). */ diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h index 87ce7d88bdf..c8a329251fa 100644 --- a/storage/xtradb/include/buf0flu.h +++ b/storage/xtradb/include/buf0flu.h @@ -312,6 +312,12 @@ buf_flush_flush_list_in_progress(void) /*==================================*/ __attribute__((warn_unused_result)); +/** If LRU list of a buf_pool is less than this size then LRU eviction +should not happen. This is because when we do LRU flushing we also put +the blocks on free list. If LRU list is very small then we can end up +in thrashing. */ +#define BUF_LRU_MIN_LEN 256 + /******************************************************************//** Start a buffer flush batch for LRU or flush list */ ibool diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index 9c43829cecf..a55d5316969 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -2,7 +2,7 @@ Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2015, MariaDB Corporation. +Copyright (c) 2013, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1025,6 +1025,18 @@ if table->memcached_sync_count == DICT_TABLE_IN_DDL means there's DDL running on the table, DML from memcached will be blocked. */ #define DICT_TABLE_IN_DDL -1 +/** These are used when MySQL FRM and InnoDB data dictionary are +in inconsistent state. */ +typedef enum { + DICT_FRM_CONSISTENT = 0, /*!< Consistent state */ + DICT_FRM_NO_PK = 1, /*!< MySQL has no primary key + but InnoDB dictionary has + non-generated one. */ + DICT_NO_PK_FRM_HAS = 2, /*!< MySQL has primary key but + InnoDB dictionary has not. */ + DICT_FRM_INCONSISTENT_KEYS = 3 /*!< Key count mismatch */ +} dict_frm_t; + /** Data structure for a database table. Most fields will be initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_t{ @@ -1085,6 +1097,10 @@ struct dict_table_t{ /*!< True if the table belongs to a system database (mysql, information_schema or performance_schema) */ + dict_frm_t dict_frm_mismatch; + /*!< !DICT_FRM_CONSISTENT==0 if data + dictionary information and + MySQL FRM information mismatch. */ #ifndef UNIV_HOTBACKUP hash_node_t name_hash; /*!< hash chain node */ hash_node_t id_hash; /*!< hash chain node */ diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index a9c003d5bb1..212a47a7a9b 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2006, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -356,6 +356,15 @@ thd_supports_xa( THD* thd); /*!< in: thread handle, or NULL to query the global innodb_supports_xa */ +/** Get status of innodb_tmpdir. +@param[in] thd thread handle, or NULL to query + the global innodb_tmpdir. +@retval NULL if innodb_tmpdir="" */ +UNIV_INTERN +const char* +thd_innodb_tmpdir( + THD* thd); + /******************************************************************//** Check the status of fake changes mode (innodb_fake_changes) @return true if fake change mode is enabled. */ diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index c15d896caa6..c890f96b0e1 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -457,14 +457,19 @@ UNIV_INTERN void os_io_init_simple(void); /*===================*/ -/***********************************************************************//** -Creates a temporary file. This function is like tmpfile(3), but -the temporary file is created in the MySQL temporary directory. -@return temporary file handle, or NULL on error */ + +/** Create a temporary file. This function is like tmpfile(3), but +the temporary file is created in the given parameter path. If the path +is null then it will create the file in the mysql server configuration +parameter (--tmpdir). +@param[in] path location for creating temporary file +@return temporary file handle, or NULL on error */ +UNIV_INTERN FILE* -os_file_create_tmpfile(void); -/*========================*/ +os_file_create_tmpfile( + const char* path); + #endif /* !UNIV_HOTBACKUP */ /***********************************************************************//** The os_file_opendir() function opens a directory stream corresponding to the @@ -1314,14 +1319,14 @@ os_file_get_status( file can be opened in RW mode */ #if !defined(UNIV_HOTBACKUP) -/*********************************************************************//** -Creates a temporary file that will be deleted on close. -This function is defined in ha_innodb.cc. -@return temporary file descriptor, or < 0 on error */ +/** Create a temporary file in the location specified by the parameter +path. If the path is null, then it will be created in tmpdir. +@param[in] path location for creating temporary file +@return temporary file descriptor, or < 0 on error */ UNIV_INTERN int -innobase_mysql_tmpfile(void); -/*========================*/ +innobase_mysql_tmpfile( + const char* path); #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/row0log.h b/storage/xtradb/include/row0log.h index f105838eece..e127504c484 100644 --- a/storage/xtradb/include/row0log.h +++ b/storage/xtradb/include/row0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -55,8 +55,9 @@ row_log_allocate( const dtuple_t* add_cols, /*!< in: default values of added columns, or NULL */ - const ulint* col_map)/*!< in: mapping of old column + const ulint* col_map,/*!< in: mapping of old column numbers to new ones, or NULL if !table */ + const char* path) /*!< in: where to create temporary file */ __attribute__((nonnull(1), warn_unused_result)); /******************************************************//** diff --git a/storage/xtradb/include/row0merge.h b/storage/xtradb/include/row0merge.h index 3e3459b8703..53164b5197f 100644 --- a/storage/xtradb/include/row0merge.h +++ b/storage/xtradb/include/row0merge.h @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, MariaDB Corporation. +Copyright (c) 2005, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2015, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -187,14 +187,14 @@ void row_merge_drop_temp_indexes(void); /*=============================*/ -/*********************************************************************//** -Creates temporary merge files, and if UNIV_PFS_IO defined, register -the file descriptor with Performance Schema. +/** Create temporary merge files in the given paramater path, and if +UNIV_PFS_IO defined, register the file descriptor with Performance Schema. +@param[in] path location for creating temporary merge files. @return File descriptor */ UNIV_INTERN int -row_merge_file_create_low(void) -/*===========================*/ +row_merge_file_create_low( + const char* path) __attribute__((warn_unused_result)); /*********************************************************************//** Destroy a merge file. And de-register the file from Performance Schema @@ -372,15 +372,17 @@ row_merge_buf_empty( /*================*/ row_merge_buf_t* buf) /*!< in,own: sort buffer */ __attribute__((warn_unused_result, nonnull)); -/*********************************************************************//** -Create a merge file. + +/** Create a merge file in the given location. +@param[out] merge_file merge file structure +@param[in] path location for creating temporary file @return file descriptor, or -1 on failure */ UNIV_INTERN int row_merge_file_create( -/*==================*/ - merge_file_t* merge_file) /*!< out: merge file structure */ - __attribute__((nonnull)); + merge_file_t* merge_file, + const char* path); + /*********************************************************************//** Merge disk files. @return DB_SUCCESS or error code */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index f4a0da12476..b0fe3f020d2 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -45,10 +45,10 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 28 +#define INNODB_VERSION_BUGFIX 29 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 76.1 +#define PERCONA_INNODB_VERSION 76.2 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc index c3044daafad..7364c10c08e 100644 --- a/storage/xtradb/lock/lock0lock.cc +++ b/storage/xtradb/lock/lock0lock.cc @@ -639,7 +639,7 @@ lock_sys_create( lock_sys->rec_num = 0; if (!srv_read_only_mode) { - lock_latest_err_file = os_file_create_tmpfile(); + lock_latest_err_file = os_file_create_tmpfile(NULL); ut_a(lock_latest_err_file); } } diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 2bb094e115d..4010ef10359 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -991,19 +991,21 @@ os_io_init_simple(void) #endif } -/***********************************************************************//** -Creates a temporary file. This function is like tmpfile(3), but -the temporary file is created in the MySQL temporary directory. -@return temporary file handle, or NULL on error */ +/** Create a temporary file. This function is like tmpfile(3), but +the temporary file is created in the given parameter path. If the path +is null then it will create the file in the mysql server configuration +parameter (--tmpdir). +@param[in] path location for creating temporary file +@return temporary file handle, or NULL on error */ UNIV_INTERN FILE* -os_file_create_tmpfile(void) -/*========================*/ +os_file_create_tmpfile( + const char* path) { - FILE* file = NULL; - int fd; WAIT_ALLOW_WRITES(); - fd = innobase_mysql_tmpfile(); + + FILE* file = NULL; + int fd = innobase_mysql_tmpfile(path); ut_ad(!srv_read_only_mode); @@ -4067,7 +4069,7 @@ os_aio_native_aio_supported(void) return(FALSE); } else if (!srv_read_only_mode) { /* Now check if tmpdir supports native aio ops. */ - fd = innobase_mysql_tmpfile(); + fd = innobase_mysql_tmpfile(NULL); if (fd < 0) { ib_logf(IB_LOG_LEVEL_WARN, diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc index c7a5b51ef55..ac0dc844b1b 100644 --- a/storage/xtradb/row/row0ftsort.cc +++ b/storage/xtradb/row/row0ftsort.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, MariaDB Corporation. +Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2015, 2016, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -240,6 +240,9 @@ row_fts_psort_info_init( crypt_data = NULL; } + ut_ad(trx->mysql_thd != NULL); + const char* path = thd_innodb_tmpdir(trx->mysql_thd); + /* There will be FTS_NUM_AUX_INDEX number of "sort buckets" for each parallel sort thread. Each "sort bucket" holds records for a particular "FTS index partition" */ @@ -261,8 +264,8 @@ row_fts_psort_info_init( psort_info[j].merge_buf[i] = row_merge_buf_create( dup->index); - if (row_merge_file_create(psort_info[j].merge_file[i]) - < 0) { + if (row_merge_file_create(psort_info[j].merge_file[i], + path) < 0) { goto func_exit; } @@ -662,6 +665,11 @@ fts_parallel_tokenization( dberr_t error = DB_SUCCESS; fil_space_crypt_t* crypt_data = NULL; + ut_ad(psort_info->psort_common->trx->mysql_thd != NULL); + + const char* path = thd_innodb_tmpdir( + psort_info->psort_common->trx->mysql_thd); + ut_ad(psort_info); buf = psort_info->merge_buf; @@ -905,7 +913,7 @@ exit: continue; } - tmpfd[i] = row_merge_file_create_low(); + tmpfd[i] = row_merge_file_create_low(path); if (tmpfd[i] < 0) { error = DB_OUT_OF_MEMORY; goto func_exit; diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index b0c6f881f81..f0b200459dd 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -198,8 +198,25 @@ struct row_log_t { or by index->lock X-latch only */ row_log_buf_t head; /*!< reader context; protected by MDL only; modifiable by row_log_apply_ops() */ + const char* path; /*!< where to create temporary file during + log operation */ }; +/** Create the file or online log if it does not exist. +@param[in,out] log online rebuild log +@return file descriptor. */ +static __attribute__((warn_unused_result)) +int +row_log_tmpfile( + row_log_t* log) +{ + DBUG_ENTER("row_log_tmpfile"); + if (log->fd < 0) { + log->fd = row_merge_file_create_low(log->path); + } + + DBUG_RETURN(log->fd); +} /** Allocate the memory for the log buffer. @param[in,out] log_buf Buffer used for log operation @@ -344,6 +361,12 @@ row_log_online_op( log->tail.buf, avail_size); } UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size); + + if (row_log_tmpfile(log) < 0) { + log->error = DB_OUT_OF_MEMORY; + goto err_exit; + } + ret = os_file_write( "(modification log)", OS_FILE_FROM_FD(log->fd), @@ -454,6 +477,12 @@ row_log_table_close_func( log->tail.buf, avail); } UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size); + + if (row_log_tmpfile(log) < 0) { + log->error = DB_OUT_OF_MEMORY; + goto err_exit; + } + ret = os_file_write( "(modification log)", OS_FILE_FROM_FD(log->fd), @@ -473,6 +502,7 @@ write_failed: log->tail.total += size; UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); +err_exit: mutex_exit(&log->mutex); os_atomic_increment_ulint(&onlineddl_rowlog_rows, 1); @@ -2544,7 +2574,8 @@ corruption: if (index->online_log->head.blocks) { #ifdef HAVE_FTRUNCATE /* Truncate the file in order to save space. */ - if (ftruncate(index->online_log->fd, 0) == -1) { + if (index->online_log->fd != -1 + && ftruncate(index->online_log->fd, 0) == -1) { perror("ftruncate"); } #endif /* HAVE_FTRUNCATE */ @@ -2860,8 +2891,9 @@ row_log_allocate( const dtuple_t* add_cols, /*!< in: default values of added columns, or NULL */ - const ulint* col_map)/*!< in: mapping of old column + const ulint* col_map,/*!< in: mapping of old column numbers to new ones, or NULL if !table */ + const char* path) /*!< in: where to create temporary file */ { row_log_t* log; DBUG_ENTER("row_log_allocate"); @@ -2880,11 +2912,7 @@ row_log_allocate( DBUG_RETURN(false); } - log->fd = row_merge_file_create_low(); - if (log->fd < 0) { - ut_free(log); - DBUG_RETURN(false); - } + log->fd = -1; mutex_create(index_online_log_key, &log->mutex, SYNC_INDEX_ONLINE_LOG); log->blobs = NULL; @@ -2899,6 +2927,7 @@ row_log_allocate( log->tail.block = log->head.block = NULL; log->head.blocks = log->head.bytes = 0; log->head.total = 0; + log->path = path; dict_index_set_online_status(index, ONLINE_INDEX_CREATION); index->online_log = log; @@ -3376,7 +3405,8 @@ corruption: if (index->online_log->head.blocks) { #ifdef HAVE_FTRUNCATE /* Truncate the file in order to save space. */ - if (ftruncate(index->online_log->fd, 0) == -1) { + if (index->online_log->fd != -1 + && ftruncate(index->online_log->fd, 0) == -1) { perror("ftruncate"); } #endif /* HAVE_FTRUNCATE */ diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index 8d7fcd7388c..b45c7f2171b 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -1389,47 +1389,95 @@ row_merge_write_eof( return(&block[0]); } -/********************************************************************//** -Reads clustered index of the table and create temporary files +/** Create a temporary file if it has not been created already. +@param[in,out] tmpfd temporary file handle +@param[in] path path to create temporary file +@return file descriptor, or -1 on failure */ +static __attribute__((warn_unused_result)) +int +row_merge_tmpfile_if_needed( + int* tmpfd, + const char* path) +{ + if (*tmpfd < 0) { + *tmpfd = row_merge_file_create_low(path); + } + + return(*tmpfd); +} + +/** Create a temporary file for merge sort if it was not created already. +@param[in,out] file merge file structure +@param[in,out] tmpfd temporary file structure +@param[in] nrec number of records in the file +@param[in] path path to create temporary files +@return file descriptor, or -1 on failure */ +static __attribute__((warn_unused_result)) +int +row_merge_file_create_if_needed( + merge_file_t* file, + int* tmpfd, + ulint nrec, + const char* path) +{ + ut_ad(file->fd < 0 || *tmpfd >=0); + if (file->fd < 0 && row_merge_file_create(file, path) >= 0) { + if (row_merge_tmpfile_if_needed(tmpfd, path) < 0) { + return(-1); + } + + file->n_rec = nrec; + } + + ut_ad(file->fd < 0 || *tmpfd >=0); + return(file->fd); +} + +/** Reads clustered index of the table and create temporary files containing the index entries for the indexes to be built. -@return DB_SUCCESS or error */ +@param[in] trx transaction +@param[in,out] table MySQL table object, for reporting erroneous + records +@param[in] old_table table where rows are read from +@param[in] new_table table where indexes are created; identical to + old_table unless creating a PRIMARY KEY +@param[in] online true if creating indexes online +@param[in] index indexes to be created +@param[in] fts_sort_idx full-text index to be created, or NULL +@param[in] psort_info parallel sort info for fts_sort_idx creation, + or NULL +@param[in] files temporary files +@param[in] key_numbers MySQL key numbers to create +@param[in] n_index number of indexes to create +@param[in] add_cols default values of added columns, or NULL +@param[in] col_map mapping of old column numbers to new ones, or + NULL if old_table == new_table +@param[in] add_autoinc number of added AUTO_INCREMENT columns, or + ULINT_UNDEFINED if none is added +@param[in,out] sequence autoinc sequence +@param[in,out] block file buffer +@param[in,out] tmpfd temporary file handle +return DB_SUCCESS or error */ static __attribute__((nonnull(1,2,3,4,6,9,10,16), warn_unused_result)) dberr_t row_merge_read_clustered_index( -/*===========================*/ - trx_t* trx, /*!< in: transaction */ - struct TABLE* table, /*!< in/out: MySQL table object, - for reporting erroneous records */ - const dict_table_t* old_table,/*!< in: table where rows are - read from */ - const dict_table_t* new_table,/*!< in: table where indexes are - created; identical to old_table - unless creating a PRIMARY KEY */ - bool online, /*!< in: true if creating indexes - online */ - dict_index_t** index, /*!< in: indexes to be created */ + trx_t* trx, + struct TABLE* table, + const dict_table_t* old_table, + const dict_table_t* new_table, + bool online, + dict_index_t** index, dict_index_t* fts_sort_idx, - /*!< in: full-text index to be created, - or NULL */ fts_psort_t* psort_info, - /*!< in: parallel sort info for - fts_sort_idx creation, or NULL */ - merge_file_t* files, /*!< in: temporary files */ + merge_file_t* files, const ulint* key_numbers, - /*!< in: MySQL key numbers to create */ - ulint n_index,/*!< in: number of indexes to create */ + ulint n_index, const dtuple_t* add_cols, - /*!< in: default values of - added columns, or NULL */ - const ulint* col_map,/*!< in: mapping of old column - numbers to new ones, or NULL - if old_table == new_table */ + const ulint* col_map, ulint add_autoinc, - /*!< in: number of added - AUTO_INCREMENT column, or - ULINT_UNDEFINED if none is added */ - ib_sequence_t& sequence,/*!< in/out: autoinc sequence */ - row_merge_block_t* block, /*!< in/out: file buffer */ + ib_sequence_t& sequence, + row_merge_block_t* block, + int* tmpfd, float pct_cost, /*!< in: percent of task weight out of total alter job */ fil_space_crypt_t* crypt_data,/*!< in: crypt data or NULL */ @@ -1477,6 +1525,9 @@ row_merge_read_clustered_index( DEBUG_FTS_SORT_PRINT("FTS_SORT: Start Create Index\n"); #endif + ut_ad(trx->mysql_thd != NULL); + const char* path = thd_innodb_tmpdir(trx->mysql_thd); + /* Create and initialize memory for record buffers */ merge_buf = static_cast<row_merge_buf_t**>( @@ -1954,13 +2005,26 @@ write_buffers: dict_index_get_lock(buf->index)); } - row_merge_buf_write(buf, file, block); + if (buf->n_tuples > 0) { - if (!row_merge_write(file->fd, file->offset++, block, - crypt_data, crypt_block, new_table->space)) { - err = DB_TEMP_FILE_WRITE_FAILURE; - trx->error_key_num = i; - break; + if (row_merge_file_create_if_needed( + file, tmpfd, buf->n_tuples, path) < 0) { + err = DB_OUT_OF_MEMORY; + trx->error_key_num = i; + break; + } + + ut_ad(file->n_rec > 0); + + row_merge_buf_write(buf, file, block); + + if (!row_merge_write(file->fd, file->offset++, + block, crypt_data, crypt_block, + new_table->space)) { + err = DB_TEMP_FILE_WRITE_FAILURE; + trx->error_key_num = i; + break; + } } UNIV_MEM_INVALID(&block[0], srv_sort_buf_size); @@ -2016,6 +2080,7 @@ write_buffers: func_exit: mtr_commit(&mtr); + mem_heap_free(row_heap); if (nonnull) { @@ -3344,14 +3409,15 @@ row_merge_drop_temp_indexes(void) trx_free_for_background(trx); } -/*********************************************************************//** -Creates temporary merge files, and if UNIV_PFS_IO defined, register -the file descriptor with Performance Schema. -@return file descriptor, or -1 on failure */ + +/** Create temporary merge files in the given paramater path, and if +UNIV_PFS_IO defined, register the file descriptor with Performance Schema. +@param[in] path location for creating temporary merge files. +@return File descriptor */ UNIV_INTERN int -row_merge_file_create_low(void) -/*===========================*/ +row_merge_file_create_low( + const char* path) { int fd; #ifdef UNIV_PFS_IO @@ -3365,7 +3431,7 @@ row_merge_file_create_low(void) "Innodb Merge Temp File", __FILE__, __LINE__); #endif - fd = innobase_mysql_tmpfile(); + fd = innobase_mysql_tmpfile(path); #ifdef UNIV_PFS_IO register_pfs_file_open_end(locker, fd); #endif @@ -3378,16 +3444,18 @@ row_merge_file_create_low(void) return(fd); } -/*********************************************************************//** -Create a merge file. + +/** Create a merge file in the given location. +@param[out] merge_file merge file structure +@param[in] path location for creating temporary file @return file descriptor, or -1 on failure */ UNIV_INTERN int row_merge_file_create( -/*==================*/ - merge_file_t* merge_file) /*!< out: merge file structure */ + merge_file_t* merge_file, + const char* path) { - merge_file->fd = row_merge_file_create_low(); + merge_file->fd = row_merge_file_create_low(path); merge_file->offset = 0; merge_file->n_rec = 0; @@ -3958,10 +4026,6 @@ row_merge_build_indexes( total_dynamic_cost = COST_BUILD_INDEX_DYNAMIC * n_indexes; for (i = 0; i < n_indexes; i++) { - if (row_merge_file_create(&merge_files[i]) < 0) { - error = DB_OUT_OF_MEMORY; - goto func_exit; - } if (indexes[i]->type & DICT_FTS) { ibool opt_doc_id_size = FALSE; @@ -3990,13 +4054,6 @@ row_merge_build_indexes( } } - tmpfd = row_merge_file_create_low(); - - if (tmpfd < 0) { - error = DB_OUT_OF_MEMORY; - goto func_exit; - } - /* Reset the MySQL row buffer that is used when reporting duplicate keys. */ innobase_rec_reset(table); @@ -4025,7 +4082,7 @@ row_merge_build_indexes( trx, table, old_table, new_table, online, indexes, fts_sort_idx, psort_info, merge_files, key_numbers, n_indexes, add_cols, col_map, - add_autoinc, sequence, block, pct_cost, + add_autoinc, sequence, block, &tmpfd, pct_cost, crypt_data, crypt_block); pct_progress += pct_cost; @@ -4119,7 +4176,7 @@ wait_again: #ifdef FTS_INTERNAL_DIAG_PRINT DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n"); #endif - } else if (UNIV_LIKELY(merge_files[i].n_rec)) { + } else if (merge_files[i].fd != -1) { char buf[3 * NAME_LEN]; char *bufend; row_merge_dup_t dup = { diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 794b119a090..c98bfc85086 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. @@ -2243,6 +2243,8 @@ exit_func: /*********************************************************************//** A thread which prints warnings about semaphore waits which have lasted too long. These can be used to track bugs which cause hangs. +Note: In order to make sync_arr_wake_threads_if_sema_free work as expected, +we should avoid waiting any mutexes in this function! @return a dummy parameter */ extern "C" UNIV_INTERN os_thread_ret_t @@ -2282,23 +2284,21 @@ loop: /* Try to track a strange bug reported by Harald Fuchs and others, where the lsn seems to decrease at times */ - /* We have to use nowait to ensure we don't block */ - new_lsn= log_get_lsn_nowait(); - - if (new_lsn && new_lsn < old_lsn) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: old log sequence number " LSN_PF - " was greater\n" - "InnoDB: than the new log sequence number " LSN_PF "!\n" - "InnoDB: Please submit a bug report" - " to http://bugs.mysql.com\n", - old_lsn, new_lsn); - ut_ad(0); - } + if (log_peek_lsn(&new_lsn)) { + if (new_lsn < old_lsn) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: old log sequence number " LSN_PF + " was greater\n" + "InnoDB: than the new log sequence number " LSN_PF "!\n" + "InnoDB: Please submit a bug report" + " to http://bugs.mysql.com\n", + old_lsn, new_lsn); + ut_ad(0); + } - if (new_lsn) old_lsn = new_lsn; + } if (difftime(time(NULL), srv_last_monitor_time) > 60) { /* We referesh InnoDB Monitor values so that averages are diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 17f9e53e76f..2455dc1c9a9 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -2000,7 +2000,7 @@ innobase_start_or_create_for_mysql(void) } } else { srv_monitor_file_name = NULL; - srv_monitor_file = os_file_create_tmpfile(); + srv_monitor_file = os_file_create_tmpfile(NULL); if (!srv_monitor_file) { return(DB_ERROR); @@ -2010,7 +2010,7 @@ innobase_start_or_create_for_mysql(void) mutex_create(srv_dict_tmpfile_mutex_key, &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION); - srv_dict_tmpfile = os_file_create_tmpfile(); + srv_dict_tmpfile = os_file_create_tmpfile(NULL); if (!srv_dict_tmpfile) { return(DB_ERROR); @@ -2019,7 +2019,7 @@ innobase_start_or_create_for_mysql(void) mutex_create(srv_misc_tmpfile_mutex_key, &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH); - srv_misc_tmpfile = os_file_create_tmpfile(); + srv_misc_tmpfile = os_file_create_tmpfile(NULL); if (!srv_misc_tmpfile) { return(DB_ERROR); |