diff options
40 files changed, 765 insertions, 1006 deletions
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 1786ba51429..882f0af5aad 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -1454,6 +1454,7 @@ buf_pool_free_instance( buf_chunk_t* chunk; buf_chunk_t* chunks; buf_page_t* bpage; + ulint i; bpage = UT_LIST_GET_LAST(buf_pool->LRU); while (bpage != NULL) { @@ -1477,10 +1478,29 @@ buf_pool_free_instance( mem_free(buf_pool->watch); buf_pool->watch = NULL; + for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { + os_event_free(buf_pool->no_flush[i]); + } + mutex_free(&buf_pool->LRU_list_mutex); + mutex_free(&buf_pool->free_list_mutex); + mutex_free(&buf_pool->zip_free_mutex); + mutex_free(&buf_pool->zip_hash_mutex); + mutex_free(&buf_pool->zip_mutex); + mutex_free(&buf_pool->flush_state_mutex); + mutex_free(&buf_pool->flush_list_mutex); + chunks = buf_pool->chunks; chunk = chunks + buf_pool->n_chunks; while (--chunk >= chunks) { + buf_block_t* block = chunk->blocks; + for (i = 0; i < chunk->size; i++, block++) { + mutex_free(&block->mutex); + rw_lock_free(&block->lock); +#ifdef UNIV_SYNC_DEBUG + rw_lock_free(&block->debug_latch); +#endif + } os_mem_free_large(chunk->mem, chunk->mem_size); } diff --git a/storage/xtradb/buf/buf0lru.cc b/storage/xtradb/buf/buf0lru.cc index af816d36e23..bb0f4d44052 100644 --- a/storage/xtradb/buf/buf0lru.cc +++ b/storage/xtradb/buf/buf0lru.cc @@ -607,6 +607,7 @@ rescan: bpage != NULL; bpage = prev) { + ut_ad(!must_restart); ut_a(buf_page_in_file(bpage)); /* Save the previous link because once we free the @@ -624,9 +625,6 @@ rescan: /* Remove was unsuccessful, we have to try again by scanning the entire list from the end. - This also means that we never released the - flush list mutex. Therefore we can trust the prev - pointer. buf_flush_or_remove_page() released the flush list mutex but not the LRU list mutex. Therefore it is possible that a new page was @@ -643,6 +641,11 @@ rescan: iteration. */ all_freed = false; + if (UNIV_UNLIKELY(must_restart)) { + + /* Cannot trust the prev pointer */ + break; + } } else if (flush) { /* The processing was successful. And during the @@ -650,12 +653,9 @@ rescan: when calling buf_page_flush(). We cannot trust prev pointer. */ goto rescan; - } else if (UNIV_UNLIKELY(must_restart)) { - - ut_ad(!all_freed); - break; } + ut_ad(!must_restart); ++processed; /* Yield if we have hogged the CPU and mutexes for too long. */ @@ -666,6 +666,11 @@ rescan: /* Reset the batch size counter if we had to yield. */ processed = 0; + } else if (UNIV_UNLIKELY(must_restart)) { + + /* Cannot trust the prev pointer */ + all_freed = false; + break; } #ifdef DBUG_OFF diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 981df578acb..1dd5620825f 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -3248,8 +3248,6 @@ fil_create_link_file( const char* tablename, /*!< in: tablename */ const char* filepath) /*!< in: pathname of tablespace */ { - os_file_t file; - ibool success; dberr_t err = DB_SUCCESS; char* link_filepath; char* prev_filepath = fil_read_link_file(tablename); @@ -3268,13 +3266,24 @@ fil_create_link_file( link_filepath = fil_make_isl_name(tablename); - file = os_file_create_simple_no_error_handling( - innodb_file_data_key, link_filepath, - OS_FILE_CREATE, OS_FILE_READ_WRITE, &success); + /** Check if the file already exists. */ + FILE* file = NULL; + ibool exists; + os_file_type_t ftype; - if (!success) { - /* The following call will print an error message */ - ulint error = os_file_get_last_error(true); + bool success = os_file_status(link_filepath, &exists, &ftype); + + ulint error = 0; + if (success && !exists) { + file = fopen(link_filepath, "w"); + if (file == NULL) { + /* This call will print its own error message */ + error = os_file_get_last_error(true); + } + } else { + error = OS_FILE_ALREADY_EXISTS; + } + if (error != 0) { ut_print_timestamp(stderr); fputs(" InnoDB: Cannot create file ", stderr); @@ -3299,13 +3308,17 @@ fil_create_link_file( return(err); } - if (!os_file_write(link_filepath, file, filepath, 0, - strlen(filepath))) { + ulint rbytes = fwrite(filepath, 1, strlen(filepath), file); + if (rbytes != strlen(filepath)) { + os_file_get_last_error(true); + ib_logf(IB_LOG_LEVEL_ERROR, + "cannot write link file " + "%s",filepath); err = DB_ERROR; } /* Close the file, we only need it at startup */ - os_file_close(file); + fclose(file); mem_free(link_filepath); @@ -5185,8 +5198,8 @@ retry: ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file space for file \'%s\' " "failed. Current size " INT64PF - ", len " INT64PF ", desired size " INT64PF - "\n", node->name, start_offset, end_offset, + ", len " INT64PF ", desired size " INT64PF, + node->name, start_offset, end_offset, start_offset + end_offset); } mutex_enter(&fil_system->mutex); @@ -6254,10 +6267,7 @@ void fil_close(void) /*===========*/ { -#ifndef UNIV_HOTBACKUP - /* The mutex should already have been freed. */ - ut_ad(fil_system->mutex.magic_n == 0); -#endif /* !UNIV_HOTBACKUP */ + mutex_free(&fil_system->mutex); hash_table_free(fil_system->spaces); @@ -6781,27 +6791,6 @@ fil_mtr_rename_log( /************************************************************************* functions to access is_corrupt flag of fil_space_t*/ -ibool -fil_space_is_corrupt( -/*=================*/ - ulint space_id) -{ - fil_space_t* space; - ibool ret = FALSE; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(space_id); - - if (UNIV_UNLIKELY(space && space->is_corrupt)) { - ret = TRUE; - } - - mutex_exit(&fil_system->mutex); - - return(ret); -} - void fil_space_set_corrupt( /*==================*/ diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc index 5d6d5ae7cf9..25047b38b9d 100644 --- a/storage/xtradb/fts/fts0fts.cc +++ b/storage/xtradb/fts/fts0fts.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -260,16 +260,18 @@ static const char* fts_config_table_insert_values_sql = "INSERT INTO \"%s\" VALUES ('" FTS_TABLE_STATE "', '0');\n"; -/****************************************************************//** -Run SYNC on the table, i.e., write out data from the cache to the +/** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. -@return DB_SUCCESS if all OK */ +@param[in,out] sync sync state +@param[in] unlock_cache whether unlock cache lock when write node +@param[in] wait whether wait when a sync is in progress +@return DB_SUCCESS if all OK */ static dberr_t fts_sync( -/*=====*/ - fts_sync_t* sync) /*!< in: sync state */ - __attribute__((nonnull)); + fts_sync_t* sync, + bool unlock_cache, + bool wait); /****************************************************************//** Release all resources help by the words rb tree e.g., the node ilist. */ @@ -653,6 +655,7 @@ fts_cache_create( mem_heap_zalloc(heap, sizeof(fts_sync_t))); cache->sync->table = table; + cache->sync->event = os_event_create(); /* Create the index cache vector that will hold the inverted indexes. */ cache->indexes = ib_vector_create( @@ -1207,6 +1210,7 @@ fts_cache_destroy( mutex_free(&cache->optimize_lock); mutex_free(&cache->deleted_lock); mutex_free(&cache->doc_id_lock); + os_event_free(cache->sync->event); if (cache->stopword_info.cached_stopword) { rbt_free(cache->stopword_info.cached_stopword); @@ -1435,7 +1439,7 @@ fts_cache_add_doc( ib_vector_last(word->nodes)); } - if (fts_node == NULL + if (fts_node == NULL || fts_node->synced || fts_node->ilist_size > FTS_ILIST_MAX_SIZE || doc_id < fts_node->last_doc_id) { @@ -2886,35 +2890,28 @@ fts_doc_ids_free( } /*********************************************************************//** -Do commit-phase steps necessary for the insertion of a new row. -@return DB_SUCCESS or error code */ -static __attribute__((nonnull, warn_unused_result)) -dberr_t +Do commit-phase steps necessary for the insertion of a new row. */ +void fts_add( /*====*/ fts_trx_table_t*ftt, /*!< in: FTS trx table */ fts_trx_row_t* row) /*!< in: row */ { dict_table_t* table = ftt->table; - dberr_t error = DB_SUCCESS; doc_id_t doc_id = row->doc_id; ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY); fts_add_doc_by_id(ftt, doc_id, row->fts_indexes); - if (error == DB_SUCCESS) { - mutex_enter(&table->fts->cache->deleted_lock); - ++table->fts->cache->added; - mutex_exit(&table->fts->cache->deleted_lock); + mutex_enter(&table->fts->cache->deleted_lock); + ++table->fts->cache->added; + mutex_exit(&table->fts->cache->deleted_lock); - if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) - && doc_id >= table->fts->cache->next_doc_id) { - table->fts->cache->next_doc_id = doc_id + 1; - } + if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) + && doc_id >= table->fts->cache->next_doc_id) { + table->fts->cache->next_doc_id = doc_id + 1; } - - return(error); } /*********************************************************************//** @@ -3025,7 +3022,7 @@ fts_modify( error = fts_delete(ftt, row); if (error == DB_SUCCESS) { - error = fts_add(ftt, row); + fts_add(ftt, row); } return(error); @@ -3114,7 +3111,7 @@ fts_commit_table( switch (row->state) { case FTS_INSERT: - error = fts_add(ftt, row); + fts_add(ftt, row); break; case FTS_MODIFY: @@ -3553,16 +3550,34 @@ fts_add_doc_by_id( get_doc->index_cache, doc_id, doc.tokens); + bool need_sync = false; + if ((cache->total_size > fts_max_cache_size / 10 + || fts_need_sync) + && !cache->sync->in_progress) { + need_sync = true; + } + rw_lock_x_unlock(&table->fts->cache->lock); DBUG_EXECUTE_IF( "fts_instrument_sync", - fts_sync(cache->sync); + fts_optimize_request_sync_table(table); + os_event_wait(cache->sync->event); + ); + + DBUG_EXECUTE_IF( + "fts_instrument_sync_debug", + fts_sync(cache->sync, true, true); ); - if (cache->total_size > fts_max_cache_size - || fts_need_sync) { - fts_sync(cache->sync); + DEBUG_SYNC_C("fts_instrument_sync_request"); + DBUG_EXECUTE_IF( + "fts_instrument_sync_request", + fts_optimize_request_sync_table(table); + ); + + if (need_sync) { + fts_optimize_request_sync_table(table); } mtr_start(&mtr); @@ -3933,16 +3948,17 @@ fts_sync_add_deleted_cache( return(error); } -/*********************************************************************//** -Write the words and ilist to disk. +/** Write the words and ilist to disk. +@param[in,out] trx transaction +@param[in] index_cache index cache +@param[in] unlock_cache whether unlock cache when write node @return DB_SUCCESS if all went well else error code */ static __attribute__((nonnull, warn_unused_result)) dberr_t fts_sync_write_words( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - fts_index_cache_t* - index_cache) /*!< in: index cache */ + trx_t* trx, + fts_index_cache_t* index_cache, + bool unlock_cache) { fts_table_t fts_table; ulint n_nodes = 0; @@ -3950,8 +3966,8 @@ fts_sync_write_words( const ib_rbt_node_t* rbt_node; dberr_t error = DB_SUCCESS; ibool print_error = FALSE; -#ifdef FTS_DOC_STATS_DEBUG dict_table_t* table = index_cache->index->table; +#ifdef FTS_DOC_STATS_DEBUG ulint n_new_words = 0; #endif /* FTS_DOC_STATS_DEBUG */ @@ -3964,7 +3980,7 @@ fts_sync_write_words( since we want to free the memory used during caching. */ for (rbt_node = rbt_first(index_cache->words); rbt_node; - rbt_node = rbt_first(index_cache->words)) { + rbt_node = rbt_next(index_cache->words, rbt_node)) { ulint i; ulint selected; @@ -3997,27 +4013,47 @@ fts_sync_write_words( } #endif /* FTS_DOC_STATS_DEBUG */ - n_nodes += ib_vector_size(word->nodes); - - /* We iterate over all the nodes even if there was an error, - this is to free the memory of the fts_node_t elements. */ + /* We iterate over all the nodes even if there was an error */ for (i = 0; i < ib_vector_size(word->nodes); ++i) { fts_node_t* fts_node = static_cast<fts_node_t*>( ib_vector_get(word->nodes, i)); + if (fts_node->synced) { + continue; + } else { + fts_node->synced = true; + } + + /*FIXME: we need to handle the error properly. */ if (error == DB_SUCCESS) { + if (unlock_cache) { + rw_lock_x_unlock( + &table->fts->cache->lock); + } error = fts_write_node( trx, &index_cache->ins_graph[selected], &fts_table, &word->text, fts_node); - } - ut_free(fts_node->ilist); - fts_node->ilist = NULL; + DEBUG_SYNC_C("fts_write_node"); + DBUG_EXECUTE_IF("fts_write_node_crash", + DBUG_SUICIDE();); + + DBUG_EXECUTE_IF("fts_instrument_sync_sleep", + os_thread_sleep(1000000); + ); + + if (unlock_cache) { + rw_lock_x_lock( + &table->fts->cache->lock); + } + } } + n_nodes += ib_vector_size(word->nodes); + if (error != DB_SUCCESS && !print_error) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Error (%s) writing " @@ -4026,9 +4062,6 @@ fts_sync_write_words( print_error = TRUE; } - - /* NOTE: We are responsible for free'ing the node */ - ut_free(rbt_remove_node(index_cache->words, rbt_node)); } #ifdef FTS_DOC_STATS_DEBUG @@ -4329,7 +4362,7 @@ fts_sync_index( ut_ad(rbt_validate(index_cache->words)); - error = fts_sync_write_words(trx, index_cache); + error = fts_sync_write_words(sync->trx, index_cache, sync->unlock_cache); #ifdef FTS_DOC_STATS_DEBUG /* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID" @@ -4345,6 +4378,36 @@ fts_sync_index( return(error); } +/** Check if index cache has been synced completely +@param[in,out] sync sync state +@param[in,out] index_cache index cache +@return true if index is synced, otherwise false. */ +static +bool +fts_sync_index_check( + fts_sync_t* sync, + fts_index_cache_t* index_cache) +{ + const ib_rbt_node_t* rbt_node; + + for (rbt_node = rbt_first(index_cache->words); + rbt_node != NULL; + rbt_node = rbt_next(index_cache->words, rbt_node)) { + + fts_tokenizer_word_t* word; + word = rbt_value(fts_tokenizer_word_t, rbt_node); + + fts_node_t* fts_node; + fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes)); + + if (!fts_node->synced) { + return(false); + } + } + + return(true); +} + /*********************************************************************//** Commit the SYNC, change state of processed doc ids etc. @return DB_SUCCESS if all OK */ @@ -4421,21 +4484,53 @@ fts_sync_rollback( trx_t* trx = sync->trx; fts_cache_t* cache = sync->table->fts->cache; + for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) { + ulint j; + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + for (j = 0; fts_index_selector[j].value; ++j) { + + if (index_cache->ins_graph[j] != NULL) { + + fts_que_graph_free_check_lock( + NULL, index_cache, + index_cache->ins_graph[j]); + + index_cache->ins_graph[j] = NULL; + } + + if (index_cache->sel_graph[j] != NULL) { + + fts_que_graph_free_check_lock( + NULL, index_cache, + index_cache->sel_graph[j]); + + index_cache->sel_graph[j] = NULL; + } + } + } + rw_lock_x_unlock(&cache->lock); fts_sql_rollback(trx); trx_free_for_background(trx); } -/****************************************************************//** -Run SYNC on the table, i.e., write out data from the cache to the +/** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. +@param[in,out] sync sync state +@param[in] unlock_cache whether unlock cache lock when write node +@param[in] wait whether wait when a sync is in progress @return DB_SUCCESS if all OK */ static dberr_t fts_sync( -/*=====*/ - fts_sync_t* sync) /*!< in: sync state */ + fts_sync_t* sync, + bool unlock_cache, + bool wait) { ulint i; dberr_t error = DB_SUCCESS; @@ -4443,8 +4538,35 @@ fts_sync( rw_lock_x_lock(&cache->lock); + /* Check if cache is being synced. + Note: we release cache lock in fts_sync_write_words() to + avoid long wait for the lock by other threads. */ + while (sync->in_progress) { + rw_lock_x_unlock(&cache->lock); + + if (wait) { + os_event_wait(sync->event); + } else { + return(DB_SUCCESS); + } + + rw_lock_x_lock(&cache->lock); + } + + sync->unlock_cache = unlock_cache; + sync->in_progress = true; + + DEBUG_SYNC_C("fts_sync_begin"); fts_sync_begin(sync); +begin_sync: + if (cache->total_size > fts_max_cache_size) { + /* Avoid the case: sync never finish when + insert/update keeps comming. */ + ut_ad(sync->unlock_cache); + sync->unlock_cache = false; + } + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { fts_index_cache_t* index_cache; @@ -4459,21 +4581,43 @@ fts_sync( if (error != DB_SUCCESS && !sync->interrupted) { - break; + goto end_sync; } } DBUG_EXECUTE_IF("fts_instrument_sync_interrupted", sync->interrupted = true; error = DB_INTERRUPTED; + goto end_sync; ); + /* Make sure all the caches are synced. */ + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + if (index_cache->index->to_be_dropped + || fts_sync_index_check(sync, index_cache)) { + continue; + } + + goto begin_sync; + } + +end_sync: if (error == DB_SUCCESS && !sync->interrupted) { error = fts_sync_commit(sync); } else { fts_sync_rollback(sync); } + rw_lock_x_lock(&cache->lock); + sync->in_progress = false; + os_event_set(sync->event); + rw_lock_x_unlock(&cache->lock); + /* We need to check whether an optimize is required, for that we make copies of the two variables that control the trigger. These variables can change behind our back and we don't want to hold the @@ -4488,21 +4632,25 @@ fts_sync( return(error); } -/****************************************************************//** -Run SYNC on the table, i.e., write out data from the cache to the -FTS auxiliary INDEX table and clear the cache at the end. */ +/** Run SYNC on the table, i.e., write out data from the cache to the +FTS auxiliary INDEX table and clear the cache at the end. +@param[in,out] table fts table +@param[in] unlock_cache whether unlock cache when write node +@param[in] wait whether wait for existing sync to finish +@return DB_SUCCESS on success, error code on failure. */ UNIV_INTERN dberr_t fts_sync_table( -/*===========*/ - dict_table_t* table) /*!< in: table */ + dict_table_t* table, + bool unlock_cache, + bool wait) { dberr_t err = DB_SUCCESS; ut_ad(table->fts); if (!dict_table_is_discarded(table) && table->fts->cache) { - err = fts_sync(table->fts->cache->sync); + err = fts_sync(table->fts->cache->sync, unlock_cache, wait); } return(err); diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc index 2a0aa4daf12..711c5f53d01 100644 --- a/storage/xtradb/fts/fts0opt.cc +++ b/storage/xtradb/fts/fts0opt.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -87,6 +87,7 @@ enum fts_msg_type_t { FTS_MSG_DEL_TABLE, /*!< Remove a table from the optimize threads work queue */ + FTS_MSG_SYNC_TABLE /*!< Sync fts cache of a table */ }; /** Compressed list of words that have been read from FTS INDEX @@ -2652,6 +2653,39 @@ fts_optimize_remove_table( os_event_free(event); } +/** Send sync fts cache for the table. +@param[in] table table to sync */ +UNIV_INTERN +void +fts_optimize_request_sync_table( + dict_table_t* table) +{ + fts_msg_t* msg; + table_id_t* table_id; + + /* if the optimize system not yet initialized, return */ + if (!fts_optimize_wq) { + return; + } + + /* FTS optimizer thread is already exited */ + if (fts_opt_start_shutdown) { + ib_logf(IB_LOG_LEVEL_INFO, + "Try to sync table %s after FTS optimize" + " thread exiting.", table->name); + return; + } + + msg = fts_optimize_create_msg(FTS_MSG_SYNC_TABLE, NULL); + + table_id = static_cast<table_id_t*>( + mem_heap_alloc(msg->heap, sizeof(table_id_t))); + *table_id = table->id; + msg->ptr = table_id; + + ib_wqueue_add(fts_optimize_wq, msg, msg->heap); +} + /**********************************************************************//** Find the slot for a particular table. @return slot if found else NULL. */ @@ -2932,6 +2966,25 @@ fts_optimize_need_sync( } #endif +/** Sync fts cache of a table +@param[in] table_id table id */ +void +fts_optimize_sync_table( + table_id_t table_id) +{ + dict_table_t* table = NULL; + + table = dict_table_open_on_id(table_id, FALSE, DICT_TABLE_OP_NORMAL); + + if (table) { + if (dict_table_has_fts_index(table) && table->fts->cache) { + fts_sync_table(table, true, false); + } + + dict_table_close(table, FALSE, FALSE); + } +} + /**********************************************************************//** Optimize all FTS tables. @return Dummy return */ @@ -3053,6 +3106,11 @@ fts_optimize_thread( ((fts_msg_del_t*) msg->ptr)->event); break; + case FTS_MSG_SYNC_TABLE: + fts_optimize_sync_table( + *static_cast<table_id_t*>(msg->ptr)); + break; + default: ut_error; } @@ -3079,26 +3137,7 @@ fts_optimize_thread( ib_vector_get(tables, i)); if (slot->state != FTS_STATE_EMPTY) { - dict_table_t* table = NULL; - - /*slot->table may be freed, so we try to open - table by slot->table_id.*/ - table = dict_table_open_on_id( - slot->table_id, FALSE, - DICT_TABLE_OP_NORMAL); - - if (table) { - - if (dict_table_has_fts_index(table)) { - fts_sync_table(table); - } - - if (table->fts) { - fts_free(table); - } - - dict_table_close(table, FALSE, FALSE); - } + fts_optimize_sync_table(slot->table_id); } } } diff --git a/storage/xtradb/ha/ha0ha.cc b/storage/xtradb/ha/ha0ha.cc index b79ae922045..3674260f173 100644 --- a/storage/xtradb/ha/ha0ha.cc +++ b/storage/xtradb/ha/ha0ha.cc @@ -155,11 +155,15 @@ ha_clear( switch (table->type) { case HASH_TABLE_SYNC_MUTEX: + for (ulint i = 0; i < table->n_sync_obj; i++) + mutex_free(table->sync_obj.mutexes + i); mem_free(table->sync_obj.mutexes); table->sync_obj.mutexes = NULL; break; case HASH_TABLE_SYNC_RW_LOCK: + for (ulint i = 0; i < table->n_sync_obj; i++) + rw_lock_free(table->sync_obj.rw_locks + i); mem_free(table->sync_obj.rw_locks); table->sync_obj.rw_locks = NULL; break; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 3d10f04fe07..06230a95076 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2012, Facebook Inc. @@ -3813,6 +3813,16 @@ innobase_change_buffering_inited_ok: innobase_open_files = table_cache_size; } } + + if (innobase_open_files > (long) open_files_limit) { + fprintf(stderr, + "innodb_open_files should not be greater" + " than the open_files_limit.\n"); + if (innobase_open_files > (long) table_cache_size) { + innobase_open_files = table_cache_size; + } + } + srv_max_n_open_files = (ulint) innobase_open_files; srv_innodb_status = (ibool) innobase_create_status_file; @@ -12258,7 +12268,7 @@ ha_innobase::optimize( if (innodb_optimize_fulltext_only) { if (prebuilt->table->fts && prebuilt->table->fts->cache && !dict_table_is_discarded(prebuilt->table)) { - fts_sync_table(prebuilt->table); + fts_sync_table(prebuilt->table, false, true); fts_optimize_table(prebuilt->table); } return(HA_ADMIN_OK); diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc index 17d9854c30d..4bca5bbdf2a 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.cc +++ b/storage/xtradb/ibuf/ibuf0ibuf.cc @@ -2892,7 +2892,7 @@ ibuf_contract_in_background( sum_bytes += n_bytes; sum_pages += n_pag2; - srv_inc_activity_count(); + srv_inc_activity_count(true); } return(sum_bytes); diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 1c78519cac5..29d3ed98779 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1048,11 +1048,6 @@ fil_system_hash_nodes(void); /************************************************************************* functions to access is_corrupt flag of fil_space_t*/ -ibool -fil_space_is_corrupt( -/*=================*/ - ulint space_id); - void fil_space_set_corrupt( /*==================*/ diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h index d54ed281d9a..9f7b0216d9b 100644 --- a/storage/xtradb/include/fts0fts.h +++ b/storage/xtradb/include/fts0fts.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -724,6 +724,13 @@ fts_optimize_remove_table( /*======================*/ dict_table_t* table); /*!< in: table to remove */ +/** Send sync fts cache for the table. +@param[in] table table to sync */ +UNIV_INTERN +void +fts_optimize_request_sync_table( + dict_table_t* table); + /**********************************************************************//** Signal the optimize thread to prepare for shutdown. */ UNIV_INTERN @@ -826,15 +833,18 @@ fts_drop_index_split_tables( dict_index_t* index) /*!< in: fts instance */ __attribute__((nonnull, warn_unused_result)); -/****************************************************************//** -Run SYNC on the table, i.e., write out data from the cache to the -FTS auxiliary INDEX table and clear the cache at the end. */ +/** Run SYNC on the table, i.e., write out data from the cache to the +FTS auxiliary INDEX table and clear the cache at the end. +@param[in,out] table fts table +@param[in] unlock_cache whether unlock cache when write node +@param[in] wait whether wait for existing sync to finish +@return DB_SUCCESS on success, error code on failure. */ UNIV_INTERN dberr_t fts_sync_table( -/*===========*/ - dict_table_t* table) /*!< in: table */ - __attribute__((nonnull)); + dict_table_t* table, + bool unlock_cache, + bool wait); /****************************************************************//** Free the query graph but check whether dict_sys->mutex is already diff --git a/storage/xtradb/include/fts0types.h b/storage/xtradb/include/fts0types.h index 64677428331..e495fe72a60 100644 --- a/storage/xtradb/include/fts0types.h +++ b/storage/xtradb/include/fts0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -122,7 +122,11 @@ struct fts_sync_t { doc_id_t max_doc_id; /*!< The doc id at which the cache was noted as being full, we use this to set the upper_limit field */ - ib_time_t start_time; /*!< SYNC start time */ + ib_time_t start_time; /*!< SYNC start time */ + bool in_progress; /*!< flag whether sync is in progress.*/ + bool unlock_cache; /*!< flag whether unlock cache when + write fts node */ + os_event_t event; /*!< sync finish event */ }; /** The cache for the FTS system. It is a memory-based inverted index @@ -165,7 +169,6 @@ struct fts_cache_t { objects, they are recreated after a SYNC is completed */ - ib_alloc_t* self_heap; /*!< This heap is the heap out of which an instance of the cache itself was created. Objects created using @@ -212,6 +215,7 @@ struct fts_node_t { ulint ilist_size_alloc; /*!< Allocated size of ilist in bytes */ + bool synced; /*!< flag whether the node is synced */ }; /** A tokenizer word. Contains information about one word. */ diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index fbaf0a1e633..c8fe996994d 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -322,17 +322,6 @@ log_archive_do( ulint* n_bytes);/*!< out: archive log buffer size, 0 if nothing to archive */ /****************************************************************//** -Writes the log contents to the archive up to the lsn when this function was -called, and stops the archiving. When archiving is started again, the archived -log file numbers start from a number one higher, so that the archiving will -not write again to the archived log files which exist when this function -returns. -@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint -log_archive_stop(void); -/*==================*/ -/****************************************************************//** Starts again archiving which has been stopped. @return DB_SUCCESS or DB_ERROR */ UNIV_INTERN @@ -594,10 +583,8 @@ log_mem_free(void); /*==============*/ /****************************************************************//** -Safely reads the log_sys->tracked_lsn value. Uses atomic operations -if available, otherwise this field is protected with the log system -mutex. The writer counterpart function is log_set_tracked_lsn() in -log0online.c. +Safely reads the log_sys->tracked_lsn value. The writer counterpart function +is log_set_tracked_lsn() in log0online.c. @return log_sys->tracked_lsn value. */ UNIV_INLINE diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic index 6402c7df1e7..f24c369be33 100644 --- a/storage/xtradb/include/log0log.ic +++ b/storage/xtradb/include/log0log.ic @@ -514,10 +514,8 @@ log_free_check(void) #endif /* !UNIV_HOTBACKUP */ /****************************************************************//** -Safely reads the log_sys->tracked_lsn value. Uses atomic operations -if available, otherwise this field is protected with the log system -mutex. The writer counterpart function is log_set_tracked_lsn() in -log0online.c. +Safely reads the log_sys->tracked_lsn value. The writer counterpart function +is log_set_tracked_lsn() in log0online.c. @return log_sys->tracked_lsn value. */ UNIV_INLINE @@ -525,11 +523,7 @@ lsn_t log_get_tracked_lsn(void) /*=====================*/ { -#ifdef HAVE_ATOMIC_BUILTINS_64 - return os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); -#else - ut_ad(mutex_own(&(log_sys->mutex))); + os_rmb; return log_sys->tracked_lsn; -#endif } diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h index 1ef4df7d6da..67dc0d72b4b 100644 --- a/storage/xtradb/include/log0online.h +++ b/storage/xtradb/include/log0online.h @@ -73,20 +73,7 @@ UNIV_INTERN ibool log_online_purge_changed_page_bitmaps( /*==================================*/ - ib_uint64_t lsn); /*!<in: LSN to purge files up to */ - -/************************************************************//** -Delete all the bitmap files for data less than the specified LSN. -If called with lsn == 0 (i.e. set by RESET request) or -IB_ULONGLONG_MAX, restart the bitmap file sequence, otherwise -continue it. - -@return FALSE to indicate success, TRUE for failure. */ -UNIV_INTERN -ibool -log_online_purge_changed_page_bitmaps( -/*==================================*/ - ib_uint64_t lsn); /*!<in: LSN to purge files up to */ + lsn_t lsn); /*!<in: LSN to purge files up to */ #define LOG_BITMAP_ITERATOR_START_LSN(i) \ ((i).start_lsn) diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h index 805b6c66768..674f68bd1dc 100644 --- a/storage/xtradb/include/log0recv.h +++ b/storage/xtradb/include/log0recv.h @@ -100,15 +100,6 @@ UNIV_INLINE ibool recv_recovery_is_on(void); /*=====================*/ -#ifdef UNIV_LOG_ARCHIVE -/*******************************************************************//** -Returns TRUE if recovery from backup is currently running. -@return recv_recovery_from_backup_on */ -UNIV_INLINE -ibool -recv_recovery_from_backup_is_on(void); -/*=================================*/ -#endif /* UNIV_LOG_ARCHIVE */ /************************************************************************//** Applies the hashed log records to the page, if the page lsn is less than the lsn of a log record. This can be called when a buffer page has just been @@ -330,30 +321,6 @@ void recv_apply_log_recs_for_backup(void); /*================================*/ #endif -#ifdef UNIV_LOG_ARCHIVE -/********************************************************//** -Recovers from archived log files, and also from log files, if they exist. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -recv_recovery_from_archive_start( -/*=============================*/ - lsn_t min_flushed_lsn,/*!< in: min flushed lsn field from the - data files */ - lsn_t limit_lsn, /*!< in: recover up to this lsn if - possible */ - lsn_t first_log_no); /*!< in: number of the first archived - log file to use in the recovery; the - file will be searched from - INNOBASE_LOG_ARCH_DIR specified in - server config file */ -/********************************************************//** -Completes recovery from archive. */ -UNIV_INTERN -void -recv_recovery_from_archive_finish(void); -/*===================================*/ -#endif /* UNIV_LOG_ARCHIVE */ /** Block of log record data */ struct recv_data_t{ diff --git a/storage/xtradb/include/log0recv.ic b/storage/xtradb/include/log0recv.ic index 32c28dd03e6..b29272f4672 100644 --- a/storage/xtradb/include/log0recv.ic +++ b/storage/xtradb/include/log0recv.ic @@ -35,19 +35,3 @@ recv_recovery_is_on(void) { return(recv_recovery_on); } - -#ifdef UNIV_LOG_ARCHIVE -/** TRUE when applying redo log records from an archived log file */ -extern ibool recv_recovery_from_backup_on; - -/*******************************************************************//** -Returns TRUE if recovery from backup is currently running. -@return recv_recovery_from_backup_on */ -UNIV_INLINE -ibool -recv_recovery_from_backup_is_on(void) -/*=================================*/ -{ - return(recv_recovery_from_backup_on); -} -#endif /* UNIV_LOG_ARCHIVE */ diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 330067576f6..de2c7287dbe 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -449,12 +449,6 @@ os_get_os_version(void); /*===================*/ #endif /* __WIN__ */ #ifndef UNIV_HOTBACKUP -/****************************************************************//** -Creates the seek mutexes used in positioned reads and writes. */ -UNIV_INTERN -void -os_io_init_simple(void); -/*===================*/ /** Create a temporary file. This function is like tmpfile(3), but diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index f0bcd493d50..0b3e4edd57a 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -42,7 +42,6 @@ Created 9/6/1995 Heikki Tuuri || defined _M_X64 || defined __WIN__ #define IB_STRONG_MEMORY_MODEL -#undef HAVE_IB_GCC_ATOMIC_TEST_AND_SET // Quick-and-dirty fix for bug 1519094 #endif /* __i386__ || __x86_64__ || _M_IX86 || _M_X64 || __WIN__ */ @@ -94,16 +93,62 @@ struct os_event { #endif os_fast_mutex_t os_mutex; /*!< this mutex protects the next fields */ - ibool is_set; /*!< this is TRUE when the event is - in the signaled state, i.e., a thread - does not stop if it tries to wait for - this event */ - ib_int64_t signal_count; /*!< this is incremented each time - the event becomes signaled */ +private: + /** Masks for the event signal count and set flag in the count_and_set + field */ + enum { count_mask = 0x7fffffffffffffffULL, + set_mask = 0x8000000000000000ULL}; + + /** The MSB is set whenever when the event is in the signaled state, + i.e. a thread does not stop if it tries to wait for this event. Lower + bits are incremented each time the event becomes signaled. */ + ib_uint64_t count_and_set; +public: os_cond_t cond_var; /*!< condition variable is used in waiting for the event */ - UT_LIST_NODE_T(os_event_t) os_event_list; - /*!< list of all created events */ + + /** Initialise count_and_set field */ + void init_count_and_set(void) + { + /* We return this value in os_event_reset(), which can then be + be used to pass to the os_event_wait_low(). The value of zero + is reserved in os_event_wait_low() for the case when the + caller does not want to pass any signal_count value. To + distinguish between the two cases we initialize signal_count + to 1 here. */ + count_and_set = 1; + } + + /** Mark this event as set */ + void set(void) + { + count_and_set |= set_mask; + } + + /** Unmark this event as set */ + void reset(void) + { + count_and_set &= count_mask; + } + + /** Return true if this event is set */ + bool is_set(void) const + { + return count_and_set & set_mask; + } + + /** Bump signal count for this event */ + void inc_signal_count(void) + { + ut_ad(static_cast<ib_uint64_t>(signal_count()) < count_mask); + count_and_set++; + } + + /** Return how many times this event has been signalled */ + ib_int64_t signal_count(void) const + { + return (count_and_set & count_mask); + } }; /** Denotes an infinite delay for os_event_wait_time() */ @@ -115,8 +160,7 @@ struct os_event { /** Operating system mutex handle */ typedef struct os_mutex_t* os_ib_mutex_t; -/** Mutex protecting counts and the event and OS 'slow' mutex lists */ -extern os_ib_mutex_t os_sync_mutex; +// All the os_*_count variables are accessed atomically /** This is incremented by 1 in os_thread_create and decremented by 1 in os_thread_exit */ @@ -132,12 +176,15 @@ UNIV_INTERN void os_sync_init(void); /*==============*/ -/*********************************************************//** -Frees created events and OS 'slow' mutexes. */ + +/** Create an event semaphore, i.e., a semaphore which may just have two +states: signaled and nonsignaled. The created event is manual reset: it must be +reset explicitly by calling sync_os_reset_event. +@param[in,out] event memory block where to create the event */ UNIV_INTERN void -os_sync_free(void); -/*==============*/ +os_event_create(os_event_t event); + /*********************************************************//** Creates an event semaphore, i.e., a semaphore which may just have two states: signaled and nonsignaled. The created event is manual reset: it must be reset @@ -173,7 +220,10 @@ UNIV_INTERN void os_event_free( /*==========*/ - os_event_t event); /*!< in: event to free */ + os_event_t event, /*!< in: event to free */ + bool free_memory = true); + /*!< in: if true, deallocate the event memory + block too */ /**********************************************************//** Waits for an event object until it is in the signaled state. @@ -450,28 +500,7 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_decrement(ptr, amount) -# if defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) - -/** Do an atomic test-and-set. -@param[in,out] ptr Memory location to set to non-zero -@return the previous value */ -inline -lock_word_t -os_atomic_test_and_set(volatile lock_word_t* ptr) -{ - return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE)); -} - -/** Do an atomic clear. -@param[in,out] ptr Memory location to set to zero */ -inline -void -os_atomic_clear(volatile lock_word_t* ptr) -{ - __atomic_clear(ptr, __ATOMIC_RELEASE); -} - -# elif defined(IB_STRONG_MEMORY_MODEL) +# if defined(IB_STRONG_MEMORY_MODEL) /** Do an atomic test and set. @param[in,out] ptr Memory location to set to non-zero @@ -500,6 +529,27 @@ os_atomic_clear(volatile lock_word_t* ptr) return(__sync_lock_test_and_set(ptr, 0)); } +# elif defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + +/** Do an atomic test-and-set. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE)); +} + +/** Do an atomic clear. +@param[in,out] ptr Memory location to set to zero */ +inline +void +os_atomic_clear(volatile lock_word_t* ptr) +{ + __atomic_clear(ptr, __ATOMIC_RELEASE); +} + # else # error "Unsupported platform" diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 400c3b546cc..480d1a2ac2a 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -380,8 +380,6 @@ extern ulong srv_innodb_stats_method; #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; -extern ibool srv_archive_recovery; -extern ib_uint64_t srv_archive_recovery_limit_lsn; #endif /* UNIV_LOG_ARCHIVE */ extern char* srv_file_flush_method_str; @@ -832,19 +830,29 @@ ulint srv_get_activity_count(void); /*========================*/ /*******************************************************************//** -Check if there has been any activity. +Check if there has been any activity. Considers background change buffer +merge as regular server activity unless a non-default +old_ibuf_merge_activity_count value is passed, in which case the merge will be +treated as keeping server idle. @return FALSE if no change in activity counter. */ UNIV_INTERN ibool srv_check_activity( /*===============*/ - ulint old_activity_count); /*!< old activity count */ + ulint old_activity_count, /*!< old activity count */ + /*!< old change buffer merge + activity count, or + ULINT_UNDEFINED */ + ulint old_ibuf_merge_activity_count = ULINT_UNDEFINED); /******************************************************************//** Increment the server activity counter. */ UNIV_INTERN void -srv_inc_activity_count(void); -/*=========================*/ +srv_inc_activity_count( +/*===================*/ + bool ibuf_merge_activity = false); /*!< whether this activity bump + is caused by the background + change buffer merge */ /**********************************************************************//** Enqueues a task to server task queue and releases a worker thread, if there diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h index 84ac40bab78..93f184b6147 100644 --- a/storage/xtradb/include/sync0rw.h +++ b/storage/xtradb/include/sync0rw.h @@ -737,8 +737,8 @@ struct rw_lock_t { /*!< Thread id of writer thread. Is only guaranteed to have sane and non-stale value iff recursive flag is set. */ - os_event_t event; /*!< Used by sync0arr.cc for thread queueing */ - os_event_t wait_ex_event; + struct os_event event; /*!< Used by sync0arr.cc for thread queueing */ + struct os_event wait_ex_event; /*!< Event for next-writer to wait on. A thread must decrement lock_word before waiting. */ #ifndef INNODB_RW_LOCKS_USE_ATOMICS @@ -788,12 +788,12 @@ struct prio_rw_lock_t { volatile ulint high_priority_s_waiters; /* Number of high priority S waiters */ - os_event_t high_priority_s_event; /* High priority wait + struct os_event high_priority_s_event; /* High priority wait array event for S waiters */ volatile ulint high_priority_x_waiters; /* Number of high priority X waiters */ - os_event_t high_priority_x_event; + struct os_event high_priority_x_event; /* High priority wait arraay event for X waiters */ volatile ulint high_priority_wait_ex_waiter; diff --git a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic index 8aadc406132..d7898befe8c 100644 --- a/storage/xtradb/include/sync0rw.ic +++ b/storage/xtradb/include/sync0rw.ic @@ -585,7 +585,7 @@ rw_lock_s_unlock_func( /* wait_ex waiter exists. It may not be asleep, but we signal anyway. We do not wake other waiters, because they can't exist without wait_ex waiter and wait_ex waiter goes first.*/ - os_event_set(lock->wait_ex_event); + os_event_set(&lock->wait_ex_event); sync_array_object_signalled(); } @@ -625,7 +625,7 @@ rw_lock_s_unlock_func( /* A waiting next-writer exists, either high priority or regular, sharing the same wait event. */ - os_event_set(lock->base_lock.wait_ex_event); + os_event_set(&lock->base_lock.wait_ex_event); sync_array_object_signalled(); } else if (lock_word == X_LOCK_DECR) { @@ -636,7 +636,7 @@ rw_lock_s_unlock_func( if (lock->base_lock.waiters) { rw_lock_reset_waiter_flag(&lock->base_lock); - os_event_set(lock->base_lock.event); + os_event_set(&lock->base_lock.event); sync_array_object_signalled(); } } @@ -718,7 +718,7 @@ rw_lock_x_unlock_func( if (lock->waiters) { rw_lock_reset_waiter_flag(lock); - os_event_set(lock->event); + os_event_set(&lock->event); sync_array_object_signalled(); } } @@ -761,16 +761,16 @@ rw_lock_x_unlock_func( if (lock->high_priority_x_waiters) { - os_event_set(lock->high_priority_x_event); + os_event_set(&lock->high_priority_x_event); sync_array_object_signalled(); } else if (lock->high_priority_s_waiters) { - os_event_set(lock->high_priority_s_event); + os_event_set(&lock->high_priority_s_event); sync_array_object_signalled(); } else if (lock->base_lock.waiters) { rw_lock_reset_waiter_flag(&lock->base_lock); - os_event_set(lock->base_lock.event); + os_event_set(&lock->base_lock.event); sync_array_object_signalled(); } } diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index 33bf1305e38..93f37e6208b 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -922,7 +922,7 @@ implementation of a mutual exclusion semaphore. */ /** InnoDB mutex */ struct ib_mutex_t { - os_event_t event; /*!< Used by sync0arr.cc for the wait queue */ + struct os_event event; /*!< Used by sync0arr.cc for the wait queue */ volatile lock_word_t lock_word; /*!< lock_word is the target of the atomic test-and-set instruction when atomic operations are enabled. */ @@ -969,14 +969,13 @@ struct ib_mutex_t { struct ib_prio_mutex_t { ib_mutex_t base_mutex; /* The regular mutex provides the lock word etc. for the priority mutex */ - os_event_t high_priority_event; /* High priority wait array + struct os_event high_priority_event; /* High priority wait array event */ volatile ulint high_priority_waiters; /* Number of threads that asked for this mutex to be acquired with high priority in the global wait array waiting for this mutex to be released. */ - UT_LIST_NODE_T(ib_prio_mutex_t) list; }; /** Constant determining how long spin wait is continued before suspending diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic index d6561a76cdb..5227bd86964 100644 --- a/storage/xtradb/include/sync0sync.ic +++ b/storage/xtradb/include/sync0sync.ic @@ -225,7 +225,7 @@ mutex_exit_func( /* Wake up any high priority waiters first. */ if (mutex->high_priority_waiters != 0) { - os_event_set(mutex->high_priority_event); + os_event_set(&mutex->high_priority_event); sync_array_object_signalled(); } else if (mutex_get_waiters(&mutex->base_mutex) != 0) { diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 6db589355e2..592265c15bd 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -47,7 +47,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_BUGFIX MYSQL_VERSION_PATCH #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 76.2 +#define PERCONA_INNODB_VERSION 76.3 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc index c523b09afc6..38b9257f5ea 100644 --- a/storage/xtradb/lock/lock0lock.cc +++ b/storage/xtradb/lock/lock0lock.cc @@ -641,6 +641,17 @@ lock_sys_close(void) mutex_free(&lock_sys->mutex); mutex_free(&lock_sys->wait_mutex); + os_event_free(lock_sys->timeout_event); + + for (srv_slot_t* slot = lock_sys->waiting_threads; + slot < lock_sys->waiting_threads + OS_THREAD_MAX_N; slot++) { + + ut_ad(!slot->in_use); + ut_ad(!slot->thr); + if (slot->event != NULL) + os_event_free(slot->event); + } + mem_free(lock_stack); mem_free(lock_sys); diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 5a23a083ab3..4c5a8b37076 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -1050,8 +1050,7 @@ log_group_init( ulint space_id, /*!< in: space id of the file space which contains the log files of this group */ - ulint archive_space_id __attribute__((unused))) - /*!< in: space id of the file space + ulint archive_space_id) /*!< in: space id of the file space which contains some archived log files for this group; currently, only for the first log group this is @@ -3128,10 +3127,9 @@ log_archive_close_groups( Writes the log contents to the archive up to the lsn when this function was called, and stops the archiving. When archiving is started again, the archived log file numbers start from 2 higher, so that the archiving will not write -again to the archived log files which exist when this function returns. -@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint +again to the archived log files which exist when this function returns. */ +static +void log_archive_stop(void) /*==================*/ { @@ -3139,13 +3137,7 @@ log_archive_stop(void) mutex_enter(&(log_sys->mutex)); - if (log_sys->archiving_state != LOG_ARCH_ON) { - - mutex_exit(&(log_sys->mutex)); - - return(DB_ERROR); - } - + ut_ad(log_sys->archiving_state == LOG_ARCH_ON); log_sys->archiving_state = LOG_ARCH_STOPPING; mutex_exit(&(log_sys->mutex)); @@ -3187,8 +3179,6 @@ log_archive_stop(void) log_sys->archiving_state = LOG_ARCH_STOPPED; mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); } /****************************************************************//** @@ -3930,6 +3920,7 @@ log_shutdown(void) rw_lock_free(&log_sys->checkpoint_lock); mutex_free(&log_sys->mutex); + mutex_free(&log_sys->log_flush_order_mutex); #ifdef UNIV_LOG_ARCHIVE rw_lock_free(&log_sys->archive_lock); diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index a6ecd57ef69..0d6140f137e 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -298,7 +298,7 @@ log_online_read_bitmap_page( /* The following call prints an error message */ os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_WARN, - "failed reading changed page bitmap file \'%s\'\n", + "failed reading changed page bitmap file \'%s\'", bitmap_file->name); return FALSE; } @@ -358,7 +358,7 @@ log_online_read_last_tracked_lsn(void) ib_logf(IB_LOG_LEVEL_WARN, "corruption detected in \'%s\' at offset " - UINT64PF "\n", + UINT64PF, log_bmp_sys->out.name, read_offset); } }; @@ -372,7 +372,7 @@ log_online_read_last_tracked_lsn(void) log_bmp_sys->out.offset)) { ib_logf(IB_LOG_LEVEL_WARN, "failed truncating changed page bitmap file \'%s\' to " - UINT64PF " bytes\n", + UINT64PF " bytes", log_bmp_sys->out.name, log_bmp_sys->out.offset); result = 0; } @@ -390,16 +390,8 @@ log_set_tracked_lsn( /*================*/ lsn_t tracked_lsn) /*!<in: new value */ { -#ifdef HAVE_ATOMIC_BUILTINS_64 - /* Single writer, no data race here */ - lsn_t old_value = os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); - (void) os_atomic_increment_uint64(&log_sys->tracked_lsn, - tracked_lsn - old_value); -#else - mutex_enter(&log_sys->mutex); log_sys->tracked_lsn = tracked_lsn; - mutex_exit(&log_sys->mutex); -#endif + os_wmb; } /*********************************************************************//** @@ -424,7 +416,7 @@ log_online_can_track_missing( ib_logf(IB_LOG_LEVEL_ERROR, "last tracked LSN " LSN_PF " is ahead of tracking " "start LSN " LSN_PF ". This can be caused by " - "mismatched bitmap files.\n", + "mismatched bitmap files.", last_tracked_lsn, tracking_start_lsn); exit(1); } @@ -452,7 +444,7 @@ log_online_track_missing_on_startup( ib_logf(IB_LOG_LEVEL_WARN, "last tracked LSN in \'%s\' is " LSN_PF ", but the last checkpoint LSN is " LSN_PF ". This might be " - "due to a server crash or a very fast shutdown. ", + "due to a server crash or a very fast shutdown.", log_bmp_sys->out.name, last_tracked_lsn, tracking_start_lsn); /* See if we can fully recover the missing interval */ @@ -460,7 +452,7 @@ log_online_track_missing_on_startup( tracking_start_lsn)) { ib_logf(IB_LOG_LEVEL_INFO, - "reading the log to advance the last tracked LSN.\n"); + "reading the log to advance the last tracked LSN."); log_bmp_sys->start_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN); @@ -471,22 +463,22 @@ log_online_track_missing_on_startup( ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn); ib_logf(IB_LOG_LEVEL_INFO, - "continuing tracking changed pages from LSN " LSN_PF - "\n", log_bmp_sys->end_lsn); + "continuing tracking changed pages from LSN " LSN_PF, + log_bmp_sys->end_lsn); } else { ib_logf(IB_LOG_LEVEL_WARN, "the age of last tracked LSN exceeds log capacity, " "tracking-based incremental backups will work only " - "from the higher LSN!\n"); + "from the higher LSN!"); log_bmp_sys->end_lsn = log_bmp_sys->start_lsn = tracking_start_lsn; log_set_tracked_lsn(log_bmp_sys->start_lsn); ib_logf(IB_LOG_LEVEL_INFO, - "starting tracking changed pages from LSN " LSN_PF - "\n", log_bmp_sys->end_lsn); + "starting tracking changed pages from LSN " LSN_PF, + log_bmp_sys->end_lsn); } } @@ -554,7 +546,7 @@ log_online_start_bitmap_file(void) /* The following call prints an error message */ os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_ERROR, - "cannot create \'%s\'\n", log_bmp_sys->out.name); + "cannot create \'%s\'", log_bmp_sys->out.name); return FALSE; } @@ -690,7 +682,7 @@ log_online_read_init(void) if (os_file_closedir(bitmap_dir)) { os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'\n", + ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'", log_bmp_sys->bmp_file_home); exit(1); } @@ -730,7 +722,7 @@ log_online_read_init(void) ib_logf(IB_LOG_LEVEL_WARN, "truncated block detected in \'%s\' at offset " - UINT64PF "\n", + UINT64PF, log_bmp_sys->out.name, log_bmp_sys->out.offset); log_bmp_sys->out.offset -= @@ -768,14 +760,14 @@ log_online_read_init(void) "last tracked LSN is " LSN_PF ", but the last " "checkpoint LSN is " LSN_PF ". The " "tracking-based incremental backups will work " - "only from the latter LSN!\n", + "only from the latter LSN!", last_tracked_lsn, tracking_start_lsn); } } ib_logf(IB_LOG_LEVEL_INFO, "starting tracking changed pages from LSN " - LSN_PF "\n", tracking_start_lsn); + LSN_PF, tracking_start_lsn); log_bmp_sys->start_lsn = tracking_start_lsn; log_set_tracked_lsn(tracking_start_lsn); } @@ -919,7 +911,7 @@ log_online_is_valid_log_seg( ib_logf(IB_LOG_LEVEL_ERROR, "log block checksum mismatch: expected " ULINTPF ", " - "calculated checksum " ULINTPF "\n", + "calculated checksum " ULINTPF, log_block_get_checksum(log_block), log_block_calc_checksum(log_block)); } @@ -1118,7 +1110,7 @@ log_online_write_bitmap_page( /* The following call prints an error message */ os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_ERROR, "failed writing changed page " - "bitmap file \'%s\'\n", log_bmp_sys->out.name); + "bitmap file \'%s\'", log_bmp_sys->out.name); return FALSE; } @@ -1128,7 +1120,7 @@ log_online_write_bitmap_page( /* The following call prints an error message */ os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_ERROR, "failed flushing changed page " - "bitmap file \'%s\'\n", log_bmp_sys->out.name); + "bitmap file \'%s\'", log_bmp_sys->out.name); return FALSE; } @@ -1275,8 +1267,7 @@ log_online_diagnose_inconsistent_dir( ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Warning: inconsistent bitmap file " "directory for a " - "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES query" - "\n"); + "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES query"); free(bitmap_files->files); } @@ -1318,7 +1309,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(!bitmap_dir)) { ib_logf(IB_LOG_LEVEL_ERROR, - "failed to open bitmap directory \'%s\'\n", + "failed to open bitmap directory \'%s\'", srv_data_home); return FALSE; } @@ -1368,7 +1359,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(os_file_closedir(bitmap_dir))) { os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'\n", + ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'", srv_data_home); return FALSE; } @@ -1389,7 +1380,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(!bitmap_dir)) { ib_logf(IB_LOG_LEVEL_ERROR, - "failed to open bitmap directory \'%s\'\n", + "failed to open bitmap directory \'%s\'", srv_data_home); return FALSE; } @@ -1440,7 +1431,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(os_file_closedir(bitmap_dir))) { os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'\n", + ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'", srv_data_home); free(bitmap_files->files); return FALSE; @@ -1515,7 +1506,7 @@ log_online_open_bitmap_file_read_only( /* Here and below assume that bitmap file names do not contain apostrophes, thus no need for ut_print_filename(). */ ib_logf(IB_LOG_LEVEL_WARN, - "error opening the changed page bitmap \'%s\'\n", + "error opening the changed page bitmap \'%s\'", bitmap_file->name); return FALSE; } @@ -1561,7 +1552,7 @@ log_online_diagnose_bitmap_eof( ib_logf(IB_LOG_LEVEL_WARN, "junk at the end of changed page bitmap file " - "\'%s\'.\n", bitmap_file->name); + "\'%s\'.", bitmap_file->name); } if (UNIV_UNLIKELY(!last_page_in_run)) { @@ -1572,7 +1563,7 @@ log_online_diagnose_bitmap_eof( for the whole server */ ib_logf(IB_LOG_LEVEL_WARN, "changed page bitmap file \'%s\' does not " - "contain a complete run at the end.\n", + "contain a complete run at the end.", bitmap_file->name); return FALSE; } @@ -1765,7 +1756,7 @@ log_online_bitmap_iterator_next( os_file_get_last_error(TRUE); ib_logf(IB_LOG_LEVEL_WARN, "failed reading changed page bitmap file " - "\'%s\'\n", i->in_files.files[i->in_i].name); + "\'%s\'", i->in_files.files[i->in_i].name); i->failed = TRUE; return FALSE; } diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index cc0bb515988..a99527f53ca 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -77,10 +77,6 @@ UNIV_INTERN recv_sys_t* recv_sys = NULL; otherwise. Note that this is FALSE while a background thread is rolling back incomplete transactions. */ UNIV_INTERN ibool recv_recovery_on; -#ifdef UNIV_LOG_ARCHIVE -/** TRUE when applying redo log records from an archived log file */ -UNIV_INTERN ibool recv_recovery_from_backup_on; -#endif /* UNIV_LOG_ARCHIVE */ #ifndef UNIV_HOTBACKUP /** TRUE when recv_init_crash_recovery() has been called. */ @@ -293,10 +289,6 @@ recv_sys_var_init(void) recv_recovery_on = FALSE; -#ifdef UNIV_LOG_ARCHIVE - recv_recovery_from_backup_on = FALSE; -#endif /* UNIV_LOG_ARCHIVE */ - recv_needed_recovery = FALSE; recv_lsn_checks_on = FALSE; @@ -3748,327 +3740,6 @@ recv_reset_log_files_for_backup( } #endif /* UNIV_HOTBACKUP */ -#ifdef UNIV_LOG_ARCHIVE -/******************************************************//** -Reads from the archive of a log group and performs recovery. -@return TRUE if no more complete consistent archive files */ -static -ibool -log_group_recover_from_archive_file( -/*================================*/ - log_group_t* group) /*!< in: log group */ -{ - os_file_t file_handle; - ib_uint64_t start_lsn; - ib_uint64_t file_end_lsn; - ib_uint64_t dummy_lsn; - ib_uint64_t scanned_lsn; - ulint len; - ibool ret; - byte* buf; - os_offset_t read_offset; - os_offset_t file_size; - int input_char; - char name[OS_FILE_MAX_PATH]; - - ut_a(0); - -try_open_again: - buf = log_sys->buf; - - /* Add the file to the archive file space; open the file */ - - log_archived_file_name_gen(name, sizeof(name), - group->id, group->archived_file_no); - - file_handle = os_file_create(innodb_file_log_key, - name, OS_FILE_OPEN, - OS_FILE_LOG, OS_FILE_AIO, &ret); - - if (ret == FALSE) { -ask_again: - fprintf(stderr, - "InnoDB: Do you want to copy additional" - " archived log files\n" - "InnoDB: to the directory\n"); - fprintf(stderr, - "InnoDB: or were these all the files needed" - " in recovery?\n"); - fprintf(stderr, - "InnoDB: (Y == copy more files; N == this is all)?"); - - input_char = getchar(); - - if (input_char == (int) 'N') { - - return(TRUE); - } else if (input_char == (int) 'Y') { - - goto try_open_again; - } else { - goto ask_again; - } - } - - file_size = os_file_get_size(file_handle); - ut_a(file_size != (os_offset_t) -1); - - fprintf(stderr, "InnoDB: Opened archived log file %s\n", name); - - ret = os_file_close(file_handle); - - if (file_size < LOG_FILE_HDR_SIZE) { - fprintf(stderr, - "InnoDB: Archive file header incomplete %s\n", name); - - return(TRUE); - } - - ut_a(ret); - - /* Add the archive file as a node to the space */ - - ut_a(fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE, - group->archive_space_id, FALSE)); - ut_a(RECV_SCAN_SIZE >= LOG_FILE_HDR_SIZE); - - /* Read the archive file header */ - fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, - 0, 0, - LOG_FILE_HDR_SIZE, buf, NULL); - - /* Check if the archive file header is consistent */ - - if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id - || mach_read_from_8(buf + LOG_FILE_START_LSN) - != group->archived_file_no) { - fprintf(stderr, - "InnoDB: Archive file header inconsistent %s\n", name); - - return(TRUE); - } - - if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) { - fprintf(stderr, - "InnoDB: Archive file not completely written %s\n", - name); - - return(TRUE); - } - - start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN); - file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN); - - if (!recv_sys->scanned_lsn) { - - if (recv_sys->parse_start_lsn < start_lsn) { - fprintf(stderr, - "InnoDB: Archive log file %s" - " starts from too big a lsn\n", - name); - return(TRUE); - } - - recv_sys->scanned_lsn = start_lsn; - } - - if (recv_sys->scanned_lsn != start_lsn) { - - fprintf(stderr, - "InnoDB: Archive log file %s starts from" - " a wrong lsn\n", - name); - return(TRUE); - } - - read_offset = LOG_FILE_HDR_SIZE; - - for (;;) { - len = RECV_SCAN_SIZE; - - if (read_offset + len > file_size) { - len = ut_calc_align_down(file_size - read_offset, - OS_FILE_LOG_BLOCK_SIZE); - } - - if (len == 0) { - - break; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Archive read starting at" - " lsn " LSN_PF ", len %lu from file %s\n", - start_lsn, - (ulong) len, name); - } -#endif /* UNIV_DEBUG */ - - fil_io(OS_FILE_READ | OS_FILE_LOG, true, - group->archive_space_id, 0, - read_offset / UNIV_PAGE_SIZE, - read_offset % UNIV_PAGE_SIZE, len, buf, NULL); - - ret = recv_scan_log_recs( - (buf_pool_get_n_pages() - - (recv_n_pool_free_frames * srv_buf_pool_instances)) - * UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn, - &dummy_lsn, &scanned_lsn); - - if (scanned_lsn == file_end_lsn) { - - return(FALSE); - } - - if (ret) { - fprintf(stderr, - "InnoDB: Archive log file %s" - " does not scan right\n", - name); - return(TRUE); - } - - read_offset += len; - start_lsn += len; - - ut_ad(start_lsn == scanned_lsn); - } - - return(FALSE); -} - -/********************************************************//** -Recovers from archived log files, and also from log files, if they exist. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -recv_recovery_from_archive_start( -/*=============================*/ - ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the - data files */ - ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if - possible */ - lsn_t first_log_no) /*!< in: number of the first archived - log file to use in the recovery; the - file will be searched from - INNOBASE_LOG_ARCH_DIR specified in - server config file */ -{ - log_group_t* group; - ulint group_id; - ulint trunc_len; - ibool ret; - dberr_t err; - - ut_a(0); - - recv_sys_create(); - recv_sys_init(buf_pool_get_curr_size()); - - recv_recovery_on = TRUE; - recv_recovery_from_backup_on = TRUE; - - recv_sys->limit_lsn = limit_lsn; - - group_id = 0; - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group) { - if (group->id == group_id) { - - break; - } - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - if (!group) { - fprintf(stderr, - "InnoDB: There is no log group defined with id %lu!\n", - (ulong) group_id); - return(DB_ERROR); - } - - group->archived_file_no = first_log_no; - - recv_sys->parse_start_lsn = min_flushed_lsn; - - recv_sys->scanned_lsn = 0; - recv_sys->scanned_checkpoint_no = 0; - recv_sys->recovered_lsn = recv_sys->parse_start_lsn; - - recv_sys->archive_group = group; - - ret = FALSE; - - mutex_enter(&(log_sys->mutex)); - - while (!ret) { - ret = log_group_recover_from_archive_file(group); - - /* Close and truncate a possible processed archive file - from the file space */ - - trunc_len = UNIV_PAGE_SIZE - * fil_space_get_size(group->archive_space_id); - if (trunc_len > 0) { - fil_space_truncate_start(group->archive_space_id, - trunc_len); - } - - group->archived_file_no += group->file_size - LOG_FILE_HDR_SIZE; - } - - if (recv_sys->recovered_lsn < limit_lsn) { - - if (!recv_sys->scanned_lsn) { - - recv_sys->scanned_lsn = recv_sys->parse_start_lsn; - } - - mutex_exit(&(log_sys->mutex)); - - err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE, - limit_lsn, - LSN_MAX, - LSN_MAX); - if (err != DB_SUCCESS) { - - return(err); - } - - mutex_enter(&(log_sys->mutex)); - } - - if (limit_lsn != LSN_MAX) { - - recv_apply_hashed_log_recs(FALSE); - - recv_reset_logs(0, FALSE, recv_sys->recovered_lsn); - } - - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); -} - -/********************************************************//** -Completes recovery from archive. */ -UNIV_INTERN -void -recv_recovery_from_archive_finish(void) -/*===================================*/ -{ - recv_recovery_from_checkpoint_finish(); - - recv_recovery_from_backup_on = FALSE; -} -#endif /* UNIV_LOG_ARCHIVE */ - - void recv_dblwr_t::add(byte* page) { pages.push_back(page); diff --git a/storage/xtradb/mem/mem0pool.cc b/storage/xtradb/mem/mem0pool.cc index fe9a84d21fa..42d0417c768 100644 --- a/storage/xtradb/mem/mem0pool.cc +++ b/storage/xtradb/mem/mem0pool.cc @@ -280,6 +280,7 @@ mem_pool_free( /*==========*/ mem_pool_t* pool) /*!< in, own: memory pool */ { + mutex_free(&pool->mutex); ut_free(pool->buf); ut_free(pool); } diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index a28555821b7..0d94534d139 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -378,6 +378,42 @@ os_get_os_version(void) #endif /* __WIN__ */ /***********************************************************************//** +For an EINVAL I/O error, prints a diagnostic message if innodb_flush_method +== ALL_O_DIRECT. +@return true if the diagnostic message was printed +@return false if the diagnostic message does not apply */ +static +bool +os_diagnose_all_o_direct_einval( +/*============================*/ + ulint err) /*!< in: C error code */ +{ + if ((err == EINVAL) + && (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)) { + ib_logf(IB_LOG_LEVEL_INFO, + "The error might be caused by redo log I/O not " + "satisfying innodb_flush_method=ALL_O_DIRECT " + "requirements by the underlying file system."); + if (srv_log_block_size != 512) + ib_logf(IB_LOG_LEVEL_INFO, + "This might be caused by an incompatible " + "non-default innodb_log_block_size value %lu.", + srv_log_block_size); + ib_logf(IB_LOG_LEVEL_INFO, + "Please file a bug at https://bugs.percona.com and " + "include this error message, my.cnf settings, and " + "information about the file system where the redo log " + "resides."); + ib_logf(IB_LOG_LEVEL_INFO, + "A possible workaround is to change " + "innodb_flush_method value to something else " + "than ALL_O_DIRECT."); + return(true); + } + return(false); +} + +/***********************************************************************//** Retrieves the last error number if an error occurs in a file io function. The number should be retrieved before any other OS calls (because they may overwrite the error number). If the number is not known to this program, @@ -512,7 +548,7 @@ os_file_get_last_error_low( "InnoDB: The error means mysqld does not have" " the access rights to\n" "InnoDB: the directory.\n"); - } else { + } else if (!os_diagnose_all_o_direct_einval(err)) { if (strerror(err) != NULL) { fprintf(stderr, "InnoDB: Error number %d" @@ -750,7 +786,7 @@ os_file_lock( #ifndef UNIV_HOTBACKUP /****************************************************************//** Creates the seek mutexes used in positioned reads and writes. */ -UNIV_INTERN +static void os_io_init_simple(void) /*===================*/ @@ -1585,7 +1621,7 @@ os_file_set_atomic_writes( #else ib_logf(IB_LOG_LEVEL_ERROR, "trying to enable atomic writes on non-supported platform! " - "Please restart with innodb_use_atomic_writes disabled.\n"); + "Please restart with innodb_use_atomic_writes disabled."); return(FALSE); #endif } @@ -2204,7 +2240,7 @@ os_file_set_size( ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " "space for file \'%s\' failed. Current size " - INT64PF ", desired size " INT64PF "\n", + INT64PF ", desired size " INT64PF, name, current_size, size); os_file_handle_error_no_exit (name, "posix_fallocate", FALSE); @@ -2672,6 +2708,9 @@ os_file_pwrite( /* Handle partial writes and signal interruptions correctly */ for (ret = 0; ret < (ssize_t) n; ) { n_written = pwrite(file, buf, (ssize_t)n - ret, offs); + DBUG_EXECUTE_IF("xb_simulate_all_o_direct_write_failure", + n_written = -1; + errno = EINVAL;); if (n_written >= 0) { ret += n_written; offs += n_written; @@ -2844,6 +2883,10 @@ try_again: try_again: ret = os_file_pread(file, buf, n, offset, trx); + DBUG_EXECUTE_IF("xb_simulate_all_o_direct_read_failure", + ret = -1; + errno = EINVAL;); + if ((ulint) ret == n) { return(TRUE); } else if (ret == -1) { @@ -3220,6 +3263,8 @@ retry: "InnoDB: " REFMAN "operating-system-error-codes.html\n"); + os_diagnose_all_o_direct_einval(errno); + os_has_said_disk_full = TRUE; } @@ -4196,6 +4241,14 @@ os_aio_free(void) os_event_free(os_aio_segment_wait_events[i]); } +#if !defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8 + os_mutex_free(os_file_count_mutex); +#endif /* !HAVE_ATOMIC_BUILTINS || UNIV_WORD_SIZE < 8 */ + + for (ulint i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { + os_mutex_free(os_file_seek_mutexes[i]); + } + ut_free(os_aio_segment_wait_events); os_aio_segment_wait_events = 0; os_aio_n_segments = 0; @@ -5900,7 +5953,7 @@ os_aio_print( srv_io_thread_function[i]); #ifndef __WIN__ - if (os_aio_segment_wait_events[i]->is_set) { + if (os_aio_segment_wait_events[i]->is_set()) { fprintf(file, " ev set"); } #endif /* __WIN__ */ diff --git a/storage/xtradb/os/os0sync.cc b/storage/xtradb/os/os0sync.cc index cd57abd0623..9a92112d37e 100644 --- a/storage/xtradb/os/os0sync.cc +++ b/storage/xtradb/os/os0sync.cc @@ -47,27 +47,14 @@ struct os_mutex_t{ do not assume that the OS mutex supports recursive locking, though NT seems to do that */ - UT_LIST_NODE_T(os_mutex_t) os_mutex_list; - /* list of all 'slow' OS mutexes created */ }; -/** Mutex protecting counts and the lists of OS mutexes and events */ -UNIV_INTERN os_ib_mutex_t os_sync_mutex; -/** TRUE if os_sync_mutex has been initialized */ -static ibool os_sync_mutex_inited = FALSE; -/** TRUE when os_sync_free() is being executed */ -static ibool os_sync_free_called = FALSE; +// All the os_*_count variables are accessed atomically /** This is incremented by 1 in os_thread_create and decremented by 1 in -os_thread_exit */ +os_thread_exit. */ UNIV_INTERN ulint os_thread_count = 0; -/** The list of all events created */ -static UT_LIST_BASE_NODE_T(os_event) os_event_list; - -/** The list of all OS 'slow' mutexes */ -static UT_LIST_BASE_NODE_T(os_mutex_t) os_mutex_list; - UNIV_INTERN ulint os_event_count = 0; UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0; @@ -80,11 +67,6 @@ UNIV_INTERN mysql_pfs_key_t event_os_mutex_key; UNIV_INTERN mysql_pfs_key_t os_mutex_key; #endif -/* Because a mutex is embedded inside an event and there is an -event embedded inside a mutex, on free, this generates a recursive call. -This version of the free event function doesn't acquire the global lock */ -static void os_event_free_internal(os_event_t event); - /* On Windows (Vista and later), load function pointers for condition variable handling. Those functions are not available in prior versions, so we have to use them via runtime loading, as long as we support XP. */ @@ -289,74 +271,21 @@ void os_sync_init(void) /*==============*/ { - UT_LIST_INIT(os_event_list); - UT_LIST_INIT(os_mutex_list); - - os_sync_mutex = NULL; - os_sync_mutex_inited = FALSE; - /* Now for Windows only */ os_cond_module_init(); - - os_sync_mutex = os_mutex_create(); - - os_sync_mutex_inited = TRUE; } -/*********************************************************//** -Frees created events and OS 'slow' mutexes. */ +/** Create an event semaphore, i.e., a semaphore which may just have two +states: signaled and nonsignaled. The created event is manual reset: it must be +reset explicitly by calling sync_os_reset_event. +@param[in,out] event memory block where to create the event */ UNIV_INTERN void -os_sync_free(void) -/*==============*/ -{ - os_event_t event; - os_ib_mutex_t mutex; - - os_sync_free_called = TRUE; - event = UT_LIST_GET_FIRST(os_event_list); - - while (event) { - - os_event_free(event); - - event = UT_LIST_GET_FIRST(os_event_list); - } - - mutex = UT_LIST_GET_FIRST(os_mutex_list); - - while (mutex) { - if (mutex == os_sync_mutex) { - /* Set the flag to FALSE so that we do not try to - reserve os_sync_mutex any more in remaining freeing - operations in shutdown */ - os_sync_mutex_inited = FALSE; - } - - os_mutex_free(mutex); - - mutex = UT_LIST_GET_FIRST(os_mutex_list); - } - os_sync_free_called = FALSE; -} - -/*********************************************************//** -Creates an event semaphore, i.e., a semaphore which may just have two -states: signaled and nonsignaled. The created event is manual reset: it -must be reset explicitly by calling sync_os_reset_event. -@return the event handle */ -UNIV_INTERN -os_event_t -os_event_create(void) -/*==================*/ +os_event_create(os_event_t event) { - os_event_t event; - #ifdef __WIN__ if(!srv_use_native_conditions) { - event = static_cast<os_event_t>(ut_malloc(sizeof(*event))); - event->handle = CreateEvent(NULL, TRUE, FALSE, NULL); if (!event->handle) { fprintf(stderr, @@ -367,8 +296,6 @@ os_event_create(void) } else /* Windows with condition variables */ #endif { - event = static_cast<os_event_t>(ut_malloc(sizeof *event)); - #ifndef PFS_SKIP_EVENT_MUTEX os_fast_mutex_init(event_os_mutex_key, &event->os_mutex); #else @@ -377,32 +304,25 @@ os_event_create(void) os_cond_init(&(event->cond_var)); - event->is_set = FALSE; - - /* We return this value in os_event_reset(), which can then be - be used to pass to the os_event_wait_low(). The value of zero - is reserved in os_event_wait_low() for the case when the - caller does not want to pass any signal_count value. To - distinguish between the two cases we initialize signal_count - to 1 here. */ - event->signal_count = 1; + event->init_count_and_set(); } - /* The os_sync_mutex can be NULL because during startup an event - can be created [ because it's embedded in the mutex/rwlock ] before - this module has been initialized */ - if (os_sync_mutex != NULL) { - os_mutex_enter(os_sync_mutex); - } - - /* Put to the list of events */ - UT_LIST_ADD_FIRST(os_event_list, os_event_list, event); + os_atomic_increment_ulint(&os_event_count, 1); +} - os_event_count++; +/*********************************************************//** +Creates an event semaphore, i.e., a semaphore which may just have two +states: signaled and nonsignaled. The created event is manual reset: it +must be reset explicitly by calling sync_os_reset_event. +@return the event handle */ +UNIV_INTERN +os_event_t +os_event_create(void) +/*==================*/ +{ + os_event_t event = static_cast<os_event_t>(ut_malloc(sizeof(*event)));; - if (os_sync_mutex != NULL) { - os_mutex_exit(os_sync_mutex); - } + os_event_create(event); return(event); } @@ -427,11 +347,11 @@ os_event_set( os_fast_mutex_lock(&(event->os_mutex)); - if (event->is_set) { + if (UNIV_UNLIKELY(event->is_set())) { /* Do nothing */ } else { - event->is_set = TRUE; - event->signal_count += 1; + event->set(); + event->inc_signal_count(); os_cond_broadcast(&(event->cond_var)); } @@ -465,55 +385,26 @@ os_event_reset( os_fast_mutex_lock(&(event->os_mutex)); - if (!event->is_set) { + if (UNIV_UNLIKELY(!event->is_set())) { /* Do nothing */ } else { - event->is_set = FALSE; + event->reset(); } - ret = event->signal_count; + ret = event->signal_count(); os_fast_mutex_unlock(&(event->os_mutex)); return(ret); } /**********************************************************//** -Frees an event object, without acquiring the global lock. */ -static -void -os_event_free_internal( -/*===================*/ - os_event_t event) /*!< in: event to free */ -{ -#ifdef __WIN__ - if(!srv_use_native_conditions) { - ut_a(event); - ut_a(CloseHandle(event->handle)); - } else -#endif - { - ut_a(event); - - /* This is to avoid freeing the mutex twice */ - os_fast_mutex_free(&(event->os_mutex)); - - os_cond_destroy(&(event->cond_var)); - } - - /* Remove from the list of events */ - UT_LIST_REMOVE(os_event_list, os_event_list, event); - - os_event_count--; - - ut_free(event); -} - -/**********************************************************//** Frees an event object. */ UNIV_INTERN void os_event_free( /*==========*/ - os_event_t event) /*!< in: event to free */ + os_event_t event, /*!< in: event to free */ + bool free_memory)/*!< in: if true, deallocate the event + memory block too */ { ut_a(event); @@ -528,16 +419,10 @@ os_event_free( os_cond_destroy(&(event->cond_var)); } - /* Remove from the list of events */ - os_mutex_enter(os_sync_mutex); + os_atomic_decrement_ulint(&os_event_count, 1); - UT_LIST_REMOVE(os_event_list, os_event_list, event); - - os_event_count--; - - os_mutex_exit(os_sync_mutex); - - ut_free(event); + if (free_memory) + ut_free(event); } /**********************************************************//** @@ -585,10 +470,10 @@ os_event_wait_low( os_fast_mutex_lock(&event->os_mutex); if (!reset_sig_count) { - reset_sig_count = event->signal_count; + reset_sig_count = event->signal_count(); } - while (!event->is_set && event->signal_count == reset_sig_count) { + while (!event->is_set() && event->signal_count() == reset_sig_count) { os_cond_wait(&(event->cond_var), &(event->os_mutex)); /* Solaris manual said that spurious wakeups may occur: we @@ -686,11 +571,12 @@ os_event_wait_time_low( os_fast_mutex_lock(&event->os_mutex); if (!reset_sig_count) { - reset_sig_count = event->signal_count; + reset_sig_count = event->signal_count(); } do { - if (event->is_set || event->signal_count != reset_sig_count) { + if (event->is_set() + || event->signal_count() != reset_sig_count) { break; } @@ -734,18 +620,7 @@ os_mutex_create(void) mutex_str->count = 0; mutex_str->event = os_event_create(); - if (UNIV_LIKELY(os_sync_mutex_inited)) { - /* When creating os_sync_mutex itself we cannot reserve it */ - os_mutex_enter(os_sync_mutex); - } - - UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str); - - os_mutex_count++; - - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_exit(os_sync_mutex); - } + os_atomic_increment_ulint(&os_mutex_count, 1); return(mutex_str); } @@ -791,21 +666,9 @@ os_mutex_free( { ut_a(mutex); - if (UNIV_LIKELY(!os_sync_free_called)) { - os_event_free_internal(mutex->event); - } - - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_enter(os_sync_mutex); - } - - UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex); + os_event_free(mutex->event); - os_mutex_count--; - - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_exit(os_sync_mutex); - } + os_atomic_decrement_ulint(&os_mutex_count, 1); os_fast_mutex_free(static_cast<os_fast_mutex_t*>(mutex->handle)); ut_free(mutex->handle); @@ -827,18 +690,7 @@ os_fast_mutex_init_func( #else ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST)); #endif - if (UNIV_LIKELY(os_sync_mutex_inited)) { - /* When creating os_sync_mutex itself (in Unix) we cannot - reserve it */ - - os_mutex_enter(os_sync_mutex); - } - - os_fast_mutex_count++; - - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_exit(os_sync_mutex); - } + os_atomic_increment_ulint(&os_fast_mutex_count, 1); } /**********************************************************//** @@ -900,17 +752,6 @@ os_fast_mutex_free_func( putc('\n', stderr); } #endif - if (UNIV_LIKELY(os_sync_mutex_inited)) { - /* When freeing the last mutexes, we have - already freed os_sync_mutex */ - - os_mutex_enter(os_sync_mutex); - } - ut_ad(os_fast_mutex_count > 0); - os_fast_mutex_count--; - - if (UNIV_LIKELY(os_sync_mutex_inited)) { - os_mutex_exit(os_sync_mutex); - } + os_atomic_decrement_ulint(&os_fast_mutex_count, 1); } diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc index a862022693c..1d417f9823c 100644 --- a/storage/xtradb/os/os0thread.cc +++ b/storage/xtradb/os/os0thread.cc @@ -145,9 +145,7 @@ os_thread_create_func( os_thread_t thread; DWORD win_thread_id; - os_mutex_enter(os_sync_mutex); - os_thread_count++; - os_mutex_exit(os_sync_mutex); + os_atomic_increment_ulint(&os_thread_count, 1); thread = CreateThread(NULL, /* no security attributes */ 0, /* default size stack */ @@ -186,9 +184,8 @@ os_thread_create_func( exit(1); } #endif - os_mutex_enter(os_sync_mutex); - os_thread_count++; - os_mutex_exit(os_sync_mutex); + ulint new_count = os_atomic_increment_ulint(&os_thread_count, 1); + ut_a(new_count <= OS_THREAD_MAX_N); #ifdef UNIV_HPUX10 ret = pthread_create(&pthread, pthread_attr_default, func, arg); @@ -205,8 +202,6 @@ os_thread_create_func( pthread_attr_destroy(&attr); #endif - ut_a(os_thread_count <= OS_THREAD_MAX_N); - if (thread_id) { *thread_id = pthread; } @@ -233,9 +228,7 @@ os_thread_exit( pfs_delete_thread(); #endif - os_mutex_enter(os_sync_mutex); - os_thread_count--; - os_mutex_exit(os_sync_mutex); + os_atomic_decrement_ulint(&os_thread_count, 1); #ifdef __WIN__ ExitThread((DWORD) exit_value); diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index ecf7e5bb116..48c165bbc54 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1992,7 +1992,8 @@ wait_again: if (max_doc_id && err == DB_SUCCESS) { /* Sync fts cache for other fts indexes to keep all fts indexes consistent in sync_doc_id. */ - err = fts_sync_table(const_cast<dict_table_t*>(new_table)); + err = fts_sync_table(const_cast<dict_table_t*>(new_table), + false, true); if (err == DB_SUCCESS) { fts_update_next_doc_id( diff --git a/storage/xtradb/srv/srv0conc.cc b/storage/xtradb/srv/srv0conc.cc index 6c15753246a..63268e3a266 100644 --- a/storage/xtradb/srv/srv0conc.cc +++ b/storage/xtradb/srv/srv0conc.cc @@ -158,6 +158,10 @@ srv_conc_free(void) { #ifndef HAVE_ATOMIC_BUILTINS os_fast_mutex_free(&srv_conc_mutex); + + for (ulint i = 0; i < OS_THREAD_MAX_N; i++) + os_event_free(srv_conc_slots[i].event); + mem_free(srv_conc_slots); srv_conc_slots = NULL; #endif /* !HAVE_ATOMIC_BUILTINS */ diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 2b086a2fb1f..14d272ac4c0 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -319,8 +319,6 @@ UNIV_INTERN ulong srv_read_ahead_threshold = 56; #ifdef UNIV_LOG_ARCHIVE UNIV_INTERN ibool srv_log_archive_on = FALSE; -UNIV_INTERN ibool srv_archive_recovery = 0; -UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn; #endif /* UNIV_LOG_ARCHIVE */ /* This parameter is used to throttle the number of insert buffers that are @@ -711,6 +709,10 @@ struct srv_sys_t{ srv_stats_t::ulint_ctr_1_t activity_count; /*!< For tracking server activity */ + srv_stats_t::ulint_ctr_1_t + ibuf_merge_activity_count;/*!< For tracking change + buffer merge activity, a subset + of overall server activity */ }; #ifndef HAVE_ATOMIC_BUILTINS @@ -1101,8 +1103,9 @@ srv_init(void) srv_checkpoint_completed_event = os_event_create(); + srv_redo_log_tracked_event = os_event_create(); + if (srv_track_changed_pages) { - srv_redo_log_tracked_event = os_event_create(); os_event_set(srv_redo_log_tracked_event); } @@ -1144,17 +1147,30 @@ srv_free(void) { srv_conc_free(); - /* The mutexes srv_sys->mutex and srv_sys->tasks_mutex should have - been freed by sync_close() already. */ - mem_free(srv_sys); - srv_sys = NULL; + if (!srv_read_only_mode) { - trx_i_s_cache_free(trx_i_s_cache); + for (ulint i = 0; i < srv_sys->n_sys_threads; i++) + os_event_free(srv_sys->sys_threads[i].event); - if (!srv_read_only_mode) { + os_event_free(srv_error_event); + os_event_free(srv_monitor_event); os_event_free(srv_buf_dump_event); - srv_buf_dump_event = NULL; + os_event_free(srv_checkpoint_completed_event); + os_event_free(srv_redo_log_tracked_event); + mutex_free(&srv_sys->mutex); + mutex_free(&srv_sys->tasks_mutex); } + +#ifndef HAVE_ATOMIC_BUILTINS + mutex_free(&server_mutex); +#endif + mutex_free(&srv_innodb_monitor_mutex); + mutex_free(&page_zip_stat_per_index_mutex); + + mem_free(srv_sys); + srv_sys = NULL; + + trx_i_s_cache_free(trx_i_s_cache); } /*********************************************************************//** @@ -2195,10 +2211,15 @@ rescan_idle: Increment the server activity count. */ UNIV_INTERN void -srv_inc_activity_count(void) -/*========================*/ +srv_inc_activity_count( +/*===================*/ + bool ibuf_merge_activity) /*!< whether this activity bump + is caused by the background + change buffer merge */ { srv_sys->activity_count.inc(); + if (ibuf_merge_activity) + srv_sys->ibuf_merge_activity_count.inc(); } /**********************************************************************//** @@ -2314,7 +2335,7 @@ DECLARE_THREAD(srv_redo_log_follow_thread)( /* TODO: sync with I_S log tracking status? */ ib_logf(IB_LOG_LEVEL_ERROR, "log tracking bitmap write failed, " - "stopping log tracking thread!\n"); + "stopping log tracking thread!"); break; } os_event_set(srv_redo_log_tracked_event); @@ -2356,7 +2377,7 @@ purge_archived_logs( if (!dir) { ib_logf(IB_LOG_LEVEL_WARN, "opening archived log directory %s failed. " - "Purge archived logs are not available\n", + "Purge archived logs are not available", srv_arch_dir); /* failed to open directory */ return(DB_ERROR); @@ -2444,7 +2465,7 @@ purge_archived_logs( archived_log_filename)) { ib_logf(IB_LOG_LEVEL_WARN, - "can't delete archived log file %s.\n", + "can't delete archived log file %s.", archived_log_filename); mutex_exit(&log_sys->mutex); @@ -2552,16 +2573,49 @@ srv_get_activity_count(void) return(srv_sys->activity_count); } +/** Get current server ibuf merge activity count. +@return ibuf merge activity count */ +static +ulint +srv_get_ibuf_merge_activity_count(void) +{ + return(srv_sys->ibuf_merge_activity_count); +} + /*******************************************************************//** -Check if there has been any activity. +Check if there has been any activity. Considers background change buffer +merge as regular server activity unless a non-default +old_ibuf_merge_activity_count value is passed, in which case the merge will be +treated as keeping server idle. @return FALSE if no change in activity counter. */ UNIV_INTERN ibool srv_check_activity( /*===============*/ - ulint old_activity_count) /*!< in: old activity count */ + ulint old_activity_count, /*!< in: old activity count */ + /*!< old change buffer merge + activity count, or + ULINT_UNDEFINED */ + ulint old_ibuf_merge_activity_count) { - return(srv_sys->activity_count != old_activity_count); + ulint new_activity_count = srv_sys->activity_count; + if (old_ibuf_merge_activity_count == ULINT_UNDEFINED) + return(new_activity_count != old_activity_count); + + /* If we care about ibuf merge activity, then the server is considered + idle if all activity, if any, was due to ibuf merge. */ + ulint new_ibuf_merge_activity_count + = srv_sys->ibuf_merge_activity_count; + + ut_ad(new_ibuf_merge_activity_count <= new_activity_count); + ut_ad(new_ibuf_merge_activity_count >= old_ibuf_merge_activity_count); + ut_ad(new_activity_count >= old_activity_count); + + ulint ibuf_merge_activity_delta = + new_ibuf_merge_activity_count - old_ibuf_merge_activity_count; + ulint activity_delta = new_activity_count - old_activity_count; + + return (activity_delta > ibuf_merge_activity_delta); } /********************************************************************//** @@ -2919,6 +2973,8 @@ DECLARE_THREAD(srv_master_thread)( { srv_slot_t* slot; ulint old_activity_count = srv_get_activity_count(); + ulint old_ibuf_merge_activity_count + = srv_get_ibuf_merge_activity_count(); ib_time_t last_print_time; ut_ad(!srv_read_only_mode); @@ -2956,8 +3012,12 @@ loop: srv_current_thread_priority = srv_master_thread_priority; - if (srv_check_activity(old_activity_count)) { + if (srv_check_activity(old_activity_count, + old_ibuf_merge_activity_count)) { + old_activity_count = srv_get_activity_count(); + old_ibuf_merge_activity_count + = srv_get_ibuf_merge_activity_count(); srv_master_do_active_tasks(); } else { srv_master_do_idle_tasks(); diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index f4d404e43a5..20a5e5e80f6 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -2409,40 +2409,6 @@ files_checked: create_log_files_rename(logfilename, dirnamelen, max_flushed_lsn, logfile0); -#ifdef UNIV_LOG_ARCHIVE - } else if (srv_archive_recovery) { - - ib_logf(IB_LOG_LEVEL_INFO, - " Starting archive recovery from a backup..."); - - err = recv_recovery_from_archive_start( - min_flushed_lsn, srv_archive_recovery_limit_lsn, - min_arch_log_no); - if (err != DB_SUCCESS) { - - return(DB_ERROR); - } - /* Since ibuf init is in dict_boot, and ibuf is needed - in any disk i/o, first call dict_boot */ - - err = dict_boot(); - - if (err != DB_SUCCESS) { - return(err); - } - - ib_bh = trx_sys_init_at_db_start(); - n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); - - /* The purge system needs to create the purge view and - therefore requires that the trx_sys is inited. */ - - trx_purge_sys_create(srv_n_purge_threads, ib_bh); - - srv_startup_is_before_trx_rollback_phase = FALSE; - - recv_recovery_from_archive_finish(); -#endif /* UNIV_LOG_ARCHIVE */ } else { /* Check if we support the max format that is stamped @@ -3058,8 +3024,7 @@ innobase_shutdown_for_mysql(void) logs_empty_and_mark_files_at_shutdown() and should have already quit or is quitting right now. */ - os_mutex_enter(os_sync_mutex); - + os_rmb; if (os_thread_count == 0) { /* All the threads have exited or are just exiting; NOTE that the threads may not have completed their @@ -3069,15 +3034,11 @@ innobase_shutdown_for_mysql(void) os_thread_exit(). Now we just sleep 0.1 seconds and hope that is enough! */ - os_mutex_exit(os_sync_mutex); - os_thread_sleep(100000); break; } - os_mutex_exit(os_sync_mutex); - os_thread_sleep(100000); } @@ -3138,26 +3099,23 @@ innobase_shutdown_for_mysql(void) que_close(); row_mysql_close(); srv_mon_free(); - sync_close(); srv_free(); fil_close(); - /* 4. Free the os_conc_mutex and all os_events and os_mutexes */ - - os_sync_free(); - - /* 5. Free all allocated memory */ + /* 4. Free all allocated memory */ pars_lexer_close(); log_mem_free(); buf_pool_free(srv_buf_pool_instances); mem_close(); + sync_close(); /* ut_free_all_mem() frees all allocated memory not freed yet in shutdown, and it will also free the ut_list_mutex, so it should be the last one for all operation */ ut_free_all_mem(); + os_rmb; if (os_thread_count != 0 || os_event_count != 0 || os_mutex_count != 0 diff --git a/storage/xtradb/sync/sync0arr.cc b/storage/xtradb/sync/sync0arr.cc index 0b01b0836b0..c2eb4543fb2 100644 --- a/storage/xtradb/sync/sync0arr.cc +++ b/storage/xtradb/sync/sync0arr.cc @@ -295,21 +295,21 @@ sync_cell_get_event( ulint type = cell->request_type; if (type == SYNC_MUTEX) { - return(((ib_mutex_t*) cell->wait_object)->event); + return(&((ib_mutex_t*) cell->wait_object)->event); } else if (type == SYNC_PRIO_MUTEX) { - return(((ib_prio_mutex_t*) cell->wait_object) + return(&((ib_prio_mutex_t*) cell->wait_object) ->high_priority_event); } else if (type == RW_LOCK_WAIT_EX) { - return(((rw_lock_t*) cell->wait_object)->wait_ex_event); + return(&((rw_lock_t*) cell->wait_object)->wait_ex_event); } else if (type == PRIO_RW_LOCK_SHARED) { - return(((prio_rw_lock_t *) cell->wait_object) + return(&((prio_rw_lock_t *) cell->wait_object) ->high_priority_s_event); } else if (type == PRIO_RW_LOCK_EX) { - return(((prio_rw_lock_t *) cell->wait_object) + return(&((prio_rw_lock_t *) cell->wait_object) ->high_priority_x_event); } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */ ut_ad(type == RW_LOCK_SHARED || type == RW_LOCK_EX); - return(((rw_lock_t*) cell->wait_object)->event); + return(&((rw_lock_t*) cell->wait_object)->event); } } diff --git a/storage/xtradb/sync/sync0rw.cc b/storage/xtradb/sync/sync0rw.cc index a72730e1877..7e964fd510f 100644 --- a/storage/xtradb/sync/sync0rw.cc +++ b/storage/xtradb/sync/sync0rw.cc @@ -264,8 +264,8 @@ rw_lock_create_func( lock->last_x_file_name = "not yet reserved"; lock->last_s_line = 0; lock->last_x_line = 0; - lock->event = os_event_create(); - lock->wait_ex_event = os_event_create(); + os_event_create(&lock->event); + os_event_create(&lock->wait_ex_event); mutex_enter(&rw_lock_list_mutex); @@ -306,9 +306,9 @@ rw_lock_create_func( #endif cmutex_name); lock->high_priority_s_waiters = 0; - lock->high_priority_s_event = os_event_create(); + os_event_create(&lock->high_priority_s_event); lock->high_priority_x_waiters = 0; - lock->high_priority_x_event = os_event_create(); + os_event_create(&lock->high_priority_x_event); lock->high_priority_wait_ex_waiter = 0; } @@ -336,9 +336,9 @@ rw_lock_free_func( mutex = rw_lock_get_mutex(lock); #endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ - os_event_free(lock->event); + os_event_free(&lock->event, false); - os_event_free(lock->wait_ex_event); + os_event_free(&lock->wait_ex_event, false); ut_ad(UT_LIST_GET_PREV(list, lock) == NULL || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); @@ -368,8 +368,8 @@ rw_lock_free_func( /*==============*/ prio_rw_lock_t* lock) /*!< in: rw-lock */ { - os_event_free(lock->high_priority_s_event); - os_event_free(lock->high_priority_x_event); + os_event_free(&lock->high_priority_s_event, false); + os_event_free(&lock->high_priority_x_event, false); rw_lock_free_func(&lock->base_lock); } diff --git a/storage/xtradb/sync/sync0sync.cc b/storage/xtradb/sync/sync0sync.cc index 7fc992bf972..fe50e17f106 100644 --- a/storage/xtradb/sync/sync0sync.cc +++ b/storage/xtradb/sync/sync0sync.cc @@ -209,10 +209,7 @@ UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key; /** Global list of database mutexes (not OS mutexes) created. */ UNIV_INTERN ut_list_base_node_t mutex_list; -/** Global list of priority mutexes. A subset of mutex_list */ -UNIV_INTERN UT_LIST_BASE_NODE_T(ib_prio_mutex_t) prio_mutex_list; - -/** Mutex protecting the mutex_list and prio_mutex_list variables */ +/** Mutex protecting the mutex_list variable */ UNIV_INTERN ib_mutex_t mutex_list_mutex; #ifdef UNIV_PFS_MUTEX @@ -283,7 +280,7 @@ mutex_create_func( os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mutex->os_fast_mutex); mutex->lock_word = 0; #endif - mutex->event = os_event_create(); + os_event_create(&mutex->event); mutex_set_waiters(mutex, 0); #ifdef UNIV_DEBUG mutex->magic_n = MUTEX_MAGIC_N; @@ -355,11 +352,7 @@ mutex_create_func( #endif /* UNIV_DEBUG */ cmutex_name); mutex->high_priority_waiters = 0; - mutex->high_priority_event = os_event_create(); - - mutex_enter(&mutex_list_mutex); - UT_LIST_ADD_FIRST(list, prio_mutex_list, mutex); - mutex_exit(&mutex_list_mutex); + os_event_create(&mutex->high_priority_event); } /******************************************************************//** @@ -406,7 +399,7 @@ mutex_free_func( mutex_exit(&mutex_list_mutex); } - os_event_free(mutex->event); + os_event_free(&mutex->event, false); #ifdef UNIV_MEM_DEBUG func_exit: #endif /* UNIV_MEM_DEBUG */ @@ -433,12 +426,8 @@ mutex_free_func( /*============*/ ib_prio_mutex_t* mutex) /*!< in: mutex */ { - mutex_enter(&mutex_list_mutex); - UT_LIST_REMOVE(list, prio_mutex_list, mutex); - mutex_exit(&mutex_list_mutex); - ut_a(mutex->high_priority_waiters == 0); - os_event_free(mutex->high_priority_event); + os_event_free(&mutex->high_priority_event, false); mutex_free_func(&mutex->base_mutex); } @@ -703,7 +692,7 @@ mutex_signal_object( /* The memory order of resetting the waiters field and signaling the object is important. See LEMMA 1 above. */ - os_event_set(mutex->event); + os_event_set(&mutex->event); sync_array_object_signalled(); } @@ -1584,7 +1573,6 @@ sync_init(void) /* Init the mutex list and create the mutex to protect it. */ UT_LIST_INIT(mutex_list); - UT_LIST_INIT(prio_mutex_list); mutex_create(mutex_list_mutex_key, &mutex_list_mutex, SYNC_NO_ORDER_CHECK); #ifdef UNIV_SYNC_DEBUG @@ -1636,22 +1624,21 @@ sync_thread_level_arrays_free(void) #endif /* UNIV_SYNC_DEBUG */ /******************************************************************//** -Frees the resources in InnoDB's own synchronization data structures. Use -os_sync_free() after calling this. */ +Frees the resources in InnoDB's own synchronization data structures. */ UNIV_INTERN void sync_close(void) /*===========*/ { ib_mutex_t* mutex; - ib_prio_mutex_t* prio_mutex; sync_array_close(); - for (prio_mutex = UT_LIST_GET_FIRST(prio_mutex_list); prio_mutex;) { - mutex_free(prio_mutex); - prio_mutex = UT_LIST_GET_FIRST(prio_mutex_list); - } +#ifdef UNIV_SYNC_DEBUG + os_event_free(rw_lock_debug_event); + mutex_free(&rw_lock_debug_mutex); +#endif + mutex_free(&rw_lock_list_mutex); for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL; @@ -1669,7 +1656,6 @@ sync_close(void) mutex = UT_LIST_GET_FIRST(mutex_list); } - mutex_free(&mutex_list_mutex); #ifdef UNIV_SYNC_DEBUG mutex_free(&sync_thread_mutex); @@ -1679,6 +1665,8 @@ sync_close(void) sync_thread_level_arrays_free(); #endif /* UNIV_SYNC_DEBUG */ + mutex_free(&mutex_list_mutex); + sync_initialized = FALSE; } diff --git a/storage/xtradb/trx/trx0i_s.cc b/storage/xtradb/trx/trx0i_s.cc index 794ee432ca4..fe1a615693b 100644 --- a/storage/xtradb/trx/trx0i_s.cc +++ b/storage/xtradb/trx/trx0i_s.cc @@ -1466,6 +1466,8 @@ trx_i_s_cache_free( /*===============*/ trx_i_s_cache_t* cache) /*!< in, own: cache to free */ { + rw_lock_free(&cache->rw_lock); + mutex_free(&cache->last_read_mutex); hash_table_free(cache->locks_hash); ha_storage_free(cache->storage); table_cache_free(&cache->innodb_trx); |