diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2019-07-25 18:42:06 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2019-07-25 18:42:06 +0300 |
commit | e9c1701e11e2441435223cc7c00c467f58aaff19 (patch) | |
tree | 6be7d0e8fe87272e1abb2704fdb9859481d8acc2 /storage/innobase | |
parent | 17794fb9aac9ca4fcb0b1e5904671a025a1b6b74 (diff) | |
parent | f3eb82f048d342c11fc3869eca2e6faed9a4835d (diff) | |
download | mariadb-git-e9c1701e11e2441435223cc7c00c467f58aaff19.tar.gz |
Merge 10.3 into 10.4
Diffstat (limited to 'storage/innobase')
62 files changed, 662 insertions, 1106 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index dfa6e032176..cbd280af223 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -144,8 +144,7 @@ SET(INNOBASE_SOURCES ut/ut0rnd.cc ut/ut0ut.cc ut/ut0vec.cc - ut/ut0wqueue.cc - ut/ut0timer.cc) + ut/ut0wqueue.cc) MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE MODULE_OUTPUT_NAME ha_innodb diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 369a0bf6181..6a1d23fb472 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved. +Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved. Copyright (C) 2014, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -36,7 +36,6 @@ Modified 30/07/2014 Jan Lindström jan.lindstrom@mariadb.com #include "ibuf0ibuf.h" #include "lock0lock.h" #include "srv0start.h" -#include "ut0timer.h" #include <list> @@ -100,8 +99,7 @@ Initialize defragmentation. */ void btr_defragment_init() { - srv_defragment_interval = ut_microseconds_to_timer( - (ulonglong) (1000000.0 / srv_defragment_frequency)); + srv_defragment_interval = 1000000000ULL / srv_defragment_frequency; mutex_create(LATCH_ID_BTR_DEFRAGMENT_MUTEX, &btr_defragment_mutex); } @@ -728,7 +726,7 @@ DECLARE_THREAD(btr_defragment_thread)(void*) } pcur = item->pcur; - ulonglong now = ut_timer_now(); + ulonglong now = my_interval_timer(); ulonglong elapsed = now - item->last_processed; if (elapsed < srv_defragment_interval) { @@ -738,11 +736,12 @@ DECLARE_THREAD(btr_defragment_thread)(void*) defragmentation of all indices queue up on a single thread, it's likely other indices that follow this one don't need to sleep again. */ - os_thread_sleep(((ulint)ut_timer_to_microseconds( - srv_defragment_interval - elapsed))); + os_thread_sleep(static_cast<ulint> + ((srv_defragment_interval - elapsed) + / 1000)); } - now = ut_timer_now(); + now = my_interval_timer(); mtr_start(&mtr); cursor = btr_pcur_get_btr_cur(pcur); index = btr_cur_get_index(cursor); diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index e8c3ab7f02c..7a7c3189add 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -635,7 +635,7 @@ buf_buddy_relocate( if (buf_page_can_relocate(bpage)) { /* Relocate the compressed page. */ - uintmax_t usec = ut_time_us(NULL); + const ulonglong ns = my_interval_timer(); ut_a(bpage->zip.data == src); @@ -651,7 +651,7 @@ buf_buddy_relocate( buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i]; buddy_stat->relocated++; - buddy_stat->relocated_usec += ut_time_us(NULL) - usec; + buddy_stat->relocated_usec+= (my_interval_timer() - ns) / 1000; return(true); } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index c270a709f2a..0701ec31f5f 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1132,25 +1132,30 @@ buf_page_is_corrupted( /* A page filled with NUL bytes is considered not corrupted. The FIL_PAGE_FILE_FLUSH_LSN field may be written nonzero for the first page of each file of the system tablespace. - Ignore it for the system tablespace. */ + We want to ignore it for the system tablespace, but because + we do not know the expected tablespace here, we ignore the + field for all data files, except for + innodb_checksum_algorithm=full_crc32 which we handled above. */ if (!checksum_field1 && !checksum_field2) { - ulint i = 0; - do { - if (read_buf[i]) { - return true; + /* Checksum fields can have valid value as zero. + If the page is not empty then do the checksum + calculation for the page. */ + bool all_zeroes = true; + for (size_t i = 0; i < srv_page_size; i++) { +#ifndef UNIV_INNOCHECKSUM + if (i == FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) { + i += 8; } - } while (++i < FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - /* Ignore FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION unless - innodb_checksum_algorithm=full_crc32. */ - i += 8; - - do { +#endif if (read_buf[i]) { - return true; + all_zeroes = false; + break; } - } while (++i < srv_page_size); - return false; + } + + if (all_zeroes) { + return false; + } } switch (curr_algo) { @@ -1991,7 +1996,7 @@ buf_pool_init_instance( buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); - buf_pool->last_printout_time = ut_time(); + buf_pool->last_printout_time = time(NULL); } /* 2. Initialize flushing fields -------------------------------- */ @@ -2805,7 +2810,7 @@ buf_pool_resize() buf_resize_status("Withdrawing blocks to be shrunken."); - ib_time_t withdraw_started = ut_time(); + time_t withdraw_started = time(NULL); ulint message_interval = 60; ulint retry_interval = 1; @@ -2831,8 +2836,10 @@ withdraw_retry: /* abort buffer pool load */ buf_load_abort(); + const time_t current_time = time(NULL); + if (should_retry_withdraw - && ut_difftime(ut_time(), withdraw_started) >= message_interval) { + && difftime(current_time, withdraw_started) >= message_interval) { if (message_interval > 900) { message_interval = 1800; @@ -2848,8 +2855,7 @@ withdraw_retry: trx = UT_LIST_GET_NEXT(trx_list, trx)) { if (trx->state != TRX_STATE_NOT_STARTED && trx->mysql_thd != NULL - && ut_difftime(withdraw_started, - trx->start_time) > 0) { + && withdraw_started > trx->start_time) { if (!found) { ib::warn() << "The following trx might hold" @@ -2862,13 +2868,13 @@ withdraw_retry: } lock_trx_print_wait_and_mvcc_state( - stderr, trx); + stderr, trx, current_time); } } mutex_exit(&trx_sys.mutex); lock_mutex_exit(); - withdraw_started = ut_time(); + withdraw_started = current_time; } if (should_retry_withdraw) { @@ -6347,7 +6353,7 @@ void buf_refresh_io_stats( buf_pool_t* buf_pool) { - buf_pool->last_printout_time = ut_time(); + buf_pool->last_printout_time = time(NULL); buf_pool->old_stat = buf_pool->stat; } diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 373f6eb4539..65febb14a76 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -2402,7 +2402,7 @@ page_cleaner_flush_pages_recommendation( if (prev_lsn == 0) { /* First time around. */ prev_lsn = cur_lsn; - prev_time = ut_time(); + prev_time = time(NULL); return(0); } @@ -2412,7 +2412,7 @@ page_cleaner_flush_pages_recommendation( sum_pages += last_pages_in; - time_t curr_time = ut_time(); + time_t curr_time = time(NULL); double time_elapsed = difftime(curr_time, prev_time); /* We update our variables every srv_flushing_avg_loops diff --git a/storage/innobase/data/data0data.cc b/storage/innobase/data/data0data.cc index 02f921f716c..7cc9c2bc09e 100644 --- a/storage/innobase/data/data0data.cc +++ b/storage/innobase/data/data0data.cc @@ -601,7 +601,6 @@ dtuple_convert_big_rec( dfield_t* dfield; ulint size; ulint n_fields; - ulint local_len; ulint local_prefix_len; if (!dict_index_is_clust(index)) { @@ -612,6 +611,7 @@ dtuple_convert_big_rec( return NULL; } + ulint local_len = index->table->get_overflow_field_local_len(); const auto zip_size = index->table->space->zip_size(); ut_ad(index->n_uniq > 0); diff --git a/storage/innobase/dict/dict0defrag_bg.cc b/storage/innobase/dict/dict0defrag_bg.cc index 31fb05e59c6..7de50f19217 100644 --- a/storage/innobase/dict/dict0defrag_bg.cc +++ b/storage/innobase/dict/dict0defrag_bg.cc @@ -238,7 +238,6 @@ dict_stats_save_defrag_summary( dict_index_t* index) /*!< in: index */ { dberr_t ret=DB_SUCCESS; - lint now = (lint) ut_time(); if (dict_index_is_ibuf(index)) { return DB_SUCCESS; @@ -246,7 +245,7 @@ dict_stats_save_defrag_summary( dict_sys_lock(); - ret = dict_stats_save_index_stat(index, now, "n_pages_freed", + ret = dict_stats_save_index_stat(index, time(NULL), "n_pages_freed", index->stat_defrag_n_pages_freed, NULL, "Number of pages freed during" @@ -276,7 +275,7 @@ dict_stats_save_defrag_stats( return dict_stats_report_error(index->table, true); } - lint now = (lint) ut_time(); + const time_t now = time(NULL); mtr_t mtr; ulint n_leaf_pages; ulint n_leaf_reserved; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 2ba98a933e2..f6a0ac4f7dd 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -1768,20 +1768,10 @@ dict_col_name_is_reserved( return(FALSE); } -/****************************************************************//** -If a record of this index might not fit on a single B-tree page, -return TRUE. -@return TRUE if the index record could become too big */ -static -ibool -dict_index_too_big_for_tree( -/*========================*/ - const dict_table_t* table, /*!< in: table */ - const dict_index_t* new_index, /*!< in: index */ - bool strict) /*!< in: TRUE=report error if - records could be too big to - fit in an B-tree page */ +bool dict_index_t::rec_potentially_too_big(bool strict) const { + ut_ad(table); + ulint comp; ulint i; /* maximum possible storage size of a record */ @@ -1793,8 +1783,8 @@ dict_index_too_big_for_tree( /* FTS index consists of auxiliary tables, they shall be excluded from index row size check */ - if (new_index->type & DICT_FTS) { - return(false); + if (type & DICT_FTS) { + return false; } DBUG_EXECUTE_IF( @@ -1815,8 +1805,7 @@ dict_index_too_big_for_tree( an empty page, minus a byte for recoding the heap number in the page modification log. The maximum allowed node pointer size is half that. */ - page_rec_max = page_zip_empty_size(new_index->n_fields, - zip_size); + page_rec_max = page_zip_empty_size(n_fields, zip_size); if (page_rec_max) { page_rec_max--; } @@ -1844,25 +1833,24 @@ dict_index_too_big_for_tree( if (comp) { /* Include the "null" flags in the maximum possible record size. */ - rec_max_size += UT_BITS_IN_BYTES( - unsigned(new_index->n_nullable)); + rec_max_size += UT_BITS_IN_BYTES(unsigned(n_nullable)); } else { - /* For each column, include a 2-byte offset and a + /* For each column, include a 2-byte offset and a "null" flag. The 1-byte format is only used in short records that do not contain externally stored columns. Such records could never exceed the page limit, even when using the 2-byte format. */ - rec_max_size += 2 * unsigned(new_index->n_fields); + rec_max_size += 2 * unsigned(n_fields); } - /* Compute the maximum possible record size. */ - for (i = 0; i < new_index->n_fields; i++) { + const ulint max_local_len = table->get_overflow_field_local_len(); + + /* Compute the maximum possible record size. */ + for (i = 0; i < n_fields; i++) { const dict_field_t* field - = dict_index_get_nth_field(new_index, i); + = dict_index_get_nth_field(this, i); const dict_col_t* col = dict_field_get_col(field); - ulint field_max_size; - ulint field_ext_max_size; /* In dtuple_convert_big_rec(), variable-length columns that are longer than BTR_EXTERN_LOCAL_STORED_MAX_SIZE @@ -1876,26 +1864,28 @@ dict_index_too_big_for_tree( case in rec_get_converted_size_comp() for REC_STATUS_ORDINARY records. */ - field_max_size = dict_col_get_fixed_size(col, comp); + size_t field_max_size = dict_col_get_fixed_size(col, comp); if (field_max_size && field->fixed_len != 0) { /* dict_index_add_col() should guarantee this */ ut_ad(!field->prefix_len || field->fixed_len == field->prefix_len); /* Fixed lengths are not encoded in ROW_FORMAT=COMPACT. */ - field_ext_max_size = 0; goto add_field_size; } field_max_size = dict_col_get_max_size(col); - field_ext_max_size = field_max_size < 256 ? 1 : 2; if (field->prefix_len) { if (field->prefix_len < field_max_size) { field_max_size = field->prefix_len; } - } else if (field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE - && dict_index_is_clust(new_index)) { + + // those conditions were copied from dtuple_convert_big_rec() + } else if (field_max_size > max_local_len + && field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE + && DATA_BIG_COL(col) + && dict_index_is_clust(this)) { /* In the worst case, we have a locally stored column of BTR_EXTERN_LOCAL_STORED_MAX_SIZE bytes. @@ -1903,21 +1893,26 @@ dict_index_too_big_for_tree( column were stored externally, the lengths in the clustered index page would be BTR_EXTERN_FIELD_REF_SIZE and 2. */ - field_max_size = BTR_EXTERN_LOCAL_STORED_MAX_SIZE; - field_ext_max_size = 1; + field_max_size = max_local_len; } if (comp) { /* Add the extra size for ROW_FORMAT=COMPACT. For ROW_FORMAT=REDUNDANT, these bytes were added to rec_max_size before this loop. */ - rec_max_size += field_ext_max_size; + rec_max_size += field_max_size < 256 ? 1 : 2; } add_field_size: rec_max_size += field_max_size; /* Check the size limit on leaf pages. */ if (rec_max_size >= page_rec_max) { + // with 4k page size innodb_index_stats becomes too big + // this crutch allows server bootstrapping to continue + if (table->is_system_db) { + return false; + } + ib::error_or_warn(strict) << "Cannot add field " << field->name << " in table " << table->name @@ -1927,7 +1922,7 @@ add_field_size: " size (" << page_rec_max << ") for a record on index leaf page."; - return(TRUE); + return true; } /* Check the size limit on non-leaf pages. Records @@ -1936,14 +1931,14 @@ add_field_size: and a node pointer field. When we have processed the unique columns, rec_max_size equals the size of the node pointer record minus the node pointer column. */ - if (i + 1 == dict_index_get_n_unique_in_tree(new_index) + if (i + 1 == dict_index_get_n_unique_in_tree(this) && rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) { - return(TRUE); + return true; } } - return(FALSE); + return false; } /** Adds an index to the dictionary cache, with possible indexing newly @@ -2013,7 +2008,7 @@ dict_index_add_to_cache( new_index->disable_ahi = index->disable_ahi; #endif - if (dict_index_too_big_for_tree(index->table, new_index, strict)) { + if (new_index->rec_potentially_too_big(strict)) { if (strict) { dict_mem_index_free(new_index); diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 67a9d2de5e1..bd4bb261320 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -956,7 +956,7 @@ dict_stats_update_transient( table->stat_sum_of_other_index_sizes = sum_of_index_sizes - index->stat_index_size; - table->stats_last_recalc = ut_time(); + table->stats_last_recalc = time(NULL); table->stat_modified_counter = 0; @@ -2267,7 +2267,7 @@ dict_stats_update_persistent( += index->stat_index_size; } - table->stats_last_recalc = ut_time(); + table->stats_last_recalc = time(NULL); table->stat_modified_counter = 0; @@ -2296,7 +2296,7 @@ rolled back only in the case of error, but not freed. dberr_t dict_stats_save_index_stat( dict_index_t* index, - ib_time_t last_update, + time_t last_update, const char* stat_name, ib_uint64_t stat_value, ib_uint64_t* sample_size, @@ -2424,7 +2424,6 @@ dict_stats_save( const index_id_t* only_for_index) { pars_info_t* pinfo; - ib_time_t now; dberr_t ret; dict_table_t* table; char db_utf8[MAX_DB_UTF8_LEN]; @@ -2443,7 +2442,7 @@ dict_stats_save( dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8), table_utf8, sizeof(table_utf8)); - now = ut_time(); + const time_t now = time(NULL); dict_sys_lock(); pinfo = pars_info_create(); diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc index 6702a884dcf..2985b6faf35 100644 --- a/storage/innobase/dict/dict0stats_bg.cc +++ b/storage/innobase/dict/dict0stats_bg.cc @@ -393,14 +393,14 @@ dict_stats_process_entry_from_recalc_pool() mutex_exit(&dict_sys.mutex); - /* ut_time() could be expensive, the current function + /* time() could be expensive, the current function is called once every time a table has been changed more than 10% and on a system with lots of small tables, this could become hot. If we find out that this is a problem, then the check below could eventually be replaced with something else, though a time interval is the natural approach. */ - if (ut_difftime(ut_time(), table->stats_last_recalc) + if (difftime(time(NULL), table->stats_last_recalc) < MIN_RECALC_INTERVAL) { /* Stats were (re)calculated not long ago. To avoid diff --git a/storage/innobase/eval/eval0eval.cc b/storage/innobase/eval/eval0eval.cc index fc16b9defb5..577157d2eb9 100644 --- a/storage/innobase/eval/eval0eval.cc +++ b/storage/innobase/eval/eval0eval.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -814,7 +815,7 @@ eval_predefined( dfield_get_data(que_node_get_val(arg1))); } else if (func == PARS_SYSDATE_TOKEN) { - int_val = (lint) ut_time(); + int_val = (lint) time(NULL); } else { eval_predefined_2(func_node); diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 837c8d0825e..50b73222607 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -1838,19 +1838,18 @@ fil_crypt_get_page_throttle_func( state->crypt_stat.pages_read_from_disk++; - uintmax_t start = ut_time_us(NULL); + const ulonglong start = my_interval_timer(); block = buf_page_get_gen(page_id, zip_size, RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, file, line, mtr, &err); - uintmax_t end = ut_time_us(NULL); - - if (end < start) { - end = start; // safety... - } + const ulonglong end = my_interval_timer(); state->cnt_waited++; - state->sum_waited_us += (end - start); + + if (end > start) { + state->sum_waited_us += (end - start) / 1000; + } /* average page load */ ulint add_sleeptime_ms = 0; @@ -2174,7 +2173,7 @@ fil_crypt_flush_space( bool success = false; ulint n_pages = 0; ulint sum_pages = 0; - uintmax_t start = ut_time_us(NULL); + const ulonglong start = my_interval_timer(); do { success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages); @@ -2182,11 +2181,11 @@ fil_crypt_flush_space( sum_pages += n_pages; } while (!success && !space->is_stopping()); - uintmax_t end = ut_time_us(NULL); + const ulonglong end = my_interval_timer(); if (sum_pages && end > start) { state->cnt_waited += sum_pages; - state->sum_waited_us += (end - start); + state->sum_waited_us += (end - start) / 1000; /* statistics */ state->crypt_stat.pages_flushed += sum_pages; diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 8a44aa83418..a8ac91a9903 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2858,9 +2858,15 @@ fil_rename_tablespace( space->n_pending_ops--; ut_ad(space->name == old_space_name); ut_ad(node->name == old_file_name); - - bool success = os_file_rename( - innodb_data_file_key, old_file_name, new_file_name); + bool success; + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", + goto skip_second_rename; ); + success = os_file_rename(innodb_data_file_key, + old_file_name, + new_file_name); + DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", +skip_second_rename: + success = false; ); ut_ad(node->name == old_file_name); diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 37819e37426..246f1ce574c 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -74,8 +74,8 @@ ulong fts_min_token_size; // FIXME: testing -static ib_time_t elapsed_time = 0; -static ulint n_nodes = 0; +static time_t elapsed_time; +static ulint n_nodes; #ifdef FTS_CACHE_SIZE_DEBUG /** The cache size permissible lower limit (1K) */ @@ -194,15 +194,13 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress -@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait, - bool has_dict); + bool wait); /****************************************************************//** Release all resources help by the words rb tree e.g., the node ilist. */ @@ -3389,7 +3387,7 @@ fts_add_doc_from_tuple( if (cache->total_size > fts_max_cache_size / 5 || fts_need_sync) { - fts_sync(cache->sync, true, false, false); + fts_sync(cache->sync, true, false); } mtr_start(&mtr); @@ -3567,7 +3565,7 @@ fts_add_doc_by_id( DBUG_EXECUTE_IF( "fts_instrument_sync_debug", - fts_sync(cache->sync, true, true, false); + fts_sync(cache->sync, true, true); ); DEBUG_SYNC_C("fts_instrument_sync_request"); @@ -3826,7 +3824,7 @@ fts_write_node( pars_info_t* info; dberr_t error; ib_uint32_t doc_count; - ib_time_t start_time; + time_t start_time; doc_id_t last_doc_id; doc_id_t first_doc_id; char table_name[MAX_FULL_NAME_LEN]; @@ -3875,9 +3873,9 @@ fts_write_node( " :last_doc_id, :doc_count, :ilist);"); } - start_time = ut_time(); + start_time = time(NULL); error = fts_eval_sql(trx, *graph); - elapsed_time += ut_time() - start_time; + elapsed_time += time(NULL) - start_time; ++n_nodes; return(error); @@ -4054,7 +4052,7 @@ fts_sync_begin( n_nodes = 0; elapsed_time = 0; - sync->start_time = ut_time(); + sync->start_time = time(NULL); sync->trx = trx_create(); trx_start_internal(sync->trx); @@ -4193,7 +4191,7 @@ fts_sync_commit( if (fts_enable_diag_print && elapsed_time) { ib::info() << "SYNC for table " << sync->table->name << ": SYNC time: " - << (ut_time() - sync->start_time) + << (time(NULL) - sync->start_time) << " secs: elapsed " << (double) n_nodes / elapsed_time << " ins/sec"; @@ -4263,15 +4261,13 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress -@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait, - bool has_dict) + bool wait) { if (srv_read_only_mode) { return DB_READ_ONLY; @@ -4304,12 +4300,6 @@ fts_sync( DEBUG_SYNC_C("fts_sync_begin"); fts_sync_begin(sync); - /* When sync in background, we hold dict operation lock - to prevent DDL like DROP INDEX, etc. */ - if (has_dict) { - sync->trx->dict_operation_lock_mode = RW_S_LATCH; - } - begin_sync: if (cache->total_size > fts_max_cache_size) { /* Avoid the case: sync never finish when @@ -4400,16 +4390,9 @@ end_sync: /** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] table fts table -@param[in] unlock_cache whether unlock cache when write node @param[in] wait whether wait for existing sync to finish -@param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. */ -dberr_t -fts_sync_table( - dict_table_t* table, - bool unlock_cache, - bool wait, - bool has_dict) +dberr_t fts_sync_table(dict_table_t* table, bool wait) { dberr_t err = DB_SUCCESS; @@ -4417,8 +4400,7 @@ fts_sync_table( if (table->space && table->fts->cache && !dict_table_is_corrupted(table)) { - err = fts_sync(table->fts->cache->sync, - unlock_cache, wait, has_dict); + err = fts_sync(table->fts->cache->sync, !wait, wait); } return(err); diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index f45ed70f374..6a3e52d8ace 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -58,7 +58,7 @@ static os_event_t fts_opt_shutdown_event = NULL; static const ulint FTS_WORD_NODES_INIT_SIZE = 64; /** Last time we did check whether system need a sync */ -static ib_time_t last_check_sync_time; +static time_t last_check_sync_time; /** FTS optimize thread message types. */ enum fts_msg_type_t { @@ -180,12 +180,11 @@ struct fts_slot_t { ulint deleted; /*!< Number of doc ids deleted since the last time this table was optimized */ - ib_time_t last_run; /*!< Time last run completed */ + /** time(NULL) of completing fts_optimize_table_bk() */ + time_t last_run; - ib_time_t completed; /*!< Optimize finish time */ - - ib_time_t interval_time; /*!< Minimum time to wait before - optimizing the table again. */ + /** time(NULL) of latest successful fts_optimize_table() */ + time_t completed; }; /** A table remove message for the FTS optimize thread. */ @@ -217,8 +216,8 @@ char fts_enable_diag_print; /** ZLib compressed block size.*/ static ulint FTS_ZIP_BLOCK_SIZE = 1024; -/** The amount of time optimizing in a single pass, in milliseconds. */ -static ib_time_t fts_optimize_time_limit = 0; +/** The amount of time optimizing in a single pass, in seconds. */ +static ulint fts_optimize_time_limit; /** It's defined in fts0fts.cc */ extern const char* fts_common_tables[]; @@ -1530,7 +1529,7 @@ fts_optimize_compact( /*=================*/ fts_optimize_t* optim, /*!< in: optimize state data */ dict_index_t* index, /*!< in: current FTS being optimized */ - ib_time_t start_time) /*!< in: optimize start time */ + time_t start_time) /*!< in: optimize start time */ { ulint i; dberr_t error = DB_SUCCESS; @@ -1563,8 +1562,11 @@ fts_optimize_compact( /* Free the word that was optimized. */ fts_word_free(word); + ulint interval = ulint(time(NULL) - start_time); + if (fts_optimize_time_limit > 0 - && (ut_time() - start_time) > fts_optimize_time_limit) { + && (lint(interval) < 0 + || interval > fts_optimize_time_limit)) { optim->done = TRUE; } @@ -1624,7 +1626,7 @@ fts_optimize_get_index_start_time( /*==============================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: FTS index */ - ib_time_t* start_time) /*!< out: time in secs */ + time_t* start_time) /*!< out: time in secs */ { return(fts_config_get_index_ulint( trx, index, FTS_OPTIMIZE_START_TIME, @@ -1640,7 +1642,7 @@ fts_optimize_set_index_start_time( /*==============================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: FTS index */ - ib_time_t start_time) /*!< in: start time */ + time_t start_time) /*!< in: start time */ { return(fts_config_set_index_ulint( trx, index, FTS_OPTIMIZE_START_TIME, @@ -1656,7 +1658,7 @@ fts_optimize_get_index_end_time( /*============================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: FTS index */ - ib_time_t* end_time) /*!< out: time in secs */ + time_t* end_time) /*!< out: time in secs */ { return(fts_config_get_index_ulint( trx, index, FTS_OPTIMIZE_END_TIME, (ulint*) end_time)); @@ -1671,7 +1673,7 @@ fts_optimize_set_index_end_time( /*============================*/ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: FTS index */ - ib_time_t end_time) /*!< in: end time */ + time_t end_time) /*!< in: end time */ { return(fts_config_set_index_ulint( trx, index, FTS_OPTIMIZE_END_TIME, (ulint) end_time)); @@ -1734,22 +1736,23 @@ fts_optimize_free( Get the max time optimize should run in millisecs. @return max optimize time limit in millisecs. */ static -ib_time_t +ulint fts_optimize_get_time_limit( /*========================*/ trx_t* trx, /*!< in: transaction */ fts_table_t* fts_table) /*!< in: aux table */ { - ib_time_t time_limit = 0; + ulint time_limit = 0; fts_config_get_ulint( trx, fts_table, - FTS_OPTIMIZE_LIMIT_IN_SECS, (ulint*) &time_limit); + FTS_OPTIMIZE_LIMIT_IN_SECS, &time_limit); + /* FIXME: This is returning milliseconds, while the variable + is being stored and interpreted as seconds! */ return(time_limit * 1000); } - /**********************************************************************//** Run OPTIMIZE on the given table. Note: this can take a very long time (hours). */ @@ -1762,7 +1765,6 @@ fts_optimize_words( fts_string_t* word) /*!< in: the starting word to optimize */ { fts_fetch_t fetch; - ib_time_t start_time; que_t* graph = NULL; CHARSET_INFO* charset = optim->fts_index_table.charset; @@ -1772,7 +1774,7 @@ fts_optimize_words( fts_optimize_time_limit = fts_optimize_get_time_limit( optim->trx, &optim->fts_common_table); - start_time = ut_time(); + const time_t start_time = time(NULL); /* Setup the callback to use for fetching the word ilist etc. */ fetch.read_arg = optim->words; @@ -1858,7 +1860,7 @@ fts_optimize_index_completed( dberr_t error; byte buf[sizeof(ulint)]; #ifdef FTS_OPTIMIZE_DEBUG - ib_time_t end_time = ut_time(); + time_t end_time = time(NULL); error = fts_optimize_set_index_end_time(optim->trx, index, end_time); #endif @@ -2249,8 +2251,8 @@ fts_optimize_indexes( dict_index_t* index; #ifdef FTS_OPTIMIZE_DEBUG - ib_time_t end_time; - ib_time_t start_time; + time_t end_time; + time_t start_time; /* Get the start and end optimize times for this index. */ error = fts_optimize_get_index_start_time( @@ -2270,14 +2272,14 @@ fts_optimize_indexes( /* Start time will be 0 only for the first time or after completing the optimization of all FTS indexes. */ if (start_time == 0) { - start_time = ut_time(); + start_time = time(NULL); error = fts_optimize_set_index_start_time( optim->trx, index, start_time); } /* Check if this index needs to be optimized or not. */ - if (ut_difftime(end_time, start_time) < 0) { + if (difftime(end_time, start_time) < 0) { error = fts_optimize_index(optim, index); if (error != DB_SUCCESS) { @@ -2349,7 +2351,7 @@ fts_optimize_reset_start_time( for (uint i = 0; i < ib_vector_size(fts->indexes); ++i) { dict_index_t* index; - ib_time_t start_time = 0; + time_t start_time = 0; /* Reset the start time to 0 for this index. */ error = fts_optimize_set_index_start_time( @@ -2378,11 +2380,13 @@ fts_optimize_table_bk( /*==================*/ fts_slot_t* slot) /*!< in: table to optimiza */ { - dberr_t error; + const time_t now = time(NULL); + const ulint interval = ulint(now - slot->last_run); /* Avoid optimizing tables that were optimized recently. */ if (slot->last_run > 0 - && (ut_time() - slot->last_run) < slot->interval_time) { + && lint(interval) >= 0 + && interval < FTS_OPTIMIZE_INTERVAL_IN_SECS) { return(DB_SUCCESS); } @@ -2390,12 +2394,19 @@ fts_optimize_table_bk( dict_table_t* table = dict_table_open_on_id( slot->table_id, FALSE, DICT_TABLE_OP_NORMAL); - if (table && fil_table_accessible(table) + if (!table) { + slot->last_run = now; + return DB_SUCCESS; + } + + dberr_t error; + + if (fil_table_accessible(table) && table->fts && table->fts->cache && table->fts->cache->deleted >= FTS_OPTIMIZE_THRESHOLD) { error = fts_optimize_table(table); - slot->last_run = ut_time(); + slot->last_run = time(NULL); if (error == DB_SUCCESS) { slot->running = false; @@ -2403,7 +2414,7 @@ fts_optimize_table_bk( } } else { /* Note time this run completed. */ - slot->last_run = ut_time(); + slot->last_run = now; error = DB_SUCCESS; } @@ -2653,7 +2664,6 @@ static bool fts_optimize_new_table(dict_table_t* table) slot->table_id = table->id; slot->running = false; - slot->interval_time = FTS_OPTIMIZE_INTERVAL_IN_SECS; return(TRUE); } @@ -2689,37 +2699,23 @@ Calculate how many tables in fts_slots need to be optimized. @return no. of tables to optimize */ static ulint fts_optimize_how_many() { - ulint i; - ib_time_t delta; - ulint n_tables = 0; - ib_time_t current_time; - - current_time = ut_time(); + ulint n_tables = 0; + const time_t current_time = time(NULL); - for (i = 0; i < ib_vector_size(fts_slots); ++i) { + for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) { const fts_slot_t* slot = static_cast<const fts_slot_t*>( ib_vector_get_const(fts_slots, i)); if (slot->table_id == 0) { continue; } - if (!slot->running) { - ut_a(slot->completed <= current_time); - - delta = current_time - slot->completed; + const time_t end = slot->running + ? slot->last_run : slot->completed; + ulint interval = ulint(current_time - end); - /* Skip slots that have been optimized recently. */ - if (delta >= slot->interval_time) { - ++n_tables; - } - } else { - ut_a(slot->last_run <= current_time); - - delta = current_time - slot->last_run; - - if (delta > slot->interval_time) { - ++n_tables; - } + if (lint(interval) < 0 + || interval >= FTS_OPTIMIZE_INTERVAL_IN_SECS) { + ++n_tables; } } @@ -2731,14 +2727,15 @@ Check if the total memory used by all FTS table exceeds the maximum limit. @return true if a sync is needed, false otherwise */ static bool fts_is_sync_needed() { - ulint total_memory = 0; - double time_diff = difftime(ut_time(), last_check_sync_time); + ulint total_memory = 0; + const time_t now = time(NULL); + double time_diff = difftime(now, last_check_sync_time); - if (fts_need_sync || time_diff < 5) { + if (fts_need_sync || (time_diff >= 0 && time_diff < 5)) { return(false); } - last_check_sync_time = ut_time(); + last_check_sync_time = now; for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) { const fts_slot_t* slot = static_cast<const fts_slot_t*>( @@ -2776,7 +2773,7 @@ static void fts_optimize_sync_table(table_id_t table_id) table_id, FALSE, DICT_TABLE_OP_NORMAL)) { if (fil_table_accessible(table) && table->fts && table->fts->cache) { - fts_sync_table(table, true, false, false); + fts_sync_table(table, false); } dict_table_close(table, FALSE, FALSE); @@ -2969,7 +2966,7 @@ fts_optimize_init(void) table_vector.clear(); fts_opt_shutdown_event = os_event_create(0); - last_check_sync_time = ut_time(); + last_check_sync_time = time(NULL); os_thread_create(fts_optimize_thread, fts_optimize_wq, NULL); } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 5fad1e3e080..7b0ddceca0d 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -100,7 +100,6 @@ this program; if not, write to the Free Software Foundation, Inc., #include "row0sel.h" #include "row0upd.h" #include "fil0crypt.h" -#include "ut0timer.h" #include "srv0mon.h" #include "srv0srv.h" #include "srv0start.h" @@ -1661,18 +1660,6 @@ thd_trx_is_auto_commit( && thd_is_select(thd)); } -extern "C" time_t thd_start_time(const THD* thd); - -/******************************************************************//** -Get the thread start time. -@return the thread start time in seconds since the epoch. */ -ulint thd_start_time_in_secs(THD*) -{ - // FIXME: This function should be added to the server code. - //return(thd_start_time(thd)); - return(ulint(ut_time())); -} - /** Enter InnoDB engine after checking the max number of user threads allowed, else the thread is put into sleep. @param[in,out] prebuilt row prebuilt handler */ @@ -1700,12 +1687,13 @@ innobase_srv_conc_enter_innodb( } else if (trx->mysql_thd != NULL && thd_is_replication_slave_thread(trx->mysql_thd)) { - - UT_WAIT_FOR( - srv_conc_get_active_threads() - < srv_thread_concurrency, - srv_replication_delay * 1000); - + const ulonglong end = my_interval_timer() + + ulonglong(srv_replication_delay) * 1000000; + while (srv_conc_get_active_threads() + >= srv_thread_concurrency + || my_interval_timer() >= end) { + os_thread_sleep(2000 /* 2 ms */); + } } else { srv_conc_enter_innodb(prebuilt); } @@ -5798,6 +5786,7 @@ innobase_build_v_templ( ulint n_v_col = ib_table->n_v_cols; bool marker[REC_MAX_N_FIELDS]; + DBUG_ENTER("innobase_build_v_templ"); ut_ad(ncol < REC_MAX_N_FIELDS); if (add_v != NULL) { @@ -5814,7 +5803,7 @@ innobase_build_v_templ( if (!locked) { mutex_exit(&dict_sys.mutex); } - return; + DBUG_VOID_RETURN; } memset(marker, 0, sizeof(bool) * ncol); @@ -5825,7 +5814,8 @@ innobase_build_v_templ( s_templ->n_col = ncol; s_templ->n_v_col = n_v_col; s_templ->rec_len = table->s->reclength; - s_templ->default_rec = table->s->default_values; + s_templ->default_rec = UT_NEW_ARRAY_NOKEY(uchar, s_templ->rec_len); + memcpy(s_templ->default_rec, table->s->default_values, s_templ->rec_len); /* Mark those columns could be base columns */ for (ulint i = 0; i < ib_table->n_v_cols; i++) { @@ -5922,6 +5912,7 @@ innobase_build_v_templ( s_templ->db_name = table->s->db.str; s_templ->tb_name = table->s->table_name.str; + DBUG_VOID_RETURN; } /** Check consistency between .frm indexes and InnoDB indexes. @@ -6127,6 +6118,8 @@ ha_innobase::open(const char* name, int, uint) ib_table = open_dict_table(name, norm_name, is_part, ignore_err); + DEBUG_SYNC(thd, "ib_open_after_dict_open"); + if (NULL == ib_table) { if (is_part) { @@ -6232,14 +6225,6 @@ no_such_table: mutex_enter(&dict_sys.mutex); if (ib_table->vc_templ == NULL) { ib_table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t()); - } else if (ib_table->get_ref_count() == 1) { - /* Clean and refresh the template if no one else - get hold on it */ - dict_free_vc_templ(ib_table->vc_templ); - ib_table->vc_templ->vtempl = NULL; - } - - if (ib_table->vc_templ->vtempl == NULL) { innobase_build_v_templ( table, ib_table, ib_table->vc_templ, NULL, true); @@ -12350,6 +12335,21 @@ int create_table_info_t::prepare_create_table(const char* name, bool strict) DBUG_RETURN(HA_ERR_UNSUPPORTED); } + for (uint i = 0; i < m_form->s->keys; i++) { + const size_t max_field_len + = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(m_flags); + const KEY& key = m_form->key_info[i]; + + if (key.algorithm == HA_KEY_ALG_FULLTEXT) { + continue; + } + + if (too_big_key_part_length(max_field_len, key)) { + DBUG_RETURN(convert_error_code_to_mysql( + DB_TOO_BIG_INDEX_COL, m_flags, NULL)); + } + } + DBUG_RETURN(parse_table_name(name)); } @@ -14449,7 +14449,7 @@ ha_innobase::optimize( if (innodb_optimize_fulltext_only) { if (m_prebuilt->table->fts && m_prebuilt->table->fts->cache && m_prebuilt->table->space) { - fts_sync_table(m_prebuilt->table, false, true, false); + fts_sync_table(m_prebuilt->table); fts_optimize_table(m_prebuilt->table); } try_alter = false; @@ -17982,8 +17982,7 @@ innodb_defragment_frequency_update(THD*, st_mysql_sys_var*, void*, const void* save) { srv_defragment_frequency = (*static_cast<const uint*>(save)); - srv_defragment_interval = ut_microseconds_to_timer( - (ulonglong) (1000000.0 / srv_defragment_frequency)); + srv_defragment_interval = 1000000000ULL / srv_defragment_frequency; } static inline char *my_strtok_r(char *str, const char *delim, char **saveptr) @@ -20365,6 +20364,7 @@ TABLE* innobase_init_vc_templ(dict_table_t* table) if (table->vc_templ != NULL) { return NULL; } + DBUG_ENTER("innobase_init_vc_templ"); table->vc_templ = UT_NEW_NOKEY(dict_vcol_templ_t()); @@ -20372,13 +20372,13 @@ TABLE* innobase_init_vc_templ(dict_table_t* table) ut_ad(mysql_table); if (!mysql_table) { - return NULL; + DBUG_RETURN(NULL); } mutex_enter(&dict_sys.mutex); innobase_build_v_templ(mysql_table, table, table->vc_templ, NULL, true); mutex_exit(&dict_sys.mutex); - return mysql_table; + DBUG_RETURN(mysql_table); } /** Change dbname and table name in table->vc_templ. @@ -20423,7 +20423,7 @@ innobase_rename_vc_templ( given col_no. @param[in] foreign foreign key information @param[in] update updated parent vector. -@param[in] col_no column position of the table +@param[in] col_no base column position of the child table to check @return updated field from the parent update vector, else NULL */ static dfield_t* @@ -20439,6 +20439,10 @@ innobase_get_field_from_update_vector( ulint prefix_col_no; for (ulint i = 0; i < foreign->n_fields; i++) { + if (dict_index_get_nth_col_no(foreign->foreign_index, i) + != col_no) { + continue; + } parent_col_no = dict_index_get_nth_col_no(parent_index, i); parent_field_no = dict_table_get_nth_col_pos( @@ -20448,8 +20452,7 @@ innobase_get_field_from_update_vector( upd_field_t* parent_ufield = &update->fields[j]; - if (parent_ufield->field_no == parent_field_no - && parent_col_no == col_no) { + if (parent_ufield->field_no == parent_field_no) { return(&parent_ufield->new_val); } } @@ -20580,6 +20583,7 @@ innobase_get_computed_value( ut_ad(thd != NULL); ut_ad(mysql_table); + DBUG_ENTER("innobase_get_computed_value"); const mysql_row_templ_t* vctempl = index->table->vc_templ->vtempl[ index->table->vc_templ->n_col + col->v_pos]; @@ -20668,7 +20672,7 @@ innobase_get_computed_value( stderr); dtuple_print(stderr, row); #endif /* INNODB_VIRTUAL_DEBUG */ - return(NULL); + DBUG_RETURN(NULL); } if (vctempl->mysql_null_bit_mask @@ -20676,7 +20680,7 @@ innobase_get_computed_value( & vctempl->mysql_null_bit_mask)) { dfield_set_null(field); field->type.prtype |= DATA_VIRTUAL; - return(field); + DBUG_RETURN(field); } row_mysql_store_col_in_innobase_format( @@ -20708,7 +20712,7 @@ innobase_get_computed_value( dfield_dup(field, heap); } - return(field); + DBUG_RETURN(field); } diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index a0ce93bec3c..28e1a1e36f1 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -944,3 +944,10 @@ ib_push_frm_error( TABLE* table, /*!< in: MySQL table */ ulint n_keys, /*!< in: InnoDB #keys */ bool push_warning); /*!< in: print warning ? */ + +/** Check each index part length whether they not exceed the max limit +@param[in] max_field_len maximum allowed key part length +@param[in] key MariaDB key definition +@return true if index column length exceeds limit */ +MY_ATTRIBUTE((warn_unused_result)) +bool too_big_key_part_length(size_t max_field_len, const KEY& key); diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 73260da824f..19c35c66885 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -3990,22 +3990,15 @@ created_clustered: DBUG_RETURN(indexdefs); } -/*******************************************************************//** -Check each index column size, make sure they do not exceed the max limit -@return true if index column size exceeds limit */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -innobase_check_column_length( -/*=========================*/ - ulint max_col_len, /*!< in: maximum column length */ - const KEY* key_info) /*!< in: Indexes to be created */ +MY_ATTRIBUTE((warn_unused_result)) +bool too_big_key_part_length(size_t max_field_len, const KEY& key) { - for (ulint key_part = 0; key_part < key_info->user_defined_key_parts; key_part++) { - if (key_info->key_part[key_part].length > max_col_len) { - return(true); + for (ulint i = 0; i < key.user_defined_key_parts; i++) { + if (key.key_part[i].length > max_field_len) { + return true; } } - return(false); + return false; } /********************************************************************//** @@ -7665,7 +7658,7 @@ check_if_ok_to_rename: continue; } - if (innobase_check_column_length(max_col_len, key)) { + if (too_big_key_part_length(max_col_len, *key)) { my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), max_col_len); goto err_exit_no_heap; @@ -8799,13 +8792,13 @@ innobase_drop_foreign_try( } /** Rename a column in the data dictionary tables. -@param[in] user_table InnoDB table that was being altered -@param[in] trx data dictionary transaction -@param[in] table_name Table name in MySQL -@param[in] from old column name -@param[in] to new column name -@param[in] new_clustered whether the table has been rebuilt -@param[in] is_virtual whether it is a virtual column +@param[in] user_table InnoDB table that was being altered +@param[in] trx Data dictionary transaction +@param[in] table_name Table name in MySQL +@param[in] from old column name +@param[in] to new column name +@param[in] new_clustered whether the table has been rebuilt +@param[in] evict_fk_cache Evict the fk info from cache @retval true Failure @retval false Success */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) @@ -8816,7 +8809,8 @@ innobase_rename_column_try( const char* table_name, const char* from, const char* to, - bool new_clustered) + bool new_clustered, + bool evict_fk_cache) { dberr_t error; @@ -8978,7 +8972,8 @@ rename_foreign: } } - if (new_clustered) { + /* Reload the foreign key info for instant table too. */ + if (new_clustered || evict_fk_cache) { std::for_each(fk_evict.begin(), fk_evict.end(), dict_foreign_remove_from_cache); } @@ -9027,7 +9022,9 @@ innobase_rename_columns_try( if (innobase_rename_column_try( ctx->old_table, trx, table_name, cf->field->field_name.str, - cf->field_name.str, true)) { + cf->field_name.str, + ctx->need_rebuild(), + ctx->is_instant())) { return(true); } goto processed_field; @@ -9076,7 +9073,7 @@ static void get_type(const Field& f, ulint& prtype, ulint& mtype, ulint& len) } /** Enlarge a column in the data dictionary tables. -@param user_table InnoDB table that was being altered +@param ctx In-place ALTER TABLE context @param trx data dictionary transaction @param table_name Table name in MySQL @param pos 0-based index to user_table->cols[] or user_table->v_cols[] @@ -9087,7 +9084,7 @@ static void get_type(const Field& f, ulint& prtype, ulint& mtype, ulint& len) static MY_ATTRIBUTE((nonnull, warn_unused_result)) bool innobase_rename_or_enlarge_column_try( - const dict_table_t* user_table, + ha_innobase_inplace_ctx*ctx, trx_t* trx, const char* table_name, ulint pos, @@ -9095,8 +9092,10 @@ innobase_rename_or_enlarge_column_try( bool is_v) { dict_col_t* col; + dict_table_t* user_table = ctx->old_table; DBUG_ENTER("innobase_rename_or_enlarge_column_try"); + DBUG_ASSERT(!ctx->need_rebuild()); DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); @@ -9156,7 +9155,7 @@ innobase_rename_or_enlarge_column_try( if (!same_name && innobase_rename_column_try(user_table, trx, table_name, col_name, f.field_name.str, - false)) { + false, ctx->is_instant())) { DBUG_RETURN(true); } @@ -9214,7 +9213,7 @@ innobase_rename_or_enlarge_columns_try( while (Create_field* cf = cf_it++) { if (cf->field == *fp) { if (innobase_rename_or_enlarge_column_try( - ctx->old_table, trx, table_name, + ctx, trx, table_name, idx, **af, is_v)) { DBUG_RETURN(true); } @@ -9824,74 +9823,6 @@ commit_try_rebuild( index->to_be_dropped = 0; } - /* We copied the table. Any indexes that were requested to be - dropped were not created in the copy of the table. Apply any - last bit of the rebuild log and then rename the tables. */ - - if (ctx->online) { - DEBUG_SYNC_C("row_log_table_apply2_before"); - - dict_vcol_templ_t* s_templ = NULL; - - if (ctx->new_table->n_v_cols > 0) { - s_templ = UT_NEW_NOKEY( - dict_vcol_templ_t()); - s_templ->vtempl = NULL; - - innobase_build_v_templ( - altered_table, ctx->new_table, s_templ, - NULL, true); - ctx->new_table->vc_templ = s_templ; - } - - error = row_log_table_apply( - ctx->thr, user_table, altered_table, - static_cast<ha_innobase_inplace_ctx*>( - ha_alter_info->handler_ctx)->m_stage, - ctx->new_table); - - if (s_templ) { - ut_ad(ctx->need_rebuild()); - dict_free_vc_templ(s_templ); - UT_DELETE(s_templ); - ctx->new_table->vc_templ = NULL; - } - - ulint err_key = thr_get_trx(ctx->thr)->error_key_num; - - switch (error) { - KEY* dup_key; - case DB_SUCCESS: - break; - case DB_DUPLICATE_KEY: - if (err_key == ULINT_UNDEFINED) { - /* This should be the hidden index on - FTS_DOC_ID. */ - dup_key = NULL; - } else { - DBUG_ASSERT(err_key < - ha_alter_info->key_count); - dup_key = &ha_alter_info - ->key_info_buffer[err_key]; - } - print_keydup_error(altered_table, dup_key, MYF(0)); - DBUG_RETURN(true); - case DB_ONLINE_LOG_TOO_BIG: - my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0), - get_error_key_name(err_key, ha_alter_info, - rebuilt_table)); - DBUG_RETURN(true); - case DB_INDEX_CORRUPT: - my_error(ER_INDEX_CORRUPT, MYF(0), - get_error_key_name(err_key, ha_alter_info, - rebuilt_table)); - DBUG_RETURN(true); - default: - my_error_innodb(error, table_name, user_table->flags); - DBUG_RETURN(true); - } - } - if ((ha_alter_info->handler_flags & ALTER_COLUMN_NAME) && innobase_rename_columns_try(ha_alter_info, ctx, old_table, @@ -10642,6 +10573,91 @@ do { \ # define DBUG_INJECT_CRASH(prefix, count) #endif +/** Apply the log for the table rebuild operation. +@param[in] ctx Inplace Alter table context +@param[in] altered_table MySQL table that is being altered +@return true Failure, else false. */ +static bool alter_rebuild_apply_log( + ha_innobase_inplace_ctx* ctx, + Alter_inplace_info* ha_alter_info, + TABLE* altered_table) +{ + DBUG_ENTER("alter_rebuild_apply_log"); + + if (!ctx->online) { + DBUG_RETURN(false); + } + + /* We copied the table. Any indexes that were requested to be + dropped were not created in the copy of the table. Apply any + last bit of the rebuild log and then rename the tables. */ + dict_table_t* user_table = ctx->old_table; + dict_table_t* rebuilt_table = ctx->new_table; + + DEBUG_SYNC_C("row_log_table_apply2_before"); + + dict_vcol_templ_t* s_templ = NULL; + + if (ctx->new_table->n_v_cols > 0) { + s_templ = UT_NEW_NOKEY( + dict_vcol_templ_t()); + s_templ->vtempl = NULL; + + innobase_build_v_templ(altered_table, ctx->new_table, s_templ, + NULL, true); + ctx->new_table->vc_templ = s_templ; + } + + dberr_t error = row_log_table_apply( + ctx->thr, user_table, altered_table, + static_cast<ha_innobase_inplace_ctx*>( + ha_alter_info->handler_ctx)->m_stage, + ctx->new_table); + + if (s_templ) { + ut_ad(ctx->need_rebuild()); + dict_free_vc_templ(s_templ); + UT_DELETE(s_templ); + ctx->new_table->vc_templ = NULL; + } + + ulint err_key = thr_get_trx(ctx->thr)->error_key_num; + + switch (error) { + KEY* dup_key; + case DB_SUCCESS: + break; + case DB_DUPLICATE_KEY: + if (err_key == ULINT_UNDEFINED) { + /* This should be the hidden index on + FTS_DOC_ID. */ + dup_key = NULL; + } else { + DBUG_ASSERT(err_key < ha_alter_info->key_count); + dup_key = &ha_alter_info->key_info_buffer[err_key]; + } + + print_keydup_error(altered_table, dup_key, MYF(0)); + DBUG_RETURN(true); + case DB_ONLINE_LOG_TOO_BIG: + my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0), + get_error_key_name(err_key, ha_alter_info, + rebuilt_table)); + DBUG_RETURN(true); + case DB_INDEX_CORRUPT: + my_error(ER_INDEX_CORRUPT, MYF(0), + get_error_key_name(err_key, ha_alter_info, + rebuilt_table)); + DBUG_RETURN(true); + default: + my_error_innodb(error, ctx->old_table->name.m_name, + user_table->flags); + DBUG_RETURN(true); + } + + DBUG_RETURN(false); +} + /** Commit or rollback the changes made during prepare_inplace_alter_table() and inplace_alter_table() inside the storage engine. Note that the allowed level of concurrency @@ -10786,6 +10802,19 @@ ha_innobase::commit_inplace_alter_table( ut_ad(!ctx->new_table->fts->add_wq); fts_optimize_remove_table(ctx->new_table); } + + /* Apply the online log of the table before acquiring + data dictionary latches. Here alter thread already acquired + MDL_EXCLUSIVE on the table. So there can't be anymore DDLs, DMLs + for the altered table. By applying the log here, InnoDB + makes sure that concurrent DDLs, purge thread or any other + background thread doesn't wait for the dict_operation_lock + for longer time. */ + if (new_clustered && commit + && alter_rebuild_apply_log( + ctx, ha_alter_info, altered_table)) { + DBUG_RETURN(true); + } } if (!trx) { diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 568320457f9..1dfb427e2b5 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -826,6 +826,18 @@ dict_table_has_atomic_blobs(const dict_table_t* table) return(DICT_TF_HAS_ATOMIC_BLOBS(table->flags)); } +/** @return potential max length stored inline for externally stored fields */ +inline size_t dict_table_t::get_overflow_field_local_len() const +{ + if (dict_table_has_atomic_blobs(this)) { + /* ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED: do not + store any BLOB prefix locally */ + return BTR_EXTERN_FIELD_REF_SIZE; + } + /* up to MySQL 5.1: store a 768-byte prefix locally */ + return BTR_EXTERN_FIELD_REF_SIZE + DICT_ANTELOPE_MAX_INDEX_COL_LEN; +} + /** Set the various values in a dict_table_t::flags pointer. @param[in,out] flags, Pointer to a 4 byte Table Flags @param[in] format, File Format diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index ca2e5a5c52b..41c5c2220a4 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1274,6 +1274,9 @@ void dict_free_vc_templ( dict_vcol_templ_t* vc_templ) { + UT_DELETE_ARRAY(vc_templ->default_rec); + vc_templ->default_rec = NULL; + if (vc_templ->vtempl != NULL) { ut_ad(vc_templ->n_v_col > 0); for (ulint i = 0; i < vc_templ->n_col diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index f507007fee9..e72b24da9f5 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1214,6 +1214,12 @@ struct dict_index_t { bool vers_history_row(const rec_t* rec, bool &history_row); + /** If a record of this index might not fit on a single B-tree page, + return true. + @param[in] strict issue error or warning + @return true if the index record could become too big */ + bool rec_potentially_too_big(bool strict) const; + /** Reconstruct the clustered index fields. */ inline void reconstruct_fields(); @@ -1776,6 +1782,9 @@ struct dict_table_t { ut_ad(fk_checks > 0); } + /** For overflow fields returns potential max length stored inline */ + inline size_t get_overflow_field_local_len() const; + private: /** Initialize instant->field_map. @tparam replace_dropped whether to point clustered index fields @@ -1979,7 +1988,7 @@ public: unsigned stat_initialized:1; /** Timestamp of last recalc of the stats. */ - ib_time_t stats_last_recalc; + time_t stats_last_recalc; /** The two bits below are set in the 'stat_persistent' member. They have the following meaning: diff --git a/storage/innobase/include/dict0stats.h b/storage/innobase/include/dict0stats.h index 5bd921b1b8b..ab001130364 100644 --- a/storage/innobase/include/dict0stats.h +++ b/storage/innobase/include/dict0stats.h @@ -216,7 +216,7 @@ rolled back only in the case of error, but not freed. dberr_t dict_stats_save_index_stat( dict_index_t* index, - ib_time_t last_update, + time_t last_update, const char* stat_name, ib_uint64_t stat_value, ib_uint64_t* sample_size, diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index b5c81250c28..07be853efad 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -780,16 +780,9 @@ fts_drop_orphaned_tables(void); /** Run SYNC on the table, i.e., write out data from the cache to the FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] table fts table -@param[in] unlock_cache whether unlock cache when write node -@param[in] wait whether wait for existing sync to finish -@param[in] has_dict whether has dict operation lock +@param[in] wait whether to wait for existing sync to finish @return DB_SUCCESS on success, error code on failure. */ -dberr_t -fts_sync_table( - dict_table_t* table, - bool unlock_cache, - bool wait, - bool has_dict); +dberr_t fts_sync_table(dict_table_t* table, bool wait = true); /****************************************************************//** Free the query graph but check whether dict_sys.mutex is already diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h index 26f18cc3d1d..a08a60b9e95 100644 --- a/storage/innobase/include/fts0types.h +++ b/storage/innobase/include/fts0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -123,7 +123,8 @@ struct fts_sync_t { doc_id_t max_doc_id; /*!< The doc id at which the cache was noted as being full, we use this to set the upper_limit field */ - ib_time_t start_time; /*!< SYNC start time */ + time_t start_time; /*!< SYNC start time; only used if + fts_enable_diag_print */ bool in_progress; /*!< flag whether sync is in progress.*/ bool unlock_cache; /*!< flag whether unlock cache when write fts node */ diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index b529f37d76e..f37dff44b2f 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -361,14 +361,6 @@ thd_trx_is_auto_commit( /*===================*/ THD* thd); /*!< in: thread handle, or NULL */ -/******************************************************************//** -Get the thread start time. -@return the thread start time in seconds since the epoch. */ -ulint -thd_start_time_in_secs( -/*===================*/ - THD* thd); /*!< in: thread handle, or NULL */ - /*****************************************************************//** A wrapper function of innobase_convert_name(), convert a table name to the MySQL system_charset_info (UTF-8) and quote it if needed. diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 0481ecab3a2..91ee6b07c40 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2018, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -566,11 +566,10 @@ lock_print_info_summary( /** Prints transaction lock wait and MVCC state. @param[in,out] file file where to print -@param[in] trx transaction */ +@param[in] trx transaction +@param[in] now current time */ void -lock_trx_print_wait_and_mvcc_state( - FILE* file, - const trx_t* trx); +lock_trx_print_wait_and_mvcc_state(FILE* file, const trx_t* trx, time_t now); /*********************************************************************//** Prints info of locks for each transaction. This function assumes that the diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h index bdd03c49554..cb04afdf9db 100644 --- a/storage/innobase/include/lock0types.h +++ b/storage/innobase/include/lock0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, MariaDB Corporation. +Copyright (c) 2018, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -190,10 +190,14 @@ struct ib_lock_t lock. The link node in a singly linked list, used during hashing. */ - /* Statistics for how long lock has been held and time - how long this lock had to be waited before it was granted */ - time_t requested_time; /*!< Lock request time */ - ulint wait_time; /*!< Time waited this lock or 0 */ + /** time(NULL) of the lock request creation. + Used for computing wait_time and diagnostics only. + Note: bogus durations may be reported + when the system time is adjusted! */ + time_t requested_time; + /** Cumulated wait time in seconds. + Note: may be bogus when the system time is adjusted! */ + ulint wait_time; union { lock_table_t tab_lock;/*!< table lock */ diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 347d33e0907..5a687b5df08 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -231,7 +231,7 @@ struct recv_sys_t{ /*!< the LSN of a MLOG_CHECKPOINT record, or 0 if none was parsed */ /** the time when progress was last reported */ - ib_time_t progress_time; + time_t progress_time; mem_heap_t* heap; /*!< memory heap of log records and file addresses*/ hash_table_t* addr_hash;/*!< hash table of file addresses of pages */ @@ -282,7 +282,7 @@ struct recv_sys_t{ @param[in] time the current time @return whether progress should be reported (the last report was at least 15 seconds ago) */ - bool report(ib_time_t time) + bool report(time_t time) { if (time - progress_time < 15) { return false; diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h index fa4c2526ae3..6d0f95cba19 100644 --- a/storage/innobase/include/mem0mem.h +++ b/storage/innobase/include/mem0mem.h @@ -73,7 +73,7 @@ allocations of small buffers. */ /** If a memory heap is allowed to grow into the buffer pool, the following is the maximum size for a single allocated buffer: */ -#define MEM_MAX_ALLOC_IN_BUF (srv_page_size - 200) +#define MEM_MAX_ALLOC_IN_BUF (srv_page_size - 200 + REDZONE_SIZE) /** Space needed when allocating for a user a field of length N. The space is allocated only in multiples of UNIV_MEM_ALIGNMENT. */ diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic index e00e814571c..fae2aaf4d04 100644 --- a/storage/innobase/include/mem0mem.ic +++ b/storage/innobase/include/mem0mem.ic @@ -183,13 +183,15 @@ mem_heap_alloc( ulint n) { mem_block_t* block; - void* buf; + byte* buf; ulint free; ut_d(mem_block_validate(heap)); block = UT_LIST_GET_LAST(heap->base); + n += REDZONE_SIZE; + ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF)); /* Check if there is enough space in block. If not, create a new @@ -212,7 +214,8 @@ mem_heap_alloc( mem_block_set_free(block, free + MEM_SPACE_NEEDED(n)); - TRASH_ALLOC(buf, n); + buf = buf + REDZONE_SIZE; + UNIV_MEM_ALLOC(buf, n - REDZONE_SIZE); return(buf); } @@ -342,6 +345,8 @@ mem_heap_free_top( ut_d(mem_block_validate(heap)); + n += REDZONE_SIZE; + block = UT_LIST_GET_LAST(heap->base); /* Subtract the free field of block */ diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h index b2c5651c9c5..beb2f8c2bfb 100644 --- a/storage/innobase/include/row0ftsort.h +++ b/storage/innobase/include/row0ftsort.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -59,6 +59,8 @@ struct fts_psort_t; struct fts_psort_common_t { row_merge_dup_t* dup; /*!< descriptor of FTS index */ dict_table_t* new_table; /*!< source table */ + /** Old table page size */ + ulint old_zip_size; trx_t* trx; /*!< transaction */ fts_psort_t* all_info; /*!< all parallel sort info */ os_event_t sort_event; /*!< sort event */ @@ -190,26 +192,27 @@ row_merge_create_fts_sort_index( instead of 8 bytes integer to store Doc ID during sort */ -/********************************************************************//** -Initialize FTS parallel sort structures. -@return TRUE if all successful */ -ibool +/** Initialize FTS parallel sort structures. +@param[in] trx transaction +@param[in,out] dup descriptor of FTS index being created +@param[in] new_table table where indexes are created +@param[in] opt_doc_id_size whether to use 4 bytes instead of 8 bytes + integer to store Doc ID during sort +@param[in] old_zip_size page size of the old table during alter +@param[out] psort parallel sort info to be instantiated +@param[out] merge parallel merge info to be instantiated +@return true if all successful */ +bool row_fts_psort_info_init( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - row_merge_dup_t* dup, /*!< in,own: descriptor of - FTS index being created */ - const dict_table_t* new_table,/*!< in: table where indexes are - created */ - ibool opt_doc_id_size, - /*!< in: whether to use 4 bytes - instead of 8 bytes integer to - store Doc ID during sort */ - fts_psort_t** psort, /*!< out: parallel sort info to be - instantiated */ - fts_psort_t** merge) /*!< out: parallel merge info - to be instantiated */ + trx_t* trx, + row_merge_dup_t*dup, + dict_table_t* new_table, + bool opt_doc_id_size, + ulint old_zip_size, + fts_psort_t** psort, + fts_psort_t** merge) MY_ATTRIBUTE((nonnull)); + /********************************************************************//** Clean up and deallocate FTS parallel sort structures, and close temparary merge sort files */ diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index 5651c70a1ba..5a4d424981e 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -2,7 +2,7 @@ Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2018, MariaDB Corporation. +Copyright (c) 2013, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -64,9 +64,9 @@ create the internal counter ID in "monitor_id_t". */ /** Structure containing the actual values of a monitor counter. */ struct monitor_value_t { - ib_time_t mon_start_time; /*!< Start time of monitoring */ - ib_time_t mon_stop_time; /*!< Stop time of monitoring */ - ib_time_t mon_reset_time; /*!< Time counter resetted */ + time_t mon_start_time; /*!< Start time of monitoring */ + time_t mon_stop_time; /*!< Stop time of monitoring */ + time_t mon_reset_time; /*!< Time of resetting the counter */ mon_type_t mon_value; /*!< Current counter Value */ mon_type_t mon_max_value; /*!< Current Max value */ mon_type_t mon_min_value; /*!< Current Min value */ @@ -719,8 +719,8 @@ monitor counter #define MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value) \ MONITOR_CHECK_DEFINED(value); \ if (MONITOR_IS_ON(monitor)) { \ - uintmax_t old_time = (value); \ - value = ut_time_us(NULL); \ + uintmax_t old_time = value; \ + value = microsecond_interval_timer(); \ MONITOR_VALUE(monitor) += (mon_type_t) (value - old_time);\ } diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 843eadacc5f..fe1be626a23 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -1064,10 +1064,14 @@ struct srv_slot_t{ ibool suspended; /*!< TRUE if the thread is waiting for the event of this slot */ - ib_time_t suspend_time; /*!< time when the thread was - suspended. Initialized by - lock_wait_table_reserve_slot() - for lock wait */ + /** time(NULL) when the thread was suspended. + FIXME: Use my_interval_timer() or similar, to avoid bogus + timeouts in lock_wait_check_and_cancel() or lock_wait_suspend_thread() + when the system time is adjusted to the past! + + FIXME: This is duplicating trx_lock_t::wait_started, + which is being used for diagnostic purposes only. */ + time_t suspend_time; ulong wait_timeout; /*!< wait time that if exceeded the thread will be timed out. Initialized by diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h index 97fb0a7cdaa..65c7d321597 100644 --- a/storage/innobase/include/trx0i_s.h +++ b/storage/innobase/include/trx0i_s.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -127,12 +127,12 @@ struct i_s_trx_row_t { trx_id_t trx_id; /*!< transaction identifier */ const char* trx_state; /*!< transaction state from trx_get_que_state_str() */ - ib_time_t trx_started; /*!< trx_t::start_time */ + time_t trx_started; /*!< trx_t::start_time */ const i_s_locks_row_t* requested_lock_row; /*!< pointer to a row in innodb_locks if trx is waiting, or NULL */ - ib_time_t trx_wait_started; /*!< trx_t::wait_started */ + time_t trx_wait_started; /*!< trx_t->lock.wait_started */ uintmax_t trx_weight; /*!< TRX_WEIGHT() */ ulint trx_mysql_thread_id; /*!< thd_get_thread_id() */ const char* trx_query; /*!< MySQL statement being diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 06362a62865..9d2a7680900 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -927,10 +927,11 @@ public: on dict_sys.latch. Protected by dict_sys.latch. */ - time_t start_time; /*!< time the state last time became - TRX_STATE_ACTIVE */ - ib_uint64_t start_time_micro; /*!< start time of transaction in - microseconds */ + /** wall-clock time of the latest transition to TRX_STATE_ACTIVE; + used for diagnostic purposes only */ + time_t start_time; + /** microsecond_interval_timer() of transaction start */ + ulonglong start_time_micro; lsn_t commit_lsn; /*!< lsn at the time of the commit */ table_id_t table_id; /*!< Table to drop iff dict_operation == TRX_DICT_OP_TABLE, or 0. */ diff --git a/storage/innobase/include/ut0timer.h b/storage/innobase/include/ut0timer.h deleted file mode 100644 index 376af3cf0ef..00000000000 --- a/storage/innobase/include/ut0timer.h +++ /dev/null @@ -1,67 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved. -Copyright (c) 2014, 2018, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/ut0timer.h -Timer routines - -Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com -modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6 -*************************************************************************/ -#ifndef ut0timer_h -#define ut0timer_h - -#include "univ.i" - -/* Current timer stats */ -extern struct my_timer_unit_info ut_timer; - -/**************************************************************//** -Function pointer to point selected timer function. -@return timer current value */ -extern ulonglong (*ut_timer_now)(void); - -/**************************************************************//** -Sets up the data required for use of my_timer_* functions. -Selects the best timer by high frequency, and tight resolution. -Points my_timer_now() to the selected timer function. -Initializes my_timer struct to contain the info for selected timer.*/ -UNIV_INTERN -void ut_init_timer(void); - -/**************************************************************//** -Convert native timer units in a ulonglong into microseconds in a double -@return time in microseconds */ -UNIV_INLINE -double -ut_timer_to_microseconds( -/*=====================*/ - ulonglong when); /*!< in: time where to calculate */ -/**************************************************************//** -Convert microseconds in a double to native timer units in a ulonglong -@return time in microseconds */ -UNIV_INLINE -ulonglong -ut_microseconds_to_timer( -/*=====================*/ - ulonglong when); /*!< in: time where to calculate */ - -#include "ut0timer.ic" - -#endif diff --git a/storage/innobase/include/ut0timer.ic b/storage/innobase/include/ut0timer.ic deleted file mode 100644 index 26cf0bd2fbe..00000000000 --- a/storage/innobase/include/ut0timer.ic +++ /dev/null @@ -1,56 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved. -Copyright (c) 2014, 2018, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/ut0timer.ic -Timer routines - -Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com -modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6 -*************************************************************************/ - -/**************************************************************//** -Convert native timer units in a ulonglong into microseconds in a double -@return time in microseconds */ -UNIV_INLINE -double -ut_timer_to_microseconds( -/*=====================*/ - ulonglong when) /*!< in: time where to calculate */ -{ - double ret = (double)(when); - ret *= 1000000.0; - ret /= (double)(ut_timer.frequency); - return ret; -} - -/**************************************************************//** -Convert microseconds in a double to native timer units in a ulonglong -@return time in microseconds */ -UNIV_INLINE -ulonglong -ut_microseconds_to_timer( -/*=====================*/ - ulonglong when) /*!< in: time where to calculate */ -{ - double ret = (double)when; - ret *= (double)(ut_timer.frequency); - ret /= 1000000.0; - return (ulonglong)ret; -} diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index a063399849d..410d2ead738 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -50,25 +50,6 @@ Created 1/20/1994 Heikki Tuuri /** Index name prefix in fast index creation, as a string constant */ #define TEMP_INDEX_PREFIX_STR "\377" -/** Time stamp */ -typedef time_t ib_time_t; - -/*********************************************************************//** -Delays execution for at most max_wait_us microseconds or returns earlier -if cond becomes true. -@param cond in: condition to wait for; evaluated every 2 ms -@param max_wait_us in: maximum delay to wait, in microseconds */ -# define UT_WAIT_FOR(cond, max_wait_us) \ -do { \ - uintmax_t start_us; \ - start_us = ut_time_us(NULL); \ - while (!(cond) \ - && ut_time_us(NULL) - start_us < (max_wait_us)) {\ - \ - os_thread_sleep(2000 /* 2 ms */); \ - } \ -} while (0) - #define ut_max std::max #define ut_min std::min @@ -165,44 +146,6 @@ ut_2_power_up( MY_ATTRIBUTE((const)); /**********************************************************//** -Returns system time. We do not specify the format of the time returned: -the only way to manipulate it is to use the function ut_difftime. -@return system time */ -ib_time_t -ut_time(void); -/*=========*/ - -/**********************************************************//** -Returns system time. -Upon successful completion, the value 0 is returned; otherwise the -value -1 is returned and the global variable errno is set to indicate the -error. -@return 0 on success, -1 otherwise */ -int -ut_usectime( -/*========*/ - ulint* sec, /*!< out: seconds since the Epoch */ - ulint* ms); /*!< out: microseconds since the Epoch+*sec */ - -/**********************************************************//** -Returns the number of microseconds since epoch. Similar to -time(3), the return value is also stored in *tloc, provided -that tloc is non-NULL. -@return us since epoch */ -uintmax_t -ut_time_us( -/*=======*/ - uintmax_t* tloc); /*!< out: us since epoch, if non-NULL */ -/**********************************************************//** -Returns the number of milliseconds since some epoch. The -value may wrap around. It should only be used for heuristic -purposes. -@return ms since epoch */ -ulint -ut_time_ms(void); -/*============*/ - -/**********************************************************//** Returns the number of milliseconds since some epoch. The value may wrap around. It should only be used for heuristic purposes. @@ -210,16 +153,6 @@ purposes. ulint ut_time_ms(void); /*============*/ - -/**********************************************************//** -Returns the difference of two times in seconds. -@return time2 - time1 expressed in seconds */ -double -ut_difftime( -/*========*/ - ib_time_t time2, /*!< in: time */ - ib_time_t time1); /*!< in: time */ - #endif /* !UNIV_INNOCHECKSUM */ /** Determine how many bytes (groups of 8 bits) are needed to diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h index 008ea2a70dd..6a096a36894 100644 --- a/storage/innobase/include/ut0wqueue.h +++ b/storage/innobase/include/ut0wqueue.h @@ -84,7 +84,7 @@ ib_wqueue_timedwait( /*================*/ /* out: work item or NULL on timeout*/ ib_wqueue_t* wq, /* in: work queue */ - ib_time_t wait_in_usecs); /* in: wait time in micro seconds */ + ulint wait_in_usecs); /* in: wait time in micro seconds */ /******************************************************************** Return first item on work queue or NULL if queue is empty diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 781fb6dabb4..21ff8097af7 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -72,44 +72,39 @@ extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); extern "C" int thd_need_wait_reports(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); -/** Print info of a table lock. +/** Pretty-print a table lock. @param[in,out] file output stream @param[in] lock table lock */ -static -void -lock_table_print(FILE* file, const lock_t* lock); +static void lock_table_print(FILE* file, const lock_t* lock); -/** Print info of a record lock. +/** Pretty-print a record lock. @param[in,out] file output stream -@param[in] lock record lock */ -static -void -lock_rec_print(FILE* file, const lock_t* lock); +@param[in] lock record lock +@param[in,out] mtr mini-transaction for accessing the record */ +static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr); /** Deadlock checker. */ class DeadlockChecker { public: - /** Checks if a joining lock request results in a deadlock. If - a deadlock is found this function will resolve the deadlock - by choosing a victim transaction and rolling it back. It - will attempt to resolve all deadlocks. The returned transaction - id will be the joining transaction id or 0 if some other - transaction was chosen as a victim and rolled back or no - deadlock found. - - @param lock lock the transaction is requesting - @param trx transaction requesting the lock - - @return id of transaction chosen as victim or 0 */ - static const trx_t* check_and_resolve( - const lock_t* lock, - trx_t* trx); + /** Check if a joining lock request results in a deadlock. + If a deadlock is found, we will resolve the deadlock by + choosing a victim transaction and rolling it back. + We will attempt to resolve all deadlocks. + + @param[in] lock the lock request + @param[in,out] trx transaction requesting the lock + + @return trx if it was chosen as victim + @retval NULL if another victim was chosen, + or there is no deadlock (any more) */ + static const trx_t* check_and_resolve(const lock_t* lock, trx_t* trx); private: /** Do a shallow copy. Default destructor OK. @param trx the start transaction (start node) @param wait_lock lock that a transaction wants - @param mark_start visited node counter */ + @param mark_start visited node counter + @param report_waiters whether to call thd_rpl_deadlock_check() */ DeadlockChecker( const trx_t* trx, const lock_t* wait_lock, @@ -751,11 +746,12 @@ lock_rec_has_to_wait( thread, we need to look at trx ordering and lock types */ if (wsrep_thd_is_BF(trx->mysql_thd, FALSE) && wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) { + mtr_t mtr; if (wsrep_debug) { ib::info() << "BF-BF lock conflict, locking: " << for_locking; - lock_rec_print(stderr, lock2); + lock_rec_print(stderr, lock2, mtr); ib::info() << " SQL1: " << wsrep_thd_query(trx->mysql_thd) << " SQL2: " @@ -777,7 +773,7 @@ lock_rec_has_to_wait( << " locked " << wsrep_thd_transaction_state_str( lock2->trx->mysql_thd); - lock_rec_print(stderr, lock2); + lock_rec_print(stderr, lock2, mtr); ib::info() << " SQL1: " << wsrep_thd_query(trx->mysql_thd) << " SQL2: " @@ -1100,6 +1096,7 @@ wsrep_kill_victim( } my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE); + mtr_t mtr; if ((!bf_other) || (wsrep_thd_order_before( @@ -1127,7 +1124,7 @@ wsrep_kill_victim( ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:"; if (lock_get_type(lock) == LOCK_REC) { - lock_rec_print(stderr, lock); + lock_rec_print(stderr, lock, mtr); } else { lock_table_print(stderr, lock); } @@ -1293,6 +1290,7 @@ wsrep_print_wait_locks( lock_t* c_lock) /* conflicting lock to print */ { if (wsrep_debug && c_lock->trx->lock.wait_lock != c_lock) { + mtr_t mtr; ib::info() << "WSREP: c_lock != wait lock"; ib::info() << " SQL: " << wsrep_thd_query(c_lock->trx->mysql_thd); @@ -1300,13 +1298,14 @@ wsrep_print_wait_locks( if (lock_get_type_low(c_lock) & LOCK_TABLE) { lock_table_print(stderr, c_lock); } else { - lock_rec_print(stderr, c_lock); + lock_rec_print(stderr, c_lock, mtr); } if (lock_get_type_low(c_lock->trx->lock.wait_lock) & LOCK_TABLE) { lock_table_print(stderr, c_lock->trx->lock.wait_lock); } else { - lock_rec_print(stderr, c_lock->trx->lock.wait_lock); + lock_rec_print(stderr, c_lock->trx->lock.wait_lock, + mtr); } } } @@ -1519,11 +1518,7 @@ If only one of them is a wait lock, it has lower priority. If either is a high priority transaction, the lock has higher priority. Otherwise, the one with an older transaction has higher priority. @returns true if lock1 has higher priority, false otherwise. */ -static -bool -has_higher_priority( - lock_t *lock1, - lock_t *lock2) +static bool has_higher_priority(lock_t *lock1, lock_t *lock2) { if (lock1 == NULL) { return false; @@ -1730,10 +1725,7 @@ lock_rec_enqueue_waiting( lock_prdt_set_prdt(lock, prdt); } - if ( -#ifdef UNIV_DEBUG - const trx_t* victim = -#endif + if (ut_d(const trx_t* victim =) DeadlockChecker::check_and_resolve(lock, trx)) { ut_ad(victim == trx); lock_reset_lock_and_trx_wait(lock); @@ -1757,7 +1749,7 @@ lock_rec_enqueue_waiting( trx->lock.que_state = TRX_QUE_LOCK_WAIT; trx->lock.was_chosen_as_deadlock_victim = false; - trx->lock.wait_started = ut_time(); + trx->lock.wait_started = time(NULL); ut_a(que_thr_stop(thr)); @@ -2067,12 +2059,13 @@ lock_rec_has_to_wait_in_queue( if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) && wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) { if (wsrep_debug) { + mtr_t mtr; ib::info() << "WSREP: waiting BF trx: " << ib::hex(wait_lock->trx->id) << " query: " << wsrep_thd_query(wait_lock->trx->mysql_thd); - lock_rec_print(stderr, wait_lock); + lock_rec_print(stderr, wait_lock, mtr); ib::info() << "WSREP: do not wait another BF trx: " << ib::hex(lock->trx->id) << " query: " << wsrep_thd_query(lock->trx->mysql_thd); - lock_rec_print(stderr, lock); + lock_rec_print(stderr, lock, mtr); } /* don't wait for another BF lock */ continue; @@ -3770,7 +3763,7 @@ lock_table_enqueue_waiting( ); const trx_t* victim_trx = - DeadlockChecker::check_and_resolve(lock, trx); + DeadlockChecker::check_and_resolve(lock, trx); if (victim_trx != 0) { ut_ad(victim_trx == trx); @@ -3791,7 +3784,7 @@ lock_table_enqueue_waiting( trx->lock.que_state = TRX_QUE_LOCK_WAIT; - trx->lock.wait_started = ut_time(); + trx->lock.wait_started = time(NULL); trx->lock.was_chosen_as_deadlock_victim = false; ut_a(que_thr_stop(thr)); @@ -4429,20 +4422,14 @@ lock_table_print(FILE* file, const lock_t* lock) putc('\n', file); } -/** Print info of a record lock. +/** Pretty-print a record lock. @param[in,out] file output stream -@param[in] lock record lock */ -static -void -lock_rec_print(FILE* file, const lock_t* lock) +@param[in] lock record lock +@param[in,out] mtr mini-transaction for accessing the record */ +static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr) { ulint space; ulint page_no; - mtr_t mtr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); ut_ad(lock_mutex_own()); ut_a(lock_get_type_low(lock) == LOCK_REC); @@ -4482,13 +4469,16 @@ lock_rec_print(FILE* file, const lock_t* lock) fputs(" waiting", file); } - mtr_start(&mtr); - putc('\n', file); - const buf_block_t* block; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); - block = buf_page_try_get(page_id_t(space, page_no), &mtr); + mtr.start(); + const buf_block_t* block = buf_page_try_get(page_id_t(space, page_no), + &mtr); for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) { @@ -4517,9 +4507,9 @@ lock_rec_print(FILE* file, const lock_t* lock) putc('\n', file); } - mtr_commit(&mtr); + mtr.commit(); - if (heap) { + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } } @@ -4622,11 +4612,10 @@ lock_print_info_summary( /** Prints transaction lock wait and MVCC state. @param[in,out] file file where to print -@param[in] trx transaction */ +@param[in] trx transaction +@param[in] now current time */ void -lock_trx_print_wait_and_mvcc_state( - FILE* file, - const trx_t* trx) +lock_trx_print_wait_and_mvcc_state(FILE* file, const trx_t* trx, time_t now) { fprintf(file, "---"); @@ -4646,10 +4635,11 @@ lock_trx_print_wait_and_mvcc_state( fprintf(file, "------- TRX HAS BEEN WAITING %lu SEC" " FOR THIS LOCK TO BE GRANTED:\n", - (ulong) difftime(ut_time(), trx->lock.wait_started)); + (ulong) difftime(now, trx->lock.wait_started)); if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) { - lock_rec_print(file, trx->lock.wait_lock); + mtr_t mtr; + lock_rec_print(file, trx->lock.wait_lock, mtr); } else { lock_table_print(file, trx->lock.wait_lock); } @@ -4667,6 +4657,7 @@ lock_trx_print_locks( FILE* file, /*!< in/out: File to write */ const trx_t* trx) /*!< in: current transaction */ { + mtr_t mtr; uint32_t i= 0; /* Iterate over the transaction's locks. */ for (lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); @@ -4674,7 +4665,7 @@ lock_trx_print_locks( lock = UT_LIST_GET_NEXT(trx_locks, lock)) { if (lock_get_type_low(lock) == LOCK_REC) { - lock_rec_print(file, lock); + lock_rec_print(file, lock, mtr); } else { ut_ad(lock_get_type_low(lock) & LOCK_TABLE); @@ -4695,20 +4686,21 @@ lock_trx_print_locks( /** Functor to display all transactions */ struct lock_print_info { - lock_print_info(FILE* file) : file(file) {} + lock_print_info(FILE* file, time_t now) : file(file), now(now) {} void operator()(const trx_t* trx) const { ut_ad(mutex_own(&trx_sys.mutex)); if (trx == purge_sys.query->trx) return; - lock_trx_print_wait_and_mvcc_state(file, trx); + lock_trx_print_wait_and_mvcc_state(file, trx, now); if (trx->will_lock && srv_print_innodb_lock_monitor) lock_trx_print_locks(file, trx); } FILE* const file; + const time_t now; }; /*********************************************************************//** @@ -4723,9 +4715,10 @@ lock_print_info_all_transactions( ut_ad(lock_mutex_own()); fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n"); + const time_t now = time(NULL); mutex_enter(&trx_sys.mutex); - ut_list_map(trx_sys.trx_list, lock_print_info(file)); + ut_list_map(trx_sys.trx_list, lock_print_info(file, now)); mutex_exit(&trx_sys.mutex); lock_mutex_exit(); @@ -6642,10 +6635,11 @@ DeadlockChecker::print(const lock_t* lock) ut_ad(lock_mutex_own()); if (lock_get_type_low(lock) == LOCK_REC) { - lock_rec_print(lock_latest_err_file, lock); + mtr_t mtr; + lock_rec_print(lock_latest_err_file, lock, mtr); if (srv_print_all_deadlocks) { - lock_rec_print(stderr, lock); + lock_rec_print(stderr, lock, mtr); } } else { lock_table_print(lock_latest_err_file, lock); @@ -6940,7 +6934,7 @@ DeadlockChecker::search() @param trx transaction rolled back @param lock lock trx wants */ void -DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock) +DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock) { ut_ad(lock_mutex_own()); @@ -6985,16 +6979,17 @@ DeadlockChecker::trx_rollback() trx_mutex_exit(trx); } -/** Checks if a joining lock request results in a deadlock. If a deadlock is -found this function will resolve the deadlock by choosing a victim transaction -and rolling it back. It will attempt to resolve all deadlocks. The returned -transaction id will be the joining transaction instance or NULL if some other -transaction was chosen as a victim and rolled back or no deadlock found. +/** Check if a joining lock request results in a deadlock. +If a deadlock is found, we will resolve the deadlock by +choosing a victim transaction and rolling it back. +We will attempt to resolve all deadlocks. -@param[in] lock lock the transaction is requesting -@param[in,out] trx transaction requesting the lock +@param[in] lock the lock request +@param[in,out] trx transaction requesting the lock -@return transaction instanace chosen as victim or 0 */ +@return trx if it was chosen as victim +@retval NULL if another victim was chosen, +or there is no deadlock (any more) */ const trx_t* DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx) { diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc index ead9815ac02..94104172577 100644 --- a/storage/innobase/lock/lock0wait.cc +++ b/storage/innobase/lock/lock0wait.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2018, MariaDB Corporation. +Copyright (c) 2014, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -58,7 +58,7 @@ lock_wait_table_print(void) (ulong) slot->in_use, (ulong) slot->suspended, slot->wait_timeout, - (ulong) difftime(ut_time(), slot->suspend_time)); + (ulong) difftime(time(NULL), slot->suspend_time)); } } @@ -155,7 +155,7 @@ lock_wait_table_reserve_slot( os_event_reset(slot->event); slot->suspended = TRUE; - slot->suspend_time = ut_time(); + slot->suspend_time = time(NULL); slot->wait_timeout = wait_timeout; if (slot == lock_sys.last_slot) { @@ -231,13 +231,8 @@ lock_wait_suspend_thread( user OS thread */ { srv_slot_t* slot; - double wait_time; trx_t* trx; ibool was_declared_inside_innodb; - int64_t start_time = 0; - int64_t finish_time; - ulint sec; - ulint ms; ulong lock_wait_timeout; trx = thr_get_trx(thr); @@ -283,15 +278,12 @@ lock_wait_suspend_thread( lock_wait_mutex_exit(); trx_mutex_exit(trx); + ulonglong start_time = 0; + if (thr->lock_state == QUE_THR_LOCK_ROW) { srv_stats.n_lock_wait_count.inc(); srv_stats.n_lock_wait_current_count++; - - if (ut_usectime(&sec, &ms) == -1) { - start_time = -1; - } else { - start_time = int64_t(sec) * 1000000 + int64_t(ms); - } + start_time = my_interval_timer(); } ulint lock_type = ULINT_UNDEFINED; @@ -371,28 +363,23 @@ lock_wait_suspend_thread( row_mysql_freeze_data_dictionary(trx); } - wait_time = ut_difftime(ut_time(), slot->suspend_time); + double wait_time = difftime(time(NULL), slot->suspend_time); /* Release the slot for others to use */ lock_wait_table_release_slot(slot); if (thr->lock_state == QUE_THR_LOCK_ROW) { - int64_t diff_time; - if (start_time == -1 || ut_usectime(&sec, &ms) == -1) { - finish_time = -1; - diff_time = 0; - } else { - finish_time = int64_t(sec) * 1000000 + int64_t(ms); - diff_time = std::max<int64_t>( - 0, finish_time - start_time); - srv_stats.n_lock_wait_time.add(diff_time); + const ulonglong finish_time = my_interval_timer(); + if (finish_time >= start_time) { + const ulint diff_time = static_cast<ulint> + ((finish_time - start_time) / 1000); + srv_stats.n_lock_wait_time.add(diff_time); /* Only update the variable if we successfully retrieved the start and finish times. See Bug#36819. */ - if (ulint(diff_time) > lock_sys.n_lock_max_wait_time) { - lock_sys.n_lock_max_wait_time - = ulint(diff_time); + if (diff_time > lock_sys.n_lock_max_wait_time) { + lock_sys.n_lock_max_wait_time = diff_time; } /* Record the lock wait time for this thread */ thd_storage_lock_wait(trx->mysql_thd, diff_time); @@ -468,19 +455,12 @@ lock_wait_check_and_cancel( const srv_slot_t* slot) /*!< in: slot reserved by a user thread when the wait started */ { - trx_t* trx; - double wait_time; - ib_time_t suspend_time = slot->suspend_time; - ut_ad(lock_wait_mutex_own()); - ut_ad(slot->in_use); - ut_ad(slot->suspended); - wait_time = ut_difftime(ut_time(), suspend_time); - - trx = thr_get_trx(slot->thr); + double wait_time = difftime(time(NULL), slot->suspend_time); + trx_t* trx = thr_get_trx(slot->thr); if (trx_is_interrupted(trx) || (slot->wait_timeout < 100000000 @@ -515,7 +495,6 @@ lock_wait_check_and_cancel( trx_mutex_exit(trx); } - } /*********************************************************************//** diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 5cb27a4a68c..52edbcf238f 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -60,7 +60,7 @@ Created 9/20/1997 Heikki Tuuri /** Log records are stored in the hash table in chunks at most of this size; this must be less than srv_page_size as it is stored in the buffer pool */ -#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t)) +#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t) - REDZONE_SIZE) /** Read-ahead area in applying log records to file pages */ #define RECV_READ_AHEAD_AREA 32 @@ -864,7 +864,7 @@ void recv_sys_t::create() addr_hash = hash_create(size / 512); n_addrs = 0; - progress_time = ut_time(); + progress_time = time(NULL); recv_max_page_lsn = 0; memset(truncated_undo_spaces, 0, sizeof truncated_undo_spaces); @@ -1006,7 +1006,7 @@ fail: } } - if (recv_sys.report(ut_time())) { + if (recv_sys.report(time(NULL))) { ib::info() << "Read redo log up to LSN=" << *start_lsn; service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, "Read redo log up to LSN=" LSN_PF, @@ -2079,7 +2079,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, mtr.discard_modifications(); mtr.commit(); - ib_time_t time = ut_time(); + time_t now = time(NULL); mutex_enter(&recv_sys.mutex); @@ -2092,7 +2092,7 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, ut_a(recv_sys.n_addrs > 0); if (ulint n = --recv_sys.n_addrs) { - if (recv_sys.report(time)) { + if (recv_sys.report(now)) { ib::info() << "To recover: " << n << " pages from log"; service_manager_extend_timeout( INNODB_EXTEND_TIMEOUT_INTERVAL, "To recover: " ULINTPF " pages from log", n); diff --git a/storage/innobase/os/os0event.cc b/storage/innobase/os/os0event.cc index d6dd137f692..0676ba5f6c1 100644 --- a/storage/innobase/os/os0event.cc +++ b/storage/innobase/os/os0event.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -25,13 +26,11 @@ Created 2012-09-23 Sunny Bains #include "os0event.h" #include "ut0mutex.h" +#include <my_sys.h> #ifdef _WIN32 #include <windows.h> #include <synchapi.h> -#endif /* _WIN32 */ - -#ifdef _WIN32 /** Native condition variable. */ typedef CONDITION_VARIABLE os_cond_t; #else @@ -358,21 +357,9 @@ os_event::wait_time_low( struct timespec abstime; if (time_in_usec != OS_SYNC_INFINITE_TIME) { - struct timeval tv; - int ret; - ulint sec; - ulint usec; - - ret = ut_usectime(&sec, &usec); - ut_a(ret == 0); - - tv.tv_sec = sec; - tv.tv_usec = usec; - - tv.tv_usec += time_in_usec; - - abstime.tv_sec = tv.tv_sec + tv.tv_usec / 1000000; - abstime.tv_nsec = tv.tv_usec % 1000000 * 1000; + ulonglong usec = ulonglong(time_in_usec) + my_hrtime().val; + abstime.tv_sec = usec / 1000000; + abstime.tv_nsec = (usec % 1000000) * 1000; } else { abstime.tv_nsec = 999999999; abstime.tv_sec = (time_t) ULINT_MAX; diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 594e9f0aeb4..ceb6c69aec8 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -5885,7 +5885,7 @@ AIO::start( os_aio_validate(); - os_last_printout = ut_time(); + os_last_printout = time(NULL); if (srv_use_native_aio) { return(true); @@ -6141,7 +6141,7 @@ AIO::reserve_slot( } slot->is_reserved = true; - slot->reservation_time = ut_time(); + slot->reservation_time = time(NULL); slot->m1 = m1; slot->m2 = m2; slot->file = file; @@ -6951,7 +6951,7 @@ private: { ulint age; - age = (ulint) difftime(ut_time(), slot->reservation_time); + age = (ulint) difftime(time(NULL), slot->reservation_time); if ((age >= 2 && age > m_oldest) || (age >= 2 @@ -7353,7 +7353,7 @@ os_aio_print(FILE* file) AIO::print_all(file); putc('\n', file); - current_time = ut_time(); + current_time = time(NULL); time_elapsed = 0.001 + difftime(current_time, os_last_printout); fprintf(file, @@ -7419,7 +7419,7 @@ os_aio_refresh_stats() os_bytes_read_since_printout = 0; - os_last_printout = ut_time(); + os_last_printout = time(NULL); } /** Checks that all slots in the system have been freed, that is, there are diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index 97a485810ec..ded90c1c4f8 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -41,7 +41,7 @@ number between 0 and 2^64-1 inclusive. The formula and the constants being used are: X[n+1] = (a * X[n] + c) mod m where: -X[0] = ut_time_us(NULL) +X[0] = my_interval_timer() a = 1103515245 (3^5 * 5 * 7 * 129749) c = 12345 (3 * 5 * 823) m = 18446744073709551616 (2^64) @@ -54,12 +54,10 @@ page_cur_lcg_prng(void) { #define LCG_a 1103515245 #define LCG_c 12345 - static ib_uint64_t lcg_current = 0; - static ibool initialized = FALSE; + static uint64_t lcg_current; - if (!initialized) { - lcg_current = (ib_uint64_t) ut_time_us(NULL); - initialized = TRUE; + if (!lcg_current) { + lcg_current = my_interval_timer(); } /* no need to "% 2^64" explicitly because lcg_current is diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index c1bb2f2c37d..243f44ad118 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -1258,7 +1258,7 @@ page_zip_compress( ulint n_blobs = 0; byte* storage; /* storage of uncompressed columns */ - uintmax_t usec = ut_time_us(NULL); + const ulonglong ns = my_interval_timer(); #ifdef PAGE_ZIP_COMPRESS_DBG FILE* logfile = NULL; #endif @@ -1509,7 +1509,7 @@ err_exit: dict_index_zip_failure(index); } - uintmax_t time_diff = ut_time_us(NULL) - usec; + const uint64_t time_diff = (my_interval_timer() - ns) / 1000; page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff; if (cmp_per_index_enabled) { @@ -1575,7 +1575,7 @@ err_exit: fclose(logfile); } #endif /* PAGE_ZIP_COMPRESS_DBG */ - uintmax_t time_diff = ut_time_us(NULL) - usec; + const uint64_t time_diff = (my_interval_timer() - ns) / 1000; page_zip_stat[page_zip->ssize - 1].compressed_ok++; page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff; if (cmp_per_index_enabled) { @@ -3202,13 +3202,13 @@ page_zip_decompress( page header fields that should not change after page creation */ { - uintmax_t usec = ut_time_us(NULL); + const ulonglong ns = my_interval_timer(); if (!page_zip_decompress_low(page_zip, page, all)) { return(FALSE); } - uintmax_t time_diff = ut_time_us(NULL) - usec; + const uint64_t time_diff = (my_interval_timer() - ns) / 1000; page_zip_stat[page_zip->ssize - 1].decompressed++; page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff; diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc index 8a7485e1064..e9076bfae50 100644 --- a/storage/innobase/row/row0ftsort.cc +++ b/storage/innobase/row/row0ftsort.cc @@ -159,25 +159,26 @@ row_merge_create_fts_sort_index( return(new_index); } -/*********************************************************************//** -Initialize FTS parallel sort structures. -@return TRUE if all successful */ -ibool + +/** Initialize FTS parallel sort structures. +@param[in] trx transaction +@param[in,out] dup descriptor of FTS index being created +@param[in,out] new_table table where indexes are created +@param[in] opt_doc_id_size whether to use 4 bytes instead of 8 bytes + integer to store Doc ID during sort +@param[in] old_zip_size page size of the old table during alter +@param[out] psort parallel sort info to be instantiated +@param[out] merge parallel merge info to be instantiated +@return true if all successful */ +bool row_fts_psort_info_init( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - row_merge_dup_t* dup, /*!< in,own: descriptor of - FTS index being created */ - const dict_table_t* new_table,/*!< in: table on which indexes are - created */ - ibool opt_doc_id_size, - /*!< in: whether to use 4 bytes - instead of 8 bytes integer to - store Doc ID during sort */ - fts_psort_t** psort, /*!< out: parallel sort info to be - instantiated */ - fts_psort_t** merge) /*!< out: parallel merge info - to be instantiated */ + trx_t* trx, + row_merge_dup_t*dup, + dict_table_t* new_table, + bool opt_doc_id_size, + ulint old_zip_size, + fts_psort_t** psort, + fts_psort_t** merge) { ulint i; ulint j; @@ -187,6 +188,7 @@ row_fts_psort_info_init( ulint block_size; ibool ret = TRUE; bool encrypted = false; + ut_ad(ut_is_2pow(old_zip_size)); block_size = 3 * srv_sort_buf_size; @@ -209,7 +211,8 @@ row_fts_psort_info_init( } common_info->dup = dup; - common_info->new_table = (dict_table_t*) new_table; + common_info->new_table = new_table; + common_info->old_zip_size = old_zip_size; common_info->trx = trx; common_info->all_info = psort_info; common_info->sort_event = os_event_create(0); @@ -803,7 +806,7 @@ DECLARE_THREAD(fts_parallel_tokenization)( block = psort_info->merge_block; crypt_block = psort_info->crypt_block; - const ulint zip_size = table->space->zip_size(); + const ulint zip_size = psort_info->psort_common->old_zip_size; row_merge_fts_get_next_doc_item(psort_info, &doc_item); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index c965d51a6d1..bd894d06541 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1430,7 +1430,8 @@ row_ins_foreign_check_on_constraint( cascade->state = UPD_NODE_UPDATE_CLUSTERED; #ifdef WITH_WSREP - err = wsrep_append_foreign_key(trx, foreign, clust_rec, clust_index, + err = wsrep_append_foreign_key(trx, foreign, cascade->pcur->old_rec, + clust_index, FALSE, WSREP_SERVICE_KEY_EXCLUSIVE); if (err != DB_SUCCESS) { fprintf(stderr, @@ -1816,6 +1817,10 @@ row_ins_check_foreign_constraint( && wsrep_protocol_version < 4) ? WSREP_SERVICE_KEY_SHARED : WSREP_SERVICE_KEY_REFERENCE); + if (err != DB_SUCCESS) { + fprintf(stderr, + "WSREP: foreign key append failed: %d\n", err); + } #endif /* WITH_WSREP */ goto end_scan; } else if (foreign->type != 0) { diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index e62a41e4275..55d3292b9e6 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -2567,6 +2567,7 @@ write_buffers: BTR_SEARCH_LEAF, &pcur, &mtr); buf = row_merge_buf_empty(buf); + merge_buf[i] = buf; /* Restart the outer loop on the record. We did not insert it into any index yet. */ @@ -2692,6 +2693,7 @@ write_buffers: } } merge_buf[i] = row_merge_buf_empty(buf); + buf = merge_buf[i]; if (UNIV_LIKELY(row != NULL)) { /* Try writing the record again, now @@ -2869,8 +2871,7 @@ wait_again: if (max_doc_id && err == DB_SUCCESS) { /* Sync fts cache for other fts indexes to keep all fts indexes consistent in sync_doc_id. */ - err = fts_sync_table(const_cast<dict_table_t*>(new_table), - false, true, false); + err = fts_sync_table(const_cast<dict_table_t*>(new_table)); if (err == DB_SUCCESS) { fts_update_next_doc_id(NULL, new_table, max_doc_id); @@ -4682,6 +4683,7 @@ row_merge_build_indexes( created */ if (!row_fts_psort_info_init( trx, dup, new_table, opt_doc_id_size, + old_table->space->zip_size(), &psort_info, &merge_info)) { error = DB_CORRUPTION; goto func_exit; diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 8e3c8ff8d28..c834fba9b13 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -3412,7 +3412,9 @@ row_drop_table_for_mysql( calling btr_search_drop_page_hash_index() while we hold the InnoDB dictionary lock, we will drop any adaptive hash index entries upfront. */ - bool immune = is_temp_name + const bool immune = is_temp_name + || create_failed + || sqlcom == SQLCOM_CREATE_TABLE || strstr(table->name.m_name, "/FTS"); while (buf_LRU_drop_page_hash_for_tablespace(table)) { diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc index ac3c5af7d8e..f327dce121b 100644 --- a/storage/innobase/row/row0vers.cc +++ b/storage/innobase/row/row0vers.cc @@ -897,7 +897,8 @@ row_vers_old_has_index_entry( ut_ad(mtr_memo_contains_page_flagged(mtr, rec, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_S_FIX)); - ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S)); + ut_ad(!rw_lock_own(&purge_sys.latch, RW_LOCK_S)); + ut_ad(also_curr || !vcol_info); clust_index = dict_table_get_first_index(index->table); @@ -964,7 +965,7 @@ row_vers_old_has_index_entry( entry = row_build_index_entry( row, ext, index, heap); if (entry && !dtuple_coll_cmp(ientry, entry)) { - goto safe_to_purge; + goto unsafe_to_purge; } } else { /* Build index entry out of row */ @@ -985,7 +986,7 @@ row_vers_old_has_index_entry( clust_index, clust_offsets, index, ientry, roll_ptr, trx_id, NULL, &vrow, mtr)) { - goto safe_to_purge; + goto unsafe_to_purge; } } clust_offsets = rec_get_offsets(rec, clust_index, NULL, @@ -1018,7 +1019,7 @@ row_vers_old_has_index_entry( a different binary value in a char field, but the collation identifies the old and new value anyway! */ if (entry && !dtuple_coll_cmp(ientry, entry)) { -safe_to_purge: +unsafe_to_purge: mem_heap_free(heap); if (v_heap) { @@ -1058,7 +1059,6 @@ safe_to_purge: if (!prev_version) { /* Versions end here */ -unsafe_to_purge: mem_heap_free(heap); if (v_heap) { @@ -1120,7 +1120,7 @@ unsafe_to_purge: and new value anyway! */ if (entry && !dtuple_coll_cmp(ientry, entry)) { - goto safe_to_purge; + goto unsafe_to_purge; } } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 909b208b9d8..6df284b1f15 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1668,8 +1668,9 @@ DECLARE_THREAD(srv_monitor_thread)(void*) pfs_register_thread(srv_monitor_thread_key); #endif /* UNIV_PFS_THREAD */ - srv_last_monitor_time = ut_time(); - last_monitor_time = ut_time(); + current_time = time(NULL); + srv_last_monitor_time = current_time; + last_monitor_time = current_time; mutex_skipped = 0; last_srv_print_monitor = srv_print_innodb_monitor; loop: @@ -1680,12 +1681,12 @@ loop: os_event_wait_time_low(srv_monitor_event, 5000000, sig_count); - current_time = ut_time(); + current_time = time(NULL); time_elapsed = difftime(current_time, last_monitor_time); if (time_elapsed > 15) { - last_monitor_time = ut_time(); + last_monitor_time = current_time; if (srv_print_innodb_monitor) { /* Reset mutex_skipped counter everytime @@ -2022,20 +2023,16 @@ static void srv_shutdown_print_master_pending( /*==============================*/ - ib_time_t* last_print_time, /*!< last time the function + time_t* last_print_time, /*!< last time the function print the message */ ulint n_tables_to_drop, /*!< number of tables to be dropped */ ulint n_bytes_merged) /*!< number of change buffer just merged */ { - ib_time_t current_time; - double time_elapsed; - - current_time = ut_time(); - time_elapsed = ut_difftime(current_time, *last_print_time); + time_t current_time = time(NULL); - if (time_elapsed > 60) { + if (difftime(current_time, *last_print_time) > 60) { *last_print_time = current_time; if (n_tables_to_drop) { @@ -2113,8 +2110,8 @@ void srv_master_do_active_tasks(void) /*============================*/ { - ib_time_t cur_time = ut_time(); - uintmax_t counter_time = ut_time_us(NULL); + time_t cur_time = time(NULL); + ulonglong counter_time = microsecond_interval_timer(); /* First do the tasks that we are suppose to do at each invocation of this function. */ @@ -2144,7 +2141,7 @@ srv_master_do_active_tasks(void) /* Do an ibuf merge */ srv_main_thread_op_info = "doing insert buffer merge"; - counter_time = ut_time_us(NULL); + counter_time = microsecond_interval_timer(); ibuf_merge_in_background(false); MONITOR_INC_TIME_IN_MICRO_SECS( MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time); @@ -2209,8 +2206,6 @@ void srv_master_do_idle_tasks(void) /*==========================*/ { - uintmax_t counter_time; - ++srv_main_idle_loops; MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS); @@ -2219,7 +2214,7 @@ srv_master_do_idle_tasks(void) /* ALTER TABLE in MySQL requires on Unix that the table handler can drop tables lazily after there no longer are SELECT queries to them. */ - counter_time = ut_time_us(NULL); + ulonglong counter_time = microsecond_interval_timer(); srv_main_thread_op_info = "doing background drop tables"; row_drop_tables_for_mysql_in_background(); MONITOR_INC_TIME_IN_MICRO_SECS( @@ -2238,7 +2233,7 @@ srv_master_do_idle_tasks(void) log_free_check(); /* Do an ibuf merge */ - counter_time = ut_time_us(NULL); + counter_time = microsecond_interval_timer(); srv_main_thread_op_info = "doing insert buffer merge"; ibuf_merge_in_background(true); MONITOR_INC_TIME_IN_MICRO_SECS( @@ -2291,7 +2286,7 @@ srv_shutdown(bool ibuf_merge) { ulint n_bytes_merged = 0; ulint n_tables_to_drop; - ib_time_t now = ut_time(); + time_t now = time(NULL); do { ut_ad(!srv_read_only_mode); @@ -2429,10 +2424,10 @@ static bool srv_purge_should_exit() uint32_t history_size = trx_sys.rseg_history_len; if (history_size) { #if defined HAVE_SYSTEMD && !defined EMBEDDED_LIBRARY - static ib_time_t progress_time; - ib_time_t time = ut_time(); - if (time - progress_time >= 15) { - progress_time = time; + static time_t progress_time; + time_t now = time(NULL); + if (now - progress_time >= 15) { + progress_time = now; service_manager_extend_timeout( INNODB_EXTEND_TIMEOUT_INTERVAL, "InnoDB: to purge %u transactions", diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 985382c69a5..5783bb9ae66 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -47,7 +47,6 @@ Created 2/16/1996 Heikki Tuuri #include "row0ftsort.h" #include "ut0mem.h" -#include "ut0timer.h" #include "mem0mem.h" #include "data0data.h" #include "data0type.h" diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc index e6c2220765f..6e858254ad4 100644 --- a/storage/innobase/sync/sync0arr.cc +++ b/storage/innobase/sync/sync0arr.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2013, 2018, MariaDB Corporation. +Copyright (c) 2013, 2019, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -119,8 +119,10 @@ struct sync_cell_t { has not been signalled in the period between the reset and wait call. */ - time_t reservation_time;/*!< time when the thread reserved - the wait cell */ + /** time(NULL) when the wait cell was reserved. + FIXME: sync_array_print_long_waits_low() may display bogus + warnings when the system time is adjusted to the past! */ + time_t reservation_time; }; /* NOTE: It is allowed for a thread to wait for an event allocated for @@ -375,7 +377,7 @@ sync_array_reserve_cell( cell->thread_id = os_thread_get_curr_id(); - cell->reservation_time = ut_time(); + cell->reservation_time = time(NULL); /* Make sure the event is reset and also store the value of signal_count at which the event was reset. */ diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc index 145af721366..a39fb5d2e95 100644 --- a/storage/innobase/trx/trx0i_s.cc +++ b/storage/innobase/trx/trx0i_s.cc @@ -140,9 +140,8 @@ struct i_s_table_cache_t { struct trx_i_s_cache_t { rw_lock_t rw_lock; /*!< read-write lock protecting the rest of this structure */ - uintmax_t last_read; /*!< last time the cache was read; - measured in microseconds since - epoch */ + ulonglong last_read; /*!< last time the cache was read; + measured in nanoseconds */ ib_mutex_t last_read_mutex;/*!< mutex protecting the last_read member - it is updated inside a shared lock of the @@ -457,7 +456,7 @@ fill_trx_row( ut_ad(lock_mutex_own()); row->trx_id = trx_get_id_for_print(trx); - row->trx_started = (ib_time_t) trx->start_time; + row->trx_started = trx->start_time; row->trx_state = trx_get_que_state_str(trx); row->requested_lock_row = requested_lock_row; ut_ad(requested_lock_row == NULL @@ -466,7 +465,7 @@ fill_trx_row( if (trx->lock.wait_lock != NULL) { ut_a(requested_lock_row != NULL); - row->trx_wait_started = (ib_time_t) trx->lock.wait_started; + row->trx_wait_started = trx->lock.wait_started; } else { ut_a(requested_lock_row == NULL); row->trx_wait_started = 0; @@ -1183,22 +1182,16 @@ add_trx_relevant_locks_to_cache( } /** The minimum time that a cache must not be updated after it has been -read for the last time; measured in microseconds. We use this technique +read for the last time; measured in nanoseconds. We use this technique to ensure that SELECTs which join several INFORMATION SCHEMA tables read the same version of the cache. */ -#define CACHE_MIN_IDLE_TIME_US 100000 /* 0.1 sec */ +#define CACHE_MIN_IDLE_TIME_NS 100000000 /* 0.1 sec */ /*******************************************************************//** Checks if the cache can safely be updated. -@return TRUE if can be updated */ -static -ibool -can_cache_be_updated( -/*=================*/ - trx_i_s_cache_t* cache) /*!< in: cache */ +@return whether the cache can be updated */ +static bool can_cache_be_updated(trx_i_s_cache_t* cache) { - uintmax_t now; - /* Here we read cache->last_read without acquiring its mutex because last_read is only updated when a shared rw lock on the whole cache is being held (see trx_i_s_cache_end_read()) and @@ -1208,13 +1201,7 @@ can_cache_be_updated( ut_ad(rw_lock_own(&cache->rw_lock, RW_LOCK_X)); - now = ut_time_us(NULL); - if (now - cache->last_read > CACHE_MIN_IDLE_TIME_US) { - - return(TRUE); - } - - return(FALSE); + return my_interval_timer() - cache->last_read > CACHE_MIN_IDLE_TIME_NS; } /*******************************************************************//** @@ -1314,8 +1301,7 @@ trx_i_s_possibly_fetch_data_into_cache( lock_mutex_exit(); /* update cache last read time */ - time_t now = ut_time_us(NULL); - cache->last_read = now; + cache->last_read = my_interval_timer(); return(0); } @@ -1405,12 +1391,10 @@ trx_i_s_cache_end_read( /*===================*/ trx_i_s_cache_t* cache) /*!< in: cache */ { - uintmax_t now; - ut_ad(rw_lock_own(&cache->rw_lock, RW_LOCK_S)); /* update cache last read time */ - now = ut_time_us(NULL); + const ulonglong now = my_interval_timer(); mutex_enter(&cache->last_read_mutex); cache->last_read = now; mutex_exit(&cache->last_read_mutex); diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index eeca6ef9286..db0e9c0758c 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -137,7 +137,8 @@ purge_graph_build() trx_t* trx = trx_create(); ut_ad(!trx->id); - trx->start_time = ut_time(); + trx->start_time = time(NULL); + trx->start_time_micro = microsecond_interval_timer(); trx->state = TRX_STATE_ACTIVE; trx->op_info = "purge trx"; diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 310e7982352..61f0ce57777 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -726,9 +726,9 @@ static my_bool trx_roll_count_callback(rw_trx_hash_element_t *element, /** Report progress when rolling back a row of a recovered transaction. */ void trx_roll_report_progress() { - ib_time_t time = ut_time(); + time_t now = time(NULL); mutex_enter(&recv_sys.mutex); - bool report = recv_sys.report(time); + bool report = recv_sys.report(now); mutex_exit(&recv_sys.mutex); if (report) { diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index c75d766a48b..ab67e09b992 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -610,7 +610,8 @@ trx_resurrect_table_locks( */ static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, - ib_time_t start_time, uint64_t *rows_to_undo, + time_t start_time, ulonglong start_time_micro, + uint64_t *rows_to_undo, bool is_old_insert) { trx_state_t state; @@ -662,6 +663,7 @@ static void trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg, trx->id= undo->trx_id; trx->is_recovered= true; trx->start_time= start_time; + trx->start_time_micro= start_time_micro; if (undo->dict_operation) { @@ -702,7 +704,8 @@ trx_lists_init_at_db_start() /* Look from the rollback segments if there exist undo logs for transactions. */ - const ib_time_t start_time = ut_time(); + const time_t start_time = time(NULL); + const ulonglong start_time_micro= microsecond_interval_timer(); uint64_t rows_to_undo = 0; for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) { @@ -721,8 +724,8 @@ trx_lists_init_at_db_start() undo = UT_LIST_GET_FIRST(rseg->old_insert_list); while (undo) { trx_undo_t* next = UT_LIST_GET_NEXT(undo_list, undo); - trx_resurrect(undo, rseg, start_time, &rows_to_undo, - true); + trx_resurrect(undo, rseg, start_time, start_time_micro, + &rows_to_undo, true); undo = next; } @@ -733,6 +736,7 @@ trx_lists_init_at_db_start() trx_t *trx = trx_sys.find(0, undo->trx_id, false); if (!trx) { trx_resurrect(undo, rseg, start_time, + start_time_micro, &rows_to_undo, false); } else { ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) || @@ -990,14 +994,10 @@ trx_start_low( } } - if (trx->mysql_thd != NULL) { - trx->start_time = thd_start_time_in_secs(trx->mysql_thd); - trx->start_time_micro = thd_query_start_micro(trx->mysql_thd); - - } else { - trx->start_time = ut_time(); - trx->start_time_micro = 0; - } + trx->start_time = time(NULL); + trx->start_time_micro = trx->mysql_thd + ? thd_query_start_micro(trx->mysql_thd) + : microsecond_interval_timer(); ut_a(trx->error_state == DB_SUCCESS); @@ -1232,7 +1232,7 @@ trx_update_mod_tables_timestamp( { /* consider using trx->start_time if calling time() is too expensive here */ - time_t now = ut_time(); + const time_t now = time(NULL); trx_mod_tables_t::const_iterator end = trx->mod_tables.end(); diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc index bbc519ad92b..5ccd15dd5ca 100644 --- a/storage/innobase/ut/ut0crc32.cc +++ b/storage/innobase/ut/ut0crc32.cc @@ -539,23 +539,6 @@ ut_crc32_init() ut_cpuid(vend, &model, &family, &stepping, &features_ecx, &features_edx); - /* Valgrind does not understand the CRC32 instructions: - - vex amd64->IR: unhandled instruction bytes: 0xF2 0x48 0xF 0x38 0xF0 0xA - valgrind: Unrecognised instruction at address 0xad3db5. - Your program just tried to execute an instruction that Valgrind - did not recognise. There are two possible reasons for this. - 1. Your program has a bug and erroneously jumped to a non-code - location. If you are running Memcheck and you just saw a - warning about a bad jump, it's probably your program's fault. - 2. The instruction is legitimate but Valgrind doesn't handle it, - i.e. it's Valgrind's fault. If you think this is the case or - you are not sure, please let us know and we'll try to fix it. - Either way, Valgrind will now raise a SIGILL signal which will - probably kill your program. - - */ - if (features_ecx & 1 << 20) { ut_crc32 = ut_crc32_hw; ut_crc32_implementation = "Using SSE2 crc32 instructions"; diff --git a/storage/innobase/ut/ut0timer.cc b/storage/innobase/ut/ut0timer.cc deleted file mode 100644 index 9aefcafebc6..00000000000 --- a/storage/innobase/ut/ut0timer.cc +++ /dev/null @@ -1,90 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved. -Copyright (c) 2014, SkySQL Ab. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file ut/ut0timer.cc -Timer rountines - -Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com -modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6 -*************************************************************************/ - -#include "data0type.h" -#include <my_rdtsc.h> -#include <ut0timer.h> - -/**************************************************************//** -Initial timer definition -@return 0 */ -static -ulonglong -ut_timer_none(void) -/*===============*/ -{ - return 0; -} - -/**************************************************************//** -Function pointer to point selected timer function. -@return timer current value */ -ulonglong (*ut_timer_now)(void) = &ut_timer_none; - -struct my_timer_unit_info ut_timer; -extern MYSQL_PLUGIN_IMPORT MY_TIMER_INFO sys_timer_info; - -/**************************************************************//** -Sets up the data required for use of my_timer_* functions. -Selects the best timer by high frequency, and tight resolution. -Points my_timer_now() to the selected timer function. -Initializes my_timer struct to contain the info for selected timer.*/ -UNIV_INTERN -void -ut_init_timer(void) -/*===============*/ -{ - if (sys_timer_info.cycles.frequency > 1000000 && - sys_timer_info.cycles.resolution == 1) { - ut_timer = sys_timer_info.cycles; - ut_timer_now = &my_timer_cycles; - } else if (sys_timer_info.nanoseconds.frequency > 1000000 && - sys_timer_info.nanoseconds.resolution == 1) { - ut_timer = sys_timer_info.nanoseconds; - ut_timer_now = &my_timer_nanoseconds; - } else if (sys_timer_info.microseconds.frequency >= 1000000 && - sys_timer_info.microseconds.resolution == 1) { - ut_timer = sys_timer_info.microseconds; - ut_timer_now = &my_timer_microseconds; - - } else if (sys_timer_info.milliseconds.frequency >= 1000 && - sys_timer_info.milliseconds.resolution == 1) { - ut_timer = sys_timer_info.milliseconds; - ut_timer_now = &my_timer_milliseconds; - } else if (sys_timer_info.ticks.frequency >= 1000 && - /* Will probably be false */ - sys_timer_info.ticks.resolution == 1) { - ut_timer = sys_timer_info.ticks; - ut_timer_now = &my_timer_ticks; - } else { - /* None are acceptable, so leave it as "None", and fill in struct */ - ut_timer.frequency = 1; /* Avoid div-by-zero */ - ut_timer.overhead = 0; /* Since it doesn't do anything */ - ut_timer.resolution = 10; /* Another sign it's bad */ - ut_timer.routine = 0; /* None */ - } -} diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc index 4265e23334b..5c19fcb6825 100644 --- a/storage/innobase/ut/ut0ut.cc +++ b/storage/innobase/ut/ut0ut.cc @@ -39,136 +39,6 @@ Created 5/11/1994 Heikki Tuuri #include "log.h" #include "my_cpu.h" -#ifdef _WIN32 -typedef VOID(WINAPI *time_fn)(LPFILETIME); -static time_fn ut_get_system_time_as_file_time = GetSystemTimeAsFileTime; - -/*****************************************************************//** -NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix -epoch starts from 1970/1/1. For selection of constant see: -http://support.microsoft.com/kb/167296/ */ -#define WIN_TO_UNIX_DELTA_USEC 11644473600000000LL - - -/*****************************************************************//** -This is the Windows version of gettimeofday(2). -@return 0 if all OK else -1 */ -static -int -ut_gettimeofday( -/*============*/ - struct timeval* tv, /*!< out: Values are relative to Unix epoch */ - void* tz) /*!< in: not used */ -{ - FILETIME ft; - int64_t tm; - - if (!tv) { - errno = EINVAL; - return(-1); - } - - ut_get_system_time_as_file_time(&ft); - - tm = (int64_t) ft.dwHighDateTime << 32; - tm |= ft.dwLowDateTime; - - ut_a(tm >= 0); /* If tm wraps over to negative, the quotient / 10 - does not work */ - - tm /= 10; /* Convert from 100 nsec periods to usec */ - - /* If we don't convert to the Unix epoch the value for - struct timeval::tv_sec will overflow.*/ - tm -= WIN_TO_UNIX_DELTA_USEC; - - tv->tv_sec = (long) (tm / 1000000L); - tv->tv_usec = (long) (tm % 1000000L); - - return(0); -} -#else -/** An alias for gettimeofday(2). On Microsoft Windows, we have to -reimplement this function. */ -#define ut_gettimeofday gettimeofday -#endif - -/**********************************************************//** -Returns system time. We do not specify the format of the time returned: -the only way to manipulate it is to use the function ut_difftime. -@return system time */ -ib_time_t -ut_time(void) -/*=========*/ -{ - return(time(NULL)); -} - - -/**********************************************************//** -Returns system time. -Upon successful completion, the value 0 is returned; otherwise the -value -1 is returned and the global variable errno is set to indicate the -error. -@return 0 on success, -1 otherwise */ -int -ut_usectime( -/*========*/ - ulint* sec, /*!< out: seconds since the Epoch */ - ulint* ms) /*!< out: microseconds since the Epoch+*sec */ -{ - struct timeval tv; - int ret; - int errno_gettimeofday; - int i; - - for (i = 0; i < 10; i++) { - - ret = ut_gettimeofday(&tv, NULL); - - if (ret == -1) { - errno_gettimeofday = errno; - ib::error() << "gettimeofday(): " - << strerror(errno_gettimeofday); - os_thread_sleep(100000); /* 0.1 sec */ - errno = errno_gettimeofday; - } else { - break; - } - } - - if (ret != -1) { - *sec = (ulint) tv.tv_sec; - *ms = (ulint) tv.tv_usec; - } - - return(ret); -} - -/**********************************************************//** -Returns the number of microseconds since epoch. Similar to -time(3), the return value is also stored in *tloc, provided -that tloc is non-NULL. -@return us since epoch */ -uintmax_t -ut_time_us( -/*=======*/ - uintmax_t* tloc) /*!< out: us since epoch, if non-NULL */ -{ - struct timeval tv; - uintmax_t us; - - ut_gettimeofday(&tv, NULL); - - us = uintmax_t(tv.tv_sec) * 1000000 + uintmax_t(tv.tv_usec); - - if (tloc != NULL) { - *tloc = us; - } - - return(us); -} - /**********************************************************//** Returns the number of milliseconds since some epoch. The value may wrap around. It should only be used for heuristic @@ -178,25 +48,8 @@ ulint ut_time_ms(void) /*============*/ { - struct timeval tv; - - ut_gettimeofday(&tv, NULL); - - return(ulint(tv.tv_sec) * 1000 + ulint(tv.tv_usec / 1000)); -} - -/**********************************************************//** -Returns the difference of two times in seconds. -@return time2 - time1 expressed in seconds */ -double -ut_difftime( -/*========*/ - ib_time_t time2, /*!< in: time */ - ib_time_t time1) /*!< in: time */ -{ - return(difftime(time2, time1)); + return static_cast<ulint>(my_interval_timer() / 1000000); } - #endif /* !UNIV_INNOCHECKSUM */ /**********************************************************//** diff --git a/storage/innobase/ut/ut0wqueue.cc b/storage/innobase/ut/ut0wqueue.cc index 4697aa2fc46..026431695ed 100644 --- a/storage/innobase/ut/ut0wqueue.cc +++ b/storage/innobase/ut/ut0wqueue.cc @@ -135,7 +135,7 @@ ib_wqueue_timedwait( /*================*/ /* out: work item or NULL on timeout*/ ib_wqueue_t* wq, /* in: work queue */ - ib_time_t wait_in_usecs) /* in: wait time in micro seconds */ + ulint wait_in_usecs) /* in: wait time in micro seconds */ { ib_list_node_t* node = NULL; |