diff options
author | Sergei Golubchik <sergii@pisem.net> | 2014-08-06 20:05:10 +0200 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2014-08-06 20:05:10 +0200 |
commit | 2023fac28130d7d3f7d6776332239c62c3890195 (patch) | |
tree | c4cab7ec55da2f87ce0e19ac9270608c04e78e4b | |
parent | a7f39aacd573bfa299a930ee8275ba3066efc33a (diff) | |
parent | 58b09cd45fb2c75d6194365730d7a3fed1829bb8 (diff) | |
download | mariadb-git-2023fac28130d7d3f7d6776332239c62c3890195.tar.gz |
innodb-5.6.19
25 files changed, 688 insertions, 242 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index a165c9c47f4..34a72f360be 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -202,15 +202,6 @@ btr_rec_free_externally_stored_fields( mtr_t* mtr); /*!< in: mini-transaction handle which contains an X-latch to record page and to the index tree */ -/***********************************************************//** -Gets the externally stored size of a record, in units of a database page. -@return externally stored part, in units of a database page */ -static -ulint -btr_rec_get_externally_stored_len( -/*==============================*/ - const rec_t* rec, /*!< in: record */ - const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ #endif /* !UNIV_HOTBACKUP */ /******************************************************//** @@ -4044,15 +4035,15 @@ btr_rec_get_field_ref_offs( #define btr_rec_get_field_ref(rec, offsets, n) \ ((rec) + btr_rec_get_field_ref_offs(offsets, n)) -/***********************************************************//** -Gets the externally stored size of a record, in units of a database page. +/** Gets the externally stored size of a record, in units of a database page. +@param[in] rec record +@param[in] offsets array returned by rec_get_offsets() @return externally stored part, in units of a database page */ -static + ulint btr_rec_get_externally_stored_len( -/*==============================*/ - const rec_t* rec, /*!< in: record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ + const rec_t* rec, + const ulint* offsets) { ulint n_fields; ulint total_extern_len = 0; diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 3cce75abe74..fa2edb90b8e 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -2183,6 +2183,10 @@ af_get_pct_for_dirty() { ulint dirty_pct = buf_get_modified_ratio_pct(); + if (dirty_pct > 0 && srv_max_buf_pool_modified_pct == 0) { + return(100); + } + ut_a(srv_max_dirty_pages_pct_lwm <= srv_max_buf_pool_modified_pct); diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 052e9bf2567..30266262f37 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -50,6 +50,7 @@ UNIV_INTERN dict_index_t* dict_ind_compact; #include "btr0btr.h" #include "btr0cur.h" #include "btr0sea.h" +#include "os0once.h" #include "page0zip.h" #include "page0page.h" #include "pars0pars.h" @@ -102,7 +103,7 @@ UNIV_INTERN ulong zip_pad_max = 50; UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key; UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key; UNIV_INTERN mysql_pfs_key_t index_online_log_key; -UNIV_INTERN mysql_pfs_key_t dict_table_stats_latch_key; +UNIV_INTERN mysql_pfs_key_t dict_table_stats_key; #endif /* UNIV_PFS_RWLOCK */ #ifdef UNIV_PFS_MUTEX @@ -324,6 +325,82 @@ dict_mutex_exit_for_mysql(void) mutex_exit(&(dict_sys->mutex)); } +/** Allocate and init a dict_table_t's stats latch. +This function must not be called concurrently on the same table object. +@param[in,out] table_void table whose stats latch to create */ +static +void +dict_table_stats_latch_alloc( + void* table_void) +{ + dict_table_t* table = static_cast<dict_table_t*>(table_void); + + table->stats_latch = new(std::nothrow) rw_lock_t; + + ut_a(table->stats_latch != NULL); + + rw_lock_create(dict_table_stats_key, table->stats_latch, + SYNC_INDEX_TREE); +} + +/** Deinit and free a dict_table_t's stats latch. +This function must not be called concurrently on the same table object. +@param[in,out] table table whose stats latch to free */ +static +void +dict_table_stats_latch_free( + dict_table_t* table) +{ + rw_lock_free(table->stats_latch); + delete table->stats_latch; +} + +/** Create a dict_table_t's stats latch or delay for lazy creation. +This function is only called from either single threaded environment +or from a thread that has not shared the table object with other threads. +@param[in,out] table table whose stats latch to create +@param[in] enabled if false then the latch is disabled +and dict_table_stats_lock()/unlock() become noop on this table. */ + +void +dict_table_stats_latch_create( + dict_table_t* table, + bool enabled) +{ + if (!enabled) { + table->stats_latch = NULL; + table->stats_latch_created = os_once::DONE; + return; + } + +#ifdef HAVE_ATOMIC_BUILTINS + /* We create this lazily the first time it is used. */ + table->stats_latch = NULL; + table->stats_latch_created = os_once::NEVER_DONE; +#else /* HAVE_ATOMIC_BUILTINS */ + + dict_table_stats_latch_alloc(table); + + table->stats_latch_created = os_once::DONE; +#endif /* HAVE_ATOMIC_BUILTINS */ +} + +/** Destroy a dict_table_t's stats latch. +This function is only called from either single threaded environment +or from a thread that has not shared the table object with other threads. +@param[in,out] table table whose stats latch to destroy */ + +void +dict_table_stats_latch_destroy( + dict_table_t* table) +{ + if (table->stats_latch_created == os_once::DONE + && table->stats_latch != NULL) { + + dict_table_stats_latch_free(table); + } +} + /**********************************************************************//** Lock the appropriate latch to protect a given table's statistics. */ UNIV_INTERN @@ -336,6 +413,14 @@ dict_table_stats_lock( ut_ad(table != NULL); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); +#ifdef HAVE_ATOMIC_BUILTINS + os_once::do_or_wait_for_done( + &table->stats_latch_created, + dict_table_stats_latch_alloc, table); +#else /* HAVE_ATOMIC_BUILTINS */ + ut_ad(table->stats_latch_created == os_once::DONE); +#endif /* HAVE_ATOMIC_BUILTINS */ + if (table->stats_latch == NULL) { /* This is a dummy table object that is private in the current thread and is not shared between multiple threads, thus we @@ -5195,8 +5280,6 @@ dict_table_print( index = UT_LIST_GET_NEXT(indexes, index); } - table->stat_initialized = FALSE; - dict_table_stats_unlock(table, RW_X_LATCH); foreign = UT_LIST_GET_FIRST(table->foreign_list); diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc index 60daeea3a96..6310b2fd225 100644 --- a/storage/innobase/dict/dict0mem.cc +++ b/storage/innobase/dict/dict0mem.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -95,9 +95,9 @@ dict_mem_table_create( ut_d(table->magic_n = DICT_TABLE_MAGIC_N); - table->stats_latch = new rw_lock_t; - rw_lock_create(dict_table_stats_latch_key, table->stats_latch, - SYNC_INDEX_TREE); + /* true means that the stats latch will be enabled - + dict_table_stats_lock() will not be noop. */ + dict_table_stats_latch_create(table, true); #ifndef UNIV_HOTBACKUP table->autoinc_lock = static_cast<ib_lock_t*>( @@ -154,8 +154,7 @@ dict_mem_table_free( mutex_free(&(table->autoinc_mutex)); #endif /* UNIV_HOTBACKUP */ - rw_lock_free(table->stats_latch); - delete table->stats_latch; + dict_table_stats_latch_destroy(table); ut_free(table->name); mem_heap_free(table->heap); diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index aa417bbae7b..1eac9e0df51 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -46,6 +46,7 @@ Created Jan 06, 2010 Vasil Dimov #include "ut0rnd.h" /* ut_rnd_interval() */ #include "ut0ut.h" /* ut_format_name(), ut_time() */ +#include <algorithm> #include <map> #include <vector> @@ -127,10 +128,11 @@ where n=1..n_uniq. #endif /* UNIV_STATS_DEBUG */ /* Gets the number of leaf pages to sample in persistent stats estimation */ -#define N_SAMPLE_PAGES(index) \ - ((index)->table->stats_sample_pages != 0 ? \ - (index)->table->stats_sample_pages : \ - srv_stats_persistent_sample_pages) +#define N_SAMPLE_PAGES(index) \ + static_cast<ib_uint64_t>( \ + (index)->table->stats_sample_pages != 0 \ + ? (index)->table->stats_sample_pages \ + : srv_stats_persistent_sample_pages) /* number of distinct records on a given level that are required to stop descending to lower levels and fetch N_SAMPLE_PAGES(index) records @@ -432,9 +434,9 @@ dict_stats_table_clone_create( t->corrupted = table->corrupted; /* This private object "t" is not shared with other threads, so - we do not need the stats_latch. The lock/unlock routines will do - nothing if stats_latch is NULL. */ - t->stats_latch = NULL; + we do not need the stats_latch (thus we pass false below). The + dict_table_stats_lock()/unlock() routines will do nothing. */ + dict_table_stats_latch_create(t, false); UT_LIST_INIT(t->indexes); @@ -510,6 +512,7 @@ dict_stats_table_clone_free( /*========================*/ dict_table_t* t) /*!< in: dummy table object to free */ { + dict_table_stats_latch_destroy(t); mem_heap_free(t->heap); } @@ -1285,35 +1288,40 @@ enum page_scan_method_t { }; /* @} */ -/*********************************************************************//** -Scan a page, reading records from left to right and counting the number -of distinct records on that page (looking only at the first n_prefix -columns). If scan_method is QUIT_ON_FIRST_NON_BORING then the function +/** Scan a page, reading records from left to right and counting the number +of distinct records (looking only at the first n_prefix +columns) and the number of external pages pointed by records from this page. +If scan_method is QUIT_ON_FIRST_NON_BORING then the function will return as soon as it finds a record that does not match its neighbor to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the returned n_diff can either be 0 (empty page), 1 (the whole page has all keys equal) or 2 (the function found a non-boring record and returned). +@param[out] out_rec record, or NULL +@param[out] offsets1 rec_get_offsets() working space (must +be big enough) +@param[out] offsets2 rec_get_offsets() working space (must +be big enough) +@param[in] index index of the page +@param[in] page the page to scan +@param[in] n_prefix look at the first n_prefix columns +@param[in] scan_method scan to the end of the page or not +@param[out] n_diff number of distinct records encountered +@param[out] n_external_pages if this is non-NULL then it will be set +to the number of externally stored pages which were encountered @return offsets1 or offsets2 (the offsets of *out_rec), or NULL if the page is empty and does not contain user records. */ -UNIV_INLINE __attribute__((nonnull)) +UNIV_INLINE ulint* dict_stats_scan_page( -/*=================*/ - const rec_t** out_rec, /*!< out: record, or NULL */ - ulint* offsets1, /*!< out: rec_get_offsets() - working space (must be big - enough) */ - ulint* offsets2, /*!< out: rec_get_offsets() - working space (must be big - enough) */ - dict_index_t* index, /*!< in: index of the page */ - const page_t* page, /*!< in: the page to scan */ - ulint n_prefix, /*!< in: look at the first - n_prefix columns */ - page_scan_method_t scan_method, /*!< in: scan to the end of - the page or not */ - ib_uint64_t* n_diff) /*!< out: number of distinct - records encountered */ + const rec_t** out_rec, + ulint* offsets1, + ulint* offsets2, + dict_index_t* index, + const page_t* page, + ulint n_prefix, + page_scan_method_t scan_method, + ib_uint64_t* n_diff, + ib_uint64_t* n_external_pages) { ulint* offsets_rec = offsets1; ulint* offsets_next_rec = offsets2; @@ -1331,6 +1339,12 @@ dict_stats_scan_page( get_next = page_rec_get_next_const; } + const bool should_count_external_pages = n_external_pages != NULL; + + if (should_count_external_pages) { + *n_external_pages = 0; + } + rec = get_next(page_get_infimum_rec(page)); if (page_rec_is_supremum(rec)) { @@ -1343,6 +1357,11 @@ dict_stats_scan_page( offsets_rec = rec_get_offsets(rec, index, offsets_rec, ULINT_UNDEFINED, &heap); + if (should_count_external_pages) { + *n_external_pages += btr_rec_get_externally_stored_len( + rec, offsets_rec); + } + next_rec = get_next(rec); *n_diff = 1; @@ -1393,6 +1412,11 @@ dict_stats_scan_page( offsets_next_rec = offsets_tmp; } + if (should_count_external_pages) { + *n_external_pages += btr_rec_get_externally_stored_len( + rec, offsets_rec); + } + next_rec = get_next(next_rec); } @@ -1403,19 +1427,25 @@ func_exit: return(offsets_rec); } -/*********************************************************************//** -Dive below the current position of a cursor and calculate the number of +/** Dive below the current position of a cursor and calculate the number of distinct records on the leaf page, when looking at the fist n_prefix -columns. +columns. Also calculate the number of external pages pointed by records +on the leaf page. +@param[in] cur cursor +@param[in] n_prefix look at the first n_prefix columns +when comparing records +@param[out] n_diff number of distinct records +@param[out] n_external_pages number of external pages +@param[in,out] mtr mini-transaction @return number of distinct records on the leaf page */ static -ib_uint64_t +void dict_stats_analyze_index_below_cur( -/*===============================*/ - const btr_cur_t*cur, /*!< in: cursor */ - ulint n_prefix, /*!< in: look at the first n_prefix - columns when comparing records */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + const btr_cur_t* cur, + ulint n_prefix, + ib_uint64_t* n_diff, + ib_uint64_t* n_external_pages, + mtr_t* mtr) { dict_index_t* index; ulint space; @@ -1428,7 +1458,6 @@ dict_stats_analyze_index_below_cur( ulint* offsets1; ulint* offsets2; ulint* offsets_rec; - ib_uint64_t n_diff; /* the result */ ulint size; index = btr_cur_get_index(cur); @@ -1464,6 +1493,10 @@ dict_stats_analyze_index_below_cur( page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec); + /* assume no external pages by default - in case we quit from this + function without analyzing any leaf pages */ + *n_external_pages = 0; + /* descend to the leaf level on the B-tree */ for (;;) { @@ -1482,20 +1515,24 @@ dict_stats_analyze_index_below_cur( /* search for the first non-boring record on the page */ offsets_rec = dict_stats_scan_page( &rec, offsets1, offsets2, index, page, n_prefix, - QUIT_ON_FIRST_NON_BORING, &n_diff); + QUIT_ON_FIRST_NON_BORING, n_diff, NULL); /* pages on level > 0 are not allowed to be empty */ ut_a(offsets_rec != NULL); /* if page is not empty (offsets_rec != NULL) then n_diff must be > 0, otherwise there is a bug in dict_stats_scan_page() */ - ut_a(n_diff > 0); + ut_a(*n_diff > 0); - if (n_diff == 1) { + if (*n_diff == 1) { /* page has all keys equal and the end of the page was reached by dict_stats_scan_page(), no need to descend to the leaf level */ mem_heap_free(heap); - return(1); + /* can't get an estimate for n_external_pages here + because we do not dive to the leaf level, assume no + external pages (*n_external_pages was assigned to 0 + above). */ + return; } /* else */ @@ -1503,7 +1540,7 @@ dict_stats_analyze_index_below_cur( first non-boring record it finds, then the returned n_diff can either be 0 (empty page), 1 (page has all keys equal) or 2 (non-boring record was found) */ - ut_a(n_diff == 2); + ut_a(*n_diff == 2); /* we have a non-boring record in rec, descend below it */ @@ -1514,11 +1551,14 @@ dict_stats_analyze_index_below_cur( ut_ad(btr_page_get_level(page, mtr) == 0); /* scan the leaf page and find the number of distinct keys, - when looking only at the first n_prefix columns */ + when looking only at the first n_prefix columns; also estimate + the number of externally stored pages pointed by records on this + page */ offsets_rec = dict_stats_scan_page( &rec, offsets1, offsets2, index, page, n_prefix, - COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, &n_diff); + COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff, + n_external_pages); #if 0 DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n", @@ -1526,133 +1566,146 @@ dict_stats_analyze_index_below_cur( #endif mem_heap_free(heap); - - return(n_diff); } -/*********************************************************************//** -For a given level in an index select N_SAMPLE_PAGES(index) -(or less) records from that level and dive below them to the corresponding -leaf pages, then scan those leaf pages and save the sampling results in -index->stat_n_diff_key_vals[n_prefix - 1] and the number of pages scanned in -index->stat_n_sample_sizes[n_prefix - 1]. */ +/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[] +for each n-columns prefix (n from 1 to n_uniq). */ +struct n_diff_data_t { + /** Index of the level on which the descent through the btree + stopped. level 0 is the leaf level. This is >= 1 because we + avoid scanning the leaf level because it may contain too many + pages and doing so is useless when combined with the random dives - + if we are to scan the leaf level, this means a full scan and we can + simply do that instead of fiddling with picking random records higher + in the tree and to dive below them. At the start of the analyzing + we may decide to do full scan of the leaf level, but then this + structure is not used in that code path. */ + ulint level; + + /** Number of records on the level where the descend through the btree + stopped. When we scan the btree from the root, we stop at some mid + level, choose some records from it and dive below them towards a leaf + page to analyze. */ + ib_uint64_t n_recs_on_level; + + /** Number of different key values that were found on the mid level. */ + ib_uint64_t n_diff_on_level; + + /** Number of leaf pages that are analyzed. This is also the same as + the number of records that we pick from the mid level and dive below + them. */ + ib_uint64_t n_leaf_pages_to_analyze; + + /** Cumulative sum of the number of different key values that were + found on all analyzed pages. */ + ib_uint64_t n_diff_all_analyzed_pages; + + /** Cumulative sum of the number of external pages (stored outside of + the btree but in the same file segment). */ + ib_uint64_t n_external_pages_sum; +}; + +/** Estimate the number of different key values in an index when looking at +the first n_prefix columns. For a given level in an index select +n_diff_data->n_leaf_pages_to_analyze records from that level and dive below +them to the corresponding leaf pages, then scan those leaf pages and save the +sampling results in n_diff_data->n_diff_all_analyzed_pages. +@param[in] index index +@param[in] n_prefix look at first 'n_prefix' columns when +comparing records +@param[in] boundaries a vector that contains +n_diff_data->n_diff_on_level integers each of which represents the index (on +level 'level', counting from left/smallest to right/biggest from 0) of the +last record from each group of distinct keys +@param[in,out] n_diff_data n_diff_all_analyzed_pages and +n_external_pages_sum in this structure will be set by this function. The +members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the +caller in advance - they are used by some calculations inside this function +@param[in,out] mtr mini-transaction */ static void dict_stats_analyze_index_for_n_prefix( -/*==================================*/ - dict_index_t* index, /*!< in/out: index */ - ulint level, /*!< in: level, must be >= 1 */ - ib_uint64_t total_recs_on_level, - /*!< in: total number of - records on the given level */ - ulint n_prefix, /*!< in: look at first - n_prefix columns when - comparing records */ - ib_uint64_t n_diff_for_this_prefix, - /*!< in: number of distinct - records on the given level, - when looking at the first - n_prefix columns */ - boundaries_t* boundaries, /*!< in: array that contains - n_diff_for_this_prefix - integers each of which - represents the index (on the - level, counting from - left/smallest to right/biggest - from 0) of the last record - from each group of distinct - keys */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + dict_index_t* index, + ulint n_prefix, + const boundaries_t* boundaries, + n_diff_data_t* n_diff_data, + mtr_t* mtr) { btr_pcur_t pcur; const page_t* page; ib_uint64_t rec_idx; - ib_uint64_t last_idx_on_level; - ib_uint64_t n_recs_to_dive_below; - ib_uint64_t n_diff_sum_of_all_analyzed_pages; ib_uint64_t i; #if 0 DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu, " - "n_diff_for_this_prefix=" UINT64PF ")\n", + "n_diff_on_level=" UINT64PF ")\n", __func__, index->table->name, index->name, level, - n_prefix, n_diff_for_this_prefix); + n_prefix, n_diff_data->n_diff_on_level); #endif ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), MTR_MEMO_S_LOCK)); - /* if some of those is 0 then this means that there is exactly one - page in the B-tree and it is empty and we should have done full scan - and should not be here */ - ut_ad(total_recs_on_level > 0); - ut_ad(n_diff_for_this_prefix > 0); - - /* this must be at least 1 */ - ut_ad(N_SAMPLE_PAGES(index) > 0); - /* Position pcur on the leftmost record on the leftmost page on the desired level. */ btr_pcur_open_at_index_side( true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, - &pcur, true, level, mtr); + &pcur, true, n_diff_data->level, mtr); btr_pcur_move_to_next_on_page(&pcur); page = btr_pcur_get_page(&pcur); + const rec_t* first_rec = btr_pcur_get_rec(&pcur); + + /* We shouldn't be scanning the leaf level. The caller of this function + should have stopped the descend on level 1 or higher. */ + ut_ad(n_diff_data->level > 0); + ut_ad(!page_is_leaf(page)); + /* The page must not be empty, except when it is the root page (and the whole index is empty). */ - ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page)); - ut_ad(btr_pcur_get_rec(&pcur) - == page_rec_get_next_const(page_get_infimum_rec(page))); + ut_ad(btr_pcur_is_on_user_rec(&pcur)); + ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page))); /* check that we are indeed on the desired level */ - ut_a(btr_page_get_level(page, mtr) == level); + ut_a(btr_page_get_level(page, mtr) == n_diff_data->level); /* there should not be any pages on the left */ ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); /* check whether the first record on the leftmost page is marked - as such, if we are on a non-leaf level */ - ut_a((level == 0) - == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - btr_pcur_get_rec(&pcur), page_is_comp(page)))); + as such; we are on a non-leaf level */ + ut_a(rec_get_info_bits(first_rec, page_is_comp(page)) + & REC_INFO_MIN_REC_FLAG); - last_idx_on_level = boundaries->at( - static_cast<unsigned int>(n_diff_for_this_prefix - 1)); + const ib_uint64_t last_idx_on_level = boundaries->at( + static_cast<unsigned>(n_diff_data->n_diff_on_level - 1)); rec_idx = 0; - n_diff_sum_of_all_analyzed_pages = 0; + n_diff_data->n_diff_all_analyzed_pages = 0; + n_diff_data->n_external_pages_sum = 0; - n_recs_to_dive_below = ut_min(N_SAMPLE_PAGES(index), - n_diff_for_this_prefix); - - for (i = 0; i < n_recs_to_dive_below; i++) { - ib_uint64_t left; - ib_uint64_t right; - ib_uint64_t rnd; - ib_uint64_t dive_below_idx; - - /* there are n_diff_for_this_prefix elements + for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) { + /* there are n_diff_on_level elements in 'boundaries' and we divide those elements - into n_recs_to_dive_below segments, for example: + into n_leaf_pages_to_analyze segments, for example: - let n_diff_for_this_prefix=100, n_recs_to_dive_below=4, then: + let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then: segment i=0: [0, 24] segment i=1: [25, 49] segment i=2: [50, 74] segment i=3: [75, 99] or - let n_diff_for_this_prefix=1, n_recs_to_dive_below=1, then: + let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then: segment i=0: [0, 0] or - let n_diff_for_this_prefix=2, n_recs_to_dive_below=2, then: + let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then: segment i=0: [0, 0] segment i=1: [1, 1] or - let n_diff_for_this_prefix=13, n_recs_to_dive_below=7, then: + let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then: segment i=0: [0, 0] segment i=1: [1, 2] segment i=2: [3, 4] @@ -1663,9 +1716,12 @@ dict_stats_analyze_index_for_n_prefix( then we select a random record from each segment and dive below it */ - left = n_diff_for_this_prefix * i / n_recs_to_dive_below; - right = n_diff_for_this_prefix * (i + 1) - / n_recs_to_dive_below - 1; + const ib_uint64_t n_diff = n_diff_data->n_diff_on_level; + const ib_uint64_t n_pick + = n_diff_data->n_leaf_pages_to_analyze; + + const ib_uint64_t left = n_diff * i / n_pick; + const ib_uint64_t right = n_diff * (i + 1) / n_pick - 1; ut_a(left <= right); ut_a(right <= last_idx_on_level); @@ -1673,11 +1729,11 @@ dict_stats_analyze_index_for_n_prefix( /* we do not pass (left, right) because we do not want to ask ut_rnd_interval() to work with too big numbers since ib_uint64_t could be bigger than ulint */ - rnd = static_cast<ib_uint64_t>( - ut_rnd_interval(0, static_cast<ulint>(right - left))); + const ulint rnd = ut_rnd_interval( + 0, static_cast<ulint>(right - left)); - dive_below_idx = boundaries->at( - static_cast<unsigned int>(left + rnd)); + const ib_uint64_t dive_below_idx + = boundaries->at(static_cast<unsigned>(left + rnd)); #if 0 DEBUG_PRINTF(" %s(): dive below record with index=" @@ -1713,9 +1769,13 @@ dict_stats_analyze_index_for_n_prefix( ut_a(rec_idx == dive_below_idx); ib_uint64_t n_diff_on_leaf_page; + ib_uint64_t n_external_pages; - n_diff_on_leaf_page = dict_stats_analyze_index_below_cur( - btr_pcur_get_btr_cur(&pcur), n_prefix, mtr); + dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur), + n_prefix, + &n_diff_on_leaf_page, + &n_external_pages, + mtr); /* We adjust n_diff_on_leaf_page here to avoid counting one record twice - once as the last on some page and once @@ -1735,37 +1795,86 @@ dict_stats_analyze_index_for_n_prefix( n_diff_on_leaf_page--; } - n_diff_sum_of_all_analyzed_pages += n_diff_on_leaf_page; - } - - /* n_diff_sum_of_all_analyzed_pages can be 0 here if all the leaf - pages sampled contained only delete-marked records. In this case - we should assign 0 to index->stat_n_diff_key_vals[n_prefix - 1], which - the formula below does. */ - - /* See REF01 for an explanation of the algorithm */ - index->stat_n_diff_key_vals[n_prefix - 1] - = index->stat_n_leaf_pages - - * n_diff_for_this_prefix - / total_recs_on_level + n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page; - * n_diff_sum_of_all_analyzed_pages - / n_recs_to_dive_below; + n_diff_data->n_external_pages_sum += n_external_pages; + } - index->stat_n_sample_sizes[n_prefix - 1] = n_recs_to_dive_below; + btr_pcur_close(&pcur); +} - DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu " - "(%lu" - " * " UINT64PF " / " UINT64PF - " * " UINT64PF " / " UINT64PF ")\n", - __func__, index->stat_n_diff_key_vals[n_prefix - 1], - n_prefix, - index->stat_n_leaf_pages, - n_diff_for_this_prefix, total_recs_on_level, - n_diff_sum_of_all_analyzed_pages, n_recs_to_dive_below); +/** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[]. +@param[in] n_diff_data input data to use to derive the results +@param[in,out] index index whose stat_n_diff_key_vals[] to set */ +UNIV_INLINE +void +dict_stats_index_set_n_diff( + const n_diff_data_t* n_diff_data, + dict_index_t* index) +{ + for (ulint n_prefix = dict_index_get_n_unique(index); + n_prefix >= 1; + n_prefix--) { + /* n_diff_all_analyzed_pages can be 0 here if + all the leaf pages sampled contained only + delete-marked records. In this case we should assign + 0 to index->stat_n_diff_key_vals[n_prefix - 1], which + the formula below does. */ + + const n_diff_data_t* data = &n_diff_data[n_prefix - 1]; + + ut_ad(data->n_leaf_pages_to_analyze > 0); + ut_ad(data->n_recs_on_level > 0); + + ulint n_ordinary_leaf_pages; + + if (data->level == 1) { + /* If we know the number of records on level 1, then + this number is the same as the number of pages on + level 0 (leaf). */ + n_ordinary_leaf_pages = data->n_recs_on_level; + } else { + /* If we analyzed D ordinary leaf pages and found E + external pages in total linked from those D ordinary + leaf pages, then this means that the ratio + ordinary/external is D/E. Then the ratio ordinary/total + is D / (D + E). Knowing that the total number of pages + is T (including ordinary and external) then we estimate + that the total number of ordinary leaf pages is + T * D / (D + E). */ + n_ordinary_leaf_pages + = index->stat_n_leaf_pages + * data->n_leaf_pages_to_analyze + / (data->n_leaf_pages_to_analyze + + data->n_external_pages_sum); + } - btr_pcur_close(&pcur); + /* See REF01 for an explanation of the algorithm */ + index->stat_n_diff_key_vals[n_prefix - 1] + = n_ordinary_leaf_pages + + * data->n_diff_on_level + / data->n_recs_on_level + + * data->n_diff_all_analyzed_pages + / data->n_leaf_pages_to_analyze; + + index->stat_n_sample_sizes[n_prefix - 1] + = data->n_leaf_pages_to_analyze; + + DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu" + " (%lu" + " * " UINT64PF " / " UINT64PF + " * " UINT64PF " / " UINT64PF ")\n", + __func__, + index->stat_n_diff_key_vals[n_prefix - 1], + n_prefix, + index->stat_n_leaf_pages, + data->n_diff_on_level, + data->n_recs_on_level, + data->n_diff_all_analyzed_pages, + data->n_leaf_pages_to_analyze); + } } /*********************************************************************//** @@ -1783,10 +1892,8 @@ dict_stats_analyze_index( bool level_is_analyzed; ulint n_uniq; ulint n_prefix; - ib_uint64_t* n_diff_on_level; ib_uint64_t total_recs; ib_uint64_t total_pages; - boundaries_t* n_diff_boundaries; mtr_t mtr; ulint size; DBUG_ENTER("dict_stats_analyze_index"); @@ -1872,11 +1979,18 @@ dict_stats_analyze_index( DBUG_VOID_RETURN; } - /* set to zero */ - n_diff_on_level = reinterpret_cast<ib_uint64_t*> - (mem_zalloc(n_uniq * sizeof(ib_uint64_t))); + /* For each level that is being scanned in the btree, this contains the + number of different key values for all possible n-column prefixes. */ + ib_uint64_t* n_diff_on_level = new ib_uint64_t[n_uniq]; + + /* For each level that is being scanned in the btree, this contains the + index of the last record from each group of equal records (when + comparing only the first n columns, n=1..n_uniq). */ + boundaries_t* n_diff_boundaries = new boundaries_t[n_uniq]; - n_diff_boundaries = new boundaries_t[n_uniq]; + /* For each n-column prefix this array contains the input data that is + used to calculate dict_index_t::stat_n_diff_key_vals[]. */ + n_diff_data_t* n_diff_data = new n_diff_data_t[n_uniq]; /* total_recs is also used to estimate the number of pages on one level below, so at the start we have 1 page (the root) */ @@ -1988,12 +2102,12 @@ dict_stats_analyze_index( level_is_analyzed = true; - if (n_diff_on_level[n_prefix - 1] - >= N_DIFF_REQUIRED(index) - || level == 1) { - /* we found a good level with many distinct - records or we have reached the last level we - could scan */ + if (level == 1 + || n_diff_on_level[n_prefix - 1] + >= N_DIFF_REQUIRED(index)) { + /* we have reached the last level we could scan + or we found a good level with many distinct + records */ break; } @@ -2006,7 +2120,6 @@ found_level: " distinct records for n_prefix=%lu\n", __func__, level, n_diff_on_level[n_prefix - 1], n_prefix); - /* here we are either on level 1 or the level that we are on contains >= N_DIFF_REQUIRED distinct keys or we did not scan deeper levels because they would contain too many pages */ @@ -2015,20 +2128,47 @@ found_level: ut_ad(level_is_analyzed); + /* if any of these is 0 then there is exactly one page in the + B-tree and it is empty and we should have done full scan and + should not be here */ + ut_ad(total_recs > 0); + ut_ad(n_diff_on_level[n_prefix - 1] > 0); + + ut_ad(N_SAMPLE_PAGES(index) > 0); + + n_diff_data_t* data = &n_diff_data[n_prefix - 1]; + + data->level = level; + + data->n_recs_on_level = total_recs; + + data->n_diff_on_level = n_diff_on_level[n_prefix - 1]; + + data->n_leaf_pages_to_analyze = std::min( + N_SAMPLE_PAGES(index), + n_diff_on_level[n_prefix - 1]); + /* pick some records from this level and dive below them for the given n_prefix */ dict_stats_analyze_index_for_n_prefix( - index, level, total_recs, n_prefix, - n_diff_on_level[n_prefix - 1], - &n_diff_boundaries[n_prefix - 1], &mtr); + index, n_prefix, &n_diff_boundaries[n_prefix - 1], + data, &mtr); } mtr_commit(&mtr); delete[] n_diff_boundaries; - mem_free(n_diff_on_level); + delete[] n_diff_on_level; + + /* n_prefix == 0 means that the above loop did not end up prematurely + due to tree being changed and so n_diff_data[] is set up. */ + if (n_prefix == 0) { + dict_stats_index_set_n_diff(n_diff_data, index); + } + + delete[] n_diff_data; dict_stats_assert_initialized_index(index); DBUG_VOID_RETURN; diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index fb975c64f11..f4e5721caa7 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -758,7 +758,7 @@ fil_node_open_file( fprintf(stderr, "InnoDB: Error: the size of single-table" " tablespace file %s\n" - "InnoDB: is only "UINT64PF"," + "InnoDB: is only " UINT64PF "," " should be at least %lu!\n", node->name, size_bytes, diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 4a667686795..f503cc487b7 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -44,6 +44,13 @@ Full Text Search interface /** Column name from the FTS config table */ #define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb" +/** Verify if a aux table name is a obsolete table +by looking up the key word in the obsolete table names */ +#define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \ + (strstr((table_name), "DOC_ID") != NULL \ + || strstr((table_name), "ADDED") != NULL \ + || strstr((table_name), "STOPWORDS") != NULL) + /** This is maximum FTS cache for each table and would be a configurable variable */ UNIV_INTERN ulong fts_max_cache_size; @@ -5837,6 +5844,12 @@ fts_is_aux_table_name( } } + /* Could be obsolete common tables. */ + if (strncmp(ptr, "ADDED", len) == 0 + || strncmp(ptr, "STOPWORDS", len) == 0) { + return(true); + } + /* Try and read the index id. */ if (!fts_read_object_id(&table->index_id, ptr)) { return(FALSE); @@ -6433,6 +6446,56 @@ fts_check_and_drop_orphaned_tables( mem_free(path); } + } else { + if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) { + + /* Current table could be one of the three + obsolete tables, in this case, we should + always try to drop it but not rename it. + This could happen when we try to upgrade + from older server to later one, which doesn't + contain these obsolete tables. */ + drop = true; + + dberr_t err; + trx_t* trx_drop = + trx_allocate_for_background(); + + trx_drop->op_info = "Drop obsolete aux tables"; + trx_drop->dict_operation_lock_mode = RW_X_LATCH; + + trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE); + + err = row_drop_table_for_mysql( + aux_table->name, trx_drop, false, true); + + trx_drop->dict_operation_lock_mode = 0; + + if (err != DB_SUCCESS) { + /* We don't need to worry about the + failure, since server would try to + drop it on next restart, even if + the table was broken. */ + + ib_logf(IB_LOG_LEVEL_WARN, + "Fail to drop obsolete aux" + " table '%s', which is" + " harmless. will try to drop" + " it on next restart.", + aux_table->name); + + fts_sql_rollback(trx_drop); + } else { + ib_logf(IB_LOG_LEVEL_INFO, + "Dropped obsolete aux" + " table '%s'.", + aux_table->name); + + fts_sql_commit(trx_drop); + } + + trx_free_for_background(trx_drop); + } } #ifdef _WIN32 if (!drop && rename) { diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index a9f3a25530d..910a00cd521 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -95,7 +95,7 @@ enum fts_msg_type_t { /** Compressed list of words that have been read from FTS INDEX that needs to be optimized. */ struct fts_zip_t { - ulint status; /*!< Status of (un)/zip operation */ + lint status; /*!< Status of (un)/zip operation */ ulint n_words; /*!< Number of words compressed */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 2d15eaba296..19137dabe24 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -394,7 +394,7 @@ static PSI_rwlock_info all_innodb_rwlocks[] = { {&trx_purge_latch_key, "trx_purge_latch", 0}, {&index_tree_rw_lock_key, "index_tree_rw_lock", 0}, {&index_online_log_key, "index_online_log", 0}, - {&dict_table_stats_latch_key, "dict_table_stats", 0}, + {&dict_table_stats_key, "dict_table_stats", 0}, {&hash_table_rw_lock_key, "hash_table_locks", 0} }; # endif /* UNIV_PFS_RWLOCK */ @@ -4170,7 +4170,7 @@ innobase_close_connection( sql_print_warning( "MySQL is closing a connection that has an active " - "InnoDB transaction. "TRX_ID_FMT" row modifications " + "InnoDB transaction. " TRX_ID_FMT " row modifications " "will roll back.", trx->undo_no); } @@ -7321,7 +7321,7 @@ calc_row_difference( if (doc_id < prebuilt->table->fts->cache->next_doc_id) { fprintf(stderr, "InnoDB: FTS Doc ID must be larger than" - " "IB_ID_FMT" for table", + " " IB_ID_FMT " for table", innodb_table->fts->cache->next_doc_id - 1); ut_print_name(stderr, trx, @@ -7333,9 +7333,9 @@ calc_row_difference( - prebuilt->table->fts->cache->next_doc_id) >= FTS_DOC_ID_MAX_STEP) { fprintf(stderr, - "InnoDB: Doc ID "UINT64PF" is too" + "InnoDB: Doc ID " UINT64PF " is too" " big. Its difference with largest" - " Doc ID used "UINT64PF" cannot" + " Doc ID used " UINT64PF " cannot" " exceed or equal to %d\n", doc_id, prebuilt->table->fts->cache->next_doc_id - 1, diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 833166e783c..f1e4406fcf7 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -576,6 +576,17 @@ void btr_estimate_number_of_different_key_vals( /*======================================*/ dict_index_t* index); /*!< in: index */ + +/** Gets the externally stored size of a record, in units of a database page. +@param[in] rec record +@param[in] offsets array returned by rec_get_offsets() +@return externally stored part, in units of a database page */ + +ulint +btr_rec_get_externally_stored_len( + const rec_t* rec, + const ulint* offsets); + /*******************************************************************//** Marks non-updated off-page fields as disowned by this record. The ownership must be transferred to the updated record which is inserted elsewhere in the diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index f43ba11e74d..026187b2000 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -1438,6 +1438,28 @@ UNIV_INTERN void dict_mutex_exit_for_mysql(void); /*===========================*/ + +/** Create a dict_table_t's stats latch or delay for lazy creation. +This function is only called from either single threaded environment +or from a thread that has not shared the table object with other threads. +@param[in,out] table table whose stats latch to create +@param[in] enabled if false then the latch is disabled +and dict_table_stats_lock()/unlock() become noop on this table. */ + +void +dict_table_stats_latch_create( + dict_table_t* table, + bool enabled); + +/** Destroy a dict_table_t's stats latch. +This function is only called from either single threaded environment +or from a thread that has not shared the table object with other threads. +@param[in,out] table table whose stats latch to destroy */ + +void +dict_table_stats_latch_destroy( + dict_table_t* table); + /**********************************************************************//** Lock the appropriate latch to protect a given table's statistics. table->id is used to pick the corresponding latch from a global array of diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 58de2c2b3f0..f1975456ab1 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -46,6 +46,7 @@ Created 1/8/1996 Heikki Tuuri #include "hash0hash.h" #include "trx0types.h" #include "fts0fts.h" +#include "os0once.h" /* Forward declaration. */ struct ib_rbt_t; @@ -842,6 +843,10 @@ struct dict_table_t{ initialized in dict_table_add_to_cache() */ /** Statistics for query optimization */ /* @{ */ + + volatile os_once::state_t stats_latch_created; + /*!< Creation state of 'stats_latch'. */ + rw_lock_t* stats_latch; /*!< this latch protects: dict_table_t::stat_initialized dict_table_t::stat_n_rows (*) diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h new file mode 100644 index 00000000000..a8bbaf1d2d4 --- /dev/null +++ b/storage/innobase/include/os0once.h @@ -0,0 +1,125 @@ +/***************************************************************************** + +Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/os0once.h +A class that aids executing a given function exactly once in a multi-threaded +environment. + +Created Feb 20, 2014 Vasil Dimov +*******************************************************/ + +#ifndef os0once_h +#define os0once_h + +#include "univ.i" + +#include "os0sync.h" +#include "ut0ut.h" + +/** Execute a given function exactly once in a multi-threaded environment +or wait for the function to be executed by another thread. + +Example usage: +First the user must create a control variable of type os_once::state_t and +assign it os_once::NEVER_DONE. +Then the user must pass this variable, together with a function to be +executed to os_once::do_or_wait_for_done(). + +Multiple threads can call os_once::do_or_wait_for_done() simultaneously with +the same (os_once::state_t) control variable. The provided function will be +called exactly once and when os_once::do_or_wait_for_done() returns then this +function has completed execution, by this or another thread. In other words +os_once::do_or_wait_for_done() will either execute the provided function or +will wait for its execution to complete if it is already called by another +thread or will do nothing if the function has already completed its execution +earlier. + +This mimics pthread_once(3), but unfortunatelly pthread_once(3) does not +support passing arguments to the init_routine() function. We should use +std::call_once() when we start compiling with C++11 enabled. */ +class os_once { +public: + /** Control variables' state type */ + typedef ib_uint32_t state_t; + + /** Not yet executed. */ + static const state_t NEVER_DONE = 0; + + /** Currently being executed by this or another thread. */ + static const state_t IN_PROGRESS = 1; + + /** Finished execution. */ + static const state_t DONE = 2; + +#ifdef HAVE_ATOMIC_BUILTINS + /** Call a given function or wait its execution to complete if it is + already called by another thread. + @param[in,out] state control variable + @param[in] do_func function to call + @param[in,out] do_func_arg an argument to pass to do_func(). */ + static + void + do_or_wait_for_done( + volatile state_t* state, + void (*do_func)(void*), + void* do_func_arg) + { + /* Avoid calling os_compare_and_swap_uint32() in the most + common case. */ + if (*state == DONE) { + return; + } + + if (os_compare_and_swap_uint32(state, + NEVER_DONE, IN_PROGRESS)) { + /* We are the first. Call the function. */ + + do_func(do_func_arg); + + const bool swapped = os_compare_and_swap_uint32( + state, IN_PROGRESS, DONE); + + ut_a(swapped); + } else { + /* The state is not NEVER_DONE, so either it is + IN_PROGRESS (somebody is calling the function right + now or DONE (it has already been called and completed). + Wait for it to become DONE. */ + for (;;) { + const state_t s = *state; + + switch (s) { + case DONE: + return; + case IN_PROGRESS: + break; + case NEVER_DONE: + /* fall through */ + default: + ut_error; + } + + UT_RELAX_CPU(); + } + } + } +#endif /* HAVE_ATOMIC_BUILTINS */ +}; + +#endif /* os0once_h */ diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index f4fcead7412..6d3dd850e08 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -580,7 +580,8 @@ Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ # define os_compare_and_swap_uint32(ptr, old_val, new_val) \ - (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val) + (InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), \ + new_val, old_val) == old_val) # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ (win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val) diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h index 80ad2dd1554..b36e04f2810 100644 --- a/storage/innobase/include/sync0rw.h +++ b/storage/innobase/include/sync0rw.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -135,7 +135,7 @@ extern mysql_pfs_key_t trx_i_s_cache_lock_key; extern mysql_pfs_key_t trx_purge_latch_key; extern mysql_pfs_key_t index_tree_rw_lock_key; extern mysql_pfs_key_t index_online_log_key; -extern mysql_pfs_key_t dict_table_stats_latch_key; +extern mysql_pfs_key_t dict_table_stats_key; extern mysql_pfs_key_t trx_sys_rw_lock_key; extern mysql_pfs_key_t hash_table_rw_lock_key; #endif /* UNIV_PFS_RWLOCK */ diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 98c5512bd0b..bc359746a0b 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 17 +#define INNODB_VERSION_BUGFIX 19 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -439,10 +439,10 @@ typedef unsigned __int64 ib_uint64_t; typedef unsigned __int32 ib_uint32_t; #else /* Use the integer types and formatting strings defined in the C99 standard. */ -# define UINT32PF "%"PRIu32 -# define INT64PF "%"PRId64 -# define UINT64PF "%"PRIu64 -# define UINT64PFx "%016"PRIx64 +# define UINT32PF "%" PRIu32 +# define INT64PF "%" PRId64 +# define UINT64PF "%" PRIu64 +# define UINT64PFx "%016" PRIx64 # define DBUG_LSN_PF UINT64PF typedef int64_t ib_int64_t; typedef uint64_t ib_uint64_t; diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 6380e881d6f..3972ef4afe9 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2659,7 +2659,7 @@ try_again: } ib_logf(IB_LOG_LEVEL_ERROR, - "Tried to read "ULINTPF" bytes at offset " UINT64PF". " + "Tried to read " ULINTPF " bytes at offset " UINT64PF ". " "Was only able to read %ld.", n, offset, (lint) ret); #endif /* __WIN__ */ #ifdef __WIN__ @@ -3004,7 +3004,7 @@ retry: fprintf(stderr, " InnoDB: Error: Write to file %s failed" - " at offset "UINT64PF".\n" + " at offset " UINT64PF ".\n" "InnoDB: %lu bytes should have been written," " only %ld were written.\n" "InnoDB: Operating system error number %lu.\n" diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 65a6c433f5c..834dfe6f8eb 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -151,35 +151,37 @@ row_ins_alloc_sys_fields( ut_ad(row && table && heap); ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table)); - /* 1. Allocate buffer for row id */ + /* allocate buffer to hold the needed system created hidden columns. */ + uint len = DATA_ROW_ID_LEN + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; + ptr = static_cast<byte*>(mem_heap_zalloc(heap, len)); + /* 1. Populate row-id */ col = dict_table_get_sys_col(table, DATA_ROW_ID); dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); - ptr = static_cast<byte*>(mem_heap_zalloc(heap, DATA_ROW_ID_LEN)); - dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN); node->row_id_buf = ptr; - /* 3. Allocate buffer for trx id */ + ptr += DATA_ROW_ID_LEN; + /* 2. Populate trx id */ col = dict_table_get_sys_col(table, DATA_TRX_ID); dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); - ptr = static_cast<byte*>(mem_heap_zalloc(heap, DATA_TRX_ID_LEN)); dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN); node->trx_id_buf = ptr; - /* 4. Allocate buffer for roll ptr */ + ptr += DATA_TRX_ID_LEN; + + /* 3. Populate roll ptr */ col = dict_table_get_sys_col(table, DATA_ROLL_PTR); dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); - ptr = static_cast<byte*>(mem_heap_zalloc(heap, DATA_ROLL_PTR_LEN)); dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN); } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 56cf9f1943c..86b47c9f3bd 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -786,7 +786,7 @@ row_merge_read( if (UNIV_UNLIKELY(!success)) { ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: failed to read merge block at "UINT64PF"\n", + " InnoDB: failed to read merge block at " UINT64PF "\n", ofs); } diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 93d13ea49ee..dd7af8a3526 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -1359,7 +1359,7 @@ error_exit: if (doc_id < next_doc_id) { fprintf(stderr, "InnoDB: FTS Doc ID must be large than" - " "UINT64PF" for table", + " " UINT64PF " for table", next_doc_id - 1); ut_print_name(stderr, trx, TRUE, table->name); putc('\n', stderr); @@ -1374,9 +1374,9 @@ error_exit: if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) { fprintf(stderr, - "InnoDB: Doc ID "UINT64PF" is too" + "InnoDB: Doc ID " UINT64PF " is too" " big. Its difference with largest" - " used Doc ID "UINT64PF" cannot" + " used Doc ID " UINT64PF " cannot" " exceed or equal to %d\n", doc_id, next_doc_id - 1, FTS_DOC_ID_MAX_STEP); diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index ea346566e57..64417b1e5fb 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -41,8 +41,8 @@ Created 12/9/2009 Jimmy Yang /* Macro to standardize the counter names for counters in the "monitor_buf_page" module as they have very structured defines */ #define MONITOR_BUF_PAGE(name, description, code, op, op_code) \ - {"buffer_page_"op"_"name, "buffer_page_io", \ - "Number of "description" Pages "op, \ + {"buffer_page_" op "_" name, "buffer_page_io", \ + "Number of " description " Pages " op, \ MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START, \ MONITOR_##code##_##op_code} diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 0c04fba421a..1c2bfcbd920 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -2197,9 +2197,9 @@ innobase_start_or_create_for_mysql(void) } else if (size != srv_log_file_size) { ib_logf(IB_LOG_LEVEL_ERROR, "Log file %s is" - " of different size "UINT64PF" bytes" + " of different size " UINT64PF " bytes" " than other log" - " files "UINT64PF" bytes!", + " files " UINT64PF " bytes!", logfilename, size << UNIV_PAGE_SIZE_SHIFT, (os_offset_t) srv_log_file_size diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc index 5dc2f69b9d3..54018471abc 100644 --- a/storage/innobase/sync/sync0sync.cc +++ b/storage/innobase/sync/sync0sync.cc @@ -1555,12 +1555,12 @@ sync_print_wait_info( FILE* file) /*!< in: file where to print */ { fprintf(file, - "Mutex spin waits "UINT64PF", rounds "UINT64PF", " - "OS waits "UINT64PF"\n" - "RW-shared spins "UINT64PF", rounds "UINT64PF", " - "OS waits "UINT64PF"\n" - "RW-excl spins "UINT64PF", rounds "UINT64PF", " - "OS waits "UINT64PF"\n", + "Mutex spin waits " UINT64PF ", rounds " UINT64PF ", " + "OS waits " UINT64PF "\n" + "RW-shared spins " UINT64PF ", rounds " UINT64PF ", " + "OS waits " UINT64PF "\n" + "RW-excl spins " UINT64PF ", rounds " UINT64PF ", " + "OS waits " UINT64PF "\n", (ib_uint64_t) mutex_spin_wait_count, (ib_uint64_t) mutex_spin_round_count, (ib_uint64_t) mutex_os_wait_count, diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc index f6360562ae7..01ccfb8a6d0 100644 --- a/storage/innobase/trx/trx0i_s.cc +++ b/storage/innobase/trx/trx0i_s.cc @@ -1639,7 +1639,7 @@ trx_i_s_create_lock_id( } else { /* table lock */ res_len = ut_snprintf(lock_id, lock_id_size, - TRX_ID_FMT":"UINT64PF, + TRX_ID_FMT":" UINT64PF, row->lock_trx_id, row->lock_table_id); } diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 4f8e4ad4487..796ac316d4b 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1803,7 +1803,7 @@ state_ok: if (trx->undo_no != 0) { newline = TRUE; - fprintf(f, ", undo log entries "TRX_ID_FMT, trx->undo_no); + fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no); } if (newline) { @@ -2095,7 +2095,7 @@ trx_recover_for_mysql( ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Transaction contains changes" - " to "TRX_ID_FMT" rows\n", + " to " TRX_ID_FMT " rows\n", trx->undo_no); count++; |