summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Golubchik <sergii@pisem.net>2014-08-06 20:05:10 +0200
committerSergei Golubchik <sergii@pisem.net>2014-08-06 20:05:10 +0200
commit2023fac28130d7d3f7d6776332239c62c3890195 (patch)
treec4cab7ec55da2f87ce0e19ac9270608c04e78e4b
parenta7f39aacd573bfa299a930ee8275ba3066efc33a (diff)
parent58b09cd45fb2c75d6194365730d7a3fed1829bb8 (diff)
downloadmariadb-git-2023fac28130d7d3f7d6776332239c62c3890195.tar.gz
innodb-5.6.19
-rw-r--r--storage/innobase/btr/btr0cur.cc21
-rw-r--r--storage/innobase/buf/buf0flu.cc4
-rw-r--r--storage/innobase/dict/dict0dict.cc91
-rw-r--r--storage/innobase/dict/dict0mem.cc11
-rw-r--r--storage/innobase/dict/dict0stats.cc482
-rw-r--r--storage/innobase/fil/fil0fil.cc2
-rw-r--r--storage/innobase/fts/fts0fts.cc65
-rw-r--r--storage/innobase/fts/fts0opt.cc2
-rw-r--r--storage/innobase/handler/ha_innodb.cc10
-rw-r--r--storage/innobase/include/btr0cur.h13
-rw-r--r--storage/innobase/include/dict0dict.h24
-rw-r--r--storage/innobase/include/dict0mem.h7
-rw-r--r--storage/innobase/include/os0once.h125
-rw-r--r--storage/innobase/include/os0sync.h5
-rw-r--r--storage/innobase/include/sync0rw.h4
-rw-r--r--storage/innobase/include/univ.i10
-rw-r--r--storage/innobase/os/os0file.cc4
-rw-r--r--storage/innobase/row/row0ins.cc16
-rw-r--r--storage/innobase/row/row0merge.cc2
-rw-r--r--storage/innobase/row/row0mysql.cc6
-rw-r--r--storage/innobase/srv/srv0mon.cc4
-rw-r--r--storage/innobase/srv/srv0start.cc4
-rw-r--r--storage/innobase/sync/sync0sync.cc12
-rw-r--r--storage/innobase/trx/trx0i_s.cc2
-rw-r--r--storage/innobase/trx/trx0trx.cc4
25 files changed, 688 insertions, 242 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index a165c9c47f4..34a72f360be 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -202,15 +202,6 @@ btr_rec_free_externally_stored_fields(
mtr_t* mtr); /*!< in: mini-transaction handle which contains
an X-latch to record page and to the index
tree */
-/***********************************************************//**
-Gets the externally stored size of a record, in units of a database page.
-@return externally stored part, in units of a database page */
-static
-ulint
-btr_rec_get_externally_stored_len(
-/*==============================*/
- const rec_t* rec, /*!< in: record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
#endif /* !UNIV_HOTBACKUP */
/******************************************************//**
@@ -4044,15 +4035,15 @@ btr_rec_get_field_ref_offs(
#define btr_rec_get_field_ref(rec, offsets, n) \
((rec) + btr_rec_get_field_ref_offs(offsets, n))
-/***********************************************************//**
-Gets the externally stored size of a record, in units of a database page.
+/** Gets the externally stored size of a record, in units of a database page.
+@param[in] rec record
+@param[in] offsets array returned by rec_get_offsets()
@return externally stored part, in units of a database page */
-static
+
ulint
btr_rec_get_externally_stored_len(
-/*==============================*/
- const rec_t* rec, /*!< in: record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ const rec_t* rec,
+ const ulint* offsets)
{
ulint n_fields;
ulint total_extern_len = 0;
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 3cce75abe74..fa2edb90b8e 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -2183,6 +2183,10 @@ af_get_pct_for_dirty()
{
ulint dirty_pct = buf_get_modified_ratio_pct();
+ if (dirty_pct > 0 && srv_max_buf_pool_modified_pct == 0) {
+ return(100);
+ }
+
ut_a(srv_max_dirty_pages_pct_lwm
<= srv_max_buf_pool_modified_pct);
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 052e9bf2567..30266262f37 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -50,6 +50,7 @@ UNIV_INTERN dict_index_t* dict_ind_compact;
#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0sea.h"
+#include "os0once.h"
#include "page0zip.h"
#include "page0page.h"
#include "pars0pars.h"
@@ -102,7 +103,7 @@ UNIV_INTERN ulong zip_pad_max = 50;
UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key;
UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key;
UNIV_INTERN mysql_pfs_key_t index_online_log_key;
-UNIV_INTERN mysql_pfs_key_t dict_table_stats_latch_key;
+UNIV_INTERN mysql_pfs_key_t dict_table_stats_key;
#endif /* UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
@@ -324,6 +325,82 @@ dict_mutex_exit_for_mysql(void)
mutex_exit(&(dict_sys->mutex));
}
+/** Allocate and init a dict_table_t's stats latch.
+This function must not be called concurrently on the same table object.
+@param[in,out] table_void table whose stats latch to create */
+static
+void
+dict_table_stats_latch_alloc(
+ void* table_void)
+{
+ dict_table_t* table = static_cast<dict_table_t*>(table_void);
+
+ table->stats_latch = new(std::nothrow) rw_lock_t;
+
+ ut_a(table->stats_latch != NULL);
+
+ rw_lock_create(dict_table_stats_key, table->stats_latch,
+ SYNC_INDEX_TREE);
+}
+
+/** Deinit and free a dict_table_t's stats latch.
+This function must not be called concurrently on the same table object.
+@param[in,out] table table whose stats latch to free */
+static
+void
+dict_table_stats_latch_free(
+ dict_table_t* table)
+{
+ rw_lock_free(table->stats_latch);
+ delete table->stats_latch;
+}
+
+/** Create a dict_table_t's stats latch or delay for lazy creation.
+This function is only called from either single threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out] table table whose stats latch to create
+@param[in] enabled if false then the latch is disabled
+and dict_table_stats_lock()/unlock() become noop on this table. */
+
+void
+dict_table_stats_latch_create(
+ dict_table_t* table,
+ bool enabled)
+{
+ if (!enabled) {
+ table->stats_latch = NULL;
+ table->stats_latch_created = os_once::DONE;
+ return;
+ }
+
+#ifdef HAVE_ATOMIC_BUILTINS
+ /* We create this lazily the first time it is used. */
+ table->stats_latch = NULL;
+ table->stats_latch_created = os_once::NEVER_DONE;
+#else /* HAVE_ATOMIC_BUILTINS */
+
+ dict_table_stats_latch_alloc(table);
+
+ table->stats_latch_created = os_once::DONE;
+#endif /* HAVE_ATOMIC_BUILTINS */
+}
+
+/** Destroy a dict_table_t's stats latch.
+This function is only called from either single threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out] table table whose stats latch to destroy */
+
+void
+dict_table_stats_latch_destroy(
+ dict_table_t* table)
+{
+ if (table->stats_latch_created == os_once::DONE
+ && table->stats_latch != NULL) {
+
+ dict_table_stats_latch_free(table);
+ }
+}
+
/**********************************************************************//**
Lock the appropriate latch to protect a given table's statistics. */
UNIV_INTERN
@@ -336,6 +413,14 @@ dict_table_stats_lock(
ut_ad(table != NULL);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+#ifdef HAVE_ATOMIC_BUILTINS
+ os_once::do_or_wait_for_done(
+ &table->stats_latch_created,
+ dict_table_stats_latch_alloc, table);
+#else /* HAVE_ATOMIC_BUILTINS */
+ ut_ad(table->stats_latch_created == os_once::DONE);
+#endif /* HAVE_ATOMIC_BUILTINS */
+
if (table->stats_latch == NULL) {
/* This is a dummy table object that is private in the current
thread and is not shared between multiple threads, thus we
@@ -5195,8 +5280,6 @@ dict_table_print(
index = UT_LIST_GET_NEXT(indexes, index);
}
- table->stat_initialized = FALSE;
-
dict_table_stats_unlock(table, RW_X_LATCH);
foreign = UT_LIST_GET_FIRST(table->foreign_list);
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index 60daeea3a96..6310b2fd225 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -95,9 +95,9 @@ dict_mem_table_create(
ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
- table->stats_latch = new rw_lock_t;
- rw_lock_create(dict_table_stats_latch_key, table->stats_latch,
- SYNC_INDEX_TREE);
+ /* true means that the stats latch will be enabled -
+ dict_table_stats_lock() will not be noop. */
+ dict_table_stats_latch_create(table, true);
#ifndef UNIV_HOTBACKUP
table->autoinc_lock = static_cast<ib_lock_t*>(
@@ -154,8 +154,7 @@ dict_mem_table_free(
mutex_free(&(table->autoinc_mutex));
#endif /* UNIV_HOTBACKUP */
- rw_lock_free(table->stats_latch);
- delete table->stats_latch;
+ dict_table_stats_latch_destroy(table);
ut_free(table->name);
mem_heap_free(table->heap);
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index aa417bbae7b..1eac9e0df51 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -46,6 +46,7 @@ Created Jan 06, 2010 Vasil Dimov
#include "ut0rnd.h" /* ut_rnd_interval() */
#include "ut0ut.h" /* ut_format_name(), ut_time() */
+#include <algorithm>
#include <map>
#include <vector>
@@ -127,10 +128,11 @@ where n=1..n_uniq.
#endif /* UNIV_STATS_DEBUG */
/* Gets the number of leaf pages to sample in persistent stats estimation */
-#define N_SAMPLE_PAGES(index) \
- ((index)->table->stats_sample_pages != 0 ? \
- (index)->table->stats_sample_pages : \
- srv_stats_persistent_sample_pages)
+#define N_SAMPLE_PAGES(index) \
+ static_cast<ib_uint64_t>( \
+ (index)->table->stats_sample_pages != 0 \
+ ? (index)->table->stats_sample_pages \
+ : srv_stats_persistent_sample_pages)
/* number of distinct records on a given level that are required to stop
descending to lower levels and fetch N_SAMPLE_PAGES(index) records
@@ -432,9 +434,9 @@ dict_stats_table_clone_create(
t->corrupted = table->corrupted;
/* This private object "t" is not shared with other threads, so
- we do not need the stats_latch. The lock/unlock routines will do
- nothing if stats_latch is NULL. */
- t->stats_latch = NULL;
+ we do not need the stats_latch (thus we pass false below). The
+ dict_table_stats_lock()/unlock() routines will do nothing. */
+ dict_table_stats_latch_create(t, false);
UT_LIST_INIT(t->indexes);
@@ -510,6 +512,7 @@ dict_stats_table_clone_free(
/*========================*/
dict_table_t* t) /*!< in: dummy table object to free */
{
+ dict_table_stats_latch_destroy(t);
mem_heap_free(t->heap);
}
@@ -1285,35 +1288,40 @@ enum page_scan_method_t {
};
/* @} */
-/*********************************************************************//**
-Scan a page, reading records from left to right and counting the number
-of distinct records on that page (looking only at the first n_prefix
-columns). If scan_method is QUIT_ON_FIRST_NON_BORING then the function
+/** Scan a page, reading records from left to right and counting the number
+of distinct records (looking only at the first n_prefix
+columns) and the number of external pages pointed by records from this page.
+If scan_method is QUIT_ON_FIRST_NON_BORING then the function
will return as soon as it finds a record that does not match its neighbor
to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
equal) or 2 (the function found a non-boring record and returned).
+@param[out] out_rec record, or NULL
+@param[out] offsets1 rec_get_offsets() working space (must
+be big enough)
+@param[out] offsets2 rec_get_offsets() working space (must
+be big enough)
+@param[in] index index of the page
+@param[in] page the page to scan
+@param[in] n_prefix look at the first n_prefix columns
+@param[in] scan_method scan to the end of the page or not
+@param[out] n_diff number of distinct records encountered
+@param[out] n_external_pages if this is non-NULL then it will be set
+to the number of externally stored pages which were encountered
@return offsets1 or offsets2 (the offsets of *out_rec),
or NULL if the page is empty and does not contain user records. */
-UNIV_INLINE __attribute__((nonnull))
+UNIV_INLINE
ulint*
dict_stats_scan_page(
-/*=================*/
- const rec_t** out_rec, /*!< out: record, or NULL */
- ulint* offsets1, /*!< out: rec_get_offsets()
- working space (must be big
- enough) */
- ulint* offsets2, /*!< out: rec_get_offsets()
- working space (must be big
- enough) */
- dict_index_t* index, /*!< in: index of the page */
- const page_t* page, /*!< in: the page to scan */
- ulint n_prefix, /*!< in: look at the first
- n_prefix columns */
- page_scan_method_t scan_method, /*!< in: scan to the end of
- the page or not */
- ib_uint64_t* n_diff) /*!< out: number of distinct
- records encountered */
+ const rec_t** out_rec,
+ ulint* offsets1,
+ ulint* offsets2,
+ dict_index_t* index,
+ const page_t* page,
+ ulint n_prefix,
+ page_scan_method_t scan_method,
+ ib_uint64_t* n_diff,
+ ib_uint64_t* n_external_pages)
{
ulint* offsets_rec = offsets1;
ulint* offsets_next_rec = offsets2;
@@ -1331,6 +1339,12 @@ dict_stats_scan_page(
get_next = page_rec_get_next_const;
}
+ const bool should_count_external_pages = n_external_pages != NULL;
+
+ if (should_count_external_pages) {
+ *n_external_pages = 0;
+ }
+
rec = get_next(page_get_infimum_rec(page));
if (page_rec_is_supremum(rec)) {
@@ -1343,6 +1357,11 @@ dict_stats_scan_page(
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
ULINT_UNDEFINED, &heap);
+ if (should_count_external_pages) {
+ *n_external_pages += btr_rec_get_externally_stored_len(
+ rec, offsets_rec);
+ }
+
next_rec = get_next(rec);
*n_diff = 1;
@@ -1393,6 +1412,11 @@ dict_stats_scan_page(
offsets_next_rec = offsets_tmp;
}
+ if (should_count_external_pages) {
+ *n_external_pages += btr_rec_get_externally_stored_len(
+ rec, offsets_rec);
+ }
+
next_rec = get_next(next_rec);
}
@@ -1403,19 +1427,25 @@ func_exit:
return(offsets_rec);
}
-/*********************************************************************//**
-Dive below the current position of a cursor and calculate the number of
+/** Dive below the current position of a cursor and calculate the number of
distinct records on the leaf page, when looking at the fist n_prefix
-columns.
+columns. Also calculate the number of external pages pointed by records
+on the leaf page.
+@param[in] cur cursor
+@param[in] n_prefix look at the first n_prefix columns
+when comparing records
+@param[out] n_diff number of distinct records
+@param[out] n_external_pages number of external pages
+@param[in,out] mtr mini-transaction
@return number of distinct records on the leaf page */
static
-ib_uint64_t
+void
dict_stats_analyze_index_below_cur(
-/*===============================*/
- const btr_cur_t*cur, /*!< in: cursor */
- ulint n_prefix, /*!< in: look at the first n_prefix
- columns when comparing records */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ const btr_cur_t* cur,
+ ulint n_prefix,
+ ib_uint64_t* n_diff,
+ ib_uint64_t* n_external_pages,
+ mtr_t* mtr)
{
dict_index_t* index;
ulint space;
@@ -1428,7 +1458,6 @@ dict_stats_analyze_index_below_cur(
ulint* offsets1;
ulint* offsets2;
ulint* offsets_rec;
- ib_uint64_t n_diff; /* the result */
ulint size;
index = btr_cur_get_index(cur);
@@ -1464,6 +1493,10 @@ dict_stats_analyze_index_below_cur(
page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
+ /* assume no external pages by default - in case we quit from this
+ function without analyzing any leaf pages */
+ *n_external_pages = 0;
+
/* descend to the leaf level on the B-tree */
for (;;) {
@@ -1482,20 +1515,24 @@ dict_stats_analyze_index_below_cur(
/* search for the first non-boring record on the page */
offsets_rec = dict_stats_scan_page(
&rec, offsets1, offsets2, index, page, n_prefix,
- QUIT_ON_FIRST_NON_BORING, &n_diff);
+ QUIT_ON_FIRST_NON_BORING, n_diff, NULL);
/* pages on level > 0 are not allowed to be empty */
ut_a(offsets_rec != NULL);
/* if page is not empty (offsets_rec != NULL) then n_diff must
be > 0, otherwise there is a bug in dict_stats_scan_page() */
- ut_a(n_diff > 0);
+ ut_a(*n_diff > 0);
- if (n_diff == 1) {
+ if (*n_diff == 1) {
/* page has all keys equal and the end of the page
was reached by dict_stats_scan_page(), no need to
descend to the leaf level */
mem_heap_free(heap);
- return(1);
+ /* can't get an estimate for n_external_pages here
+ because we do not dive to the leaf level, assume no
+ external pages (*n_external_pages was assigned to 0
+ above). */
+ return;
}
/* else */
@@ -1503,7 +1540,7 @@ dict_stats_analyze_index_below_cur(
first non-boring record it finds, then the returned n_diff
can either be 0 (empty page), 1 (page has all keys equal) or
2 (non-boring record was found) */
- ut_a(n_diff == 2);
+ ut_a(*n_diff == 2);
/* we have a non-boring record in rec, descend below it */
@@ -1514,11 +1551,14 @@ dict_stats_analyze_index_below_cur(
ut_ad(btr_page_get_level(page, mtr) == 0);
/* scan the leaf page and find the number of distinct keys,
- when looking only at the first n_prefix columns */
+ when looking only at the first n_prefix columns; also estimate
+ the number of externally stored pages pointed by records on this
+ page */
offsets_rec = dict_stats_scan_page(
&rec, offsets1, offsets2, index, page, n_prefix,
- COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, &n_diff);
+ COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff,
+ n_external_pages);
#if 0
DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
@@ -1526,133 +1566,146 @@ dict_stats_analyze_index_below_cur(
#endif
mem_heap_free(heap);
-
- return(n_diff);
}
-/*********************************************************************//**
-For a given level in an index select N_SAMPLE_PAGES(index)
-(or less) records from that level and dive below them to the corresponding
-leaf pages, then scan those leaf pages and save the sampling results in
-index->stat_n_diff_key_vals[n_prefix - 1] and the number of pages scanned in
-index->stat_n_sample_sizes[n_prefix - 1]. */
+/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
+for each n-columns prefix (n from 1 to n_uniq). */
+struct n_diff_data_t {
+ /** Index of the level on which the descent through the btree
+ stopped. level 0 is the leaf level. This is >= 1 because we
+ avoid scanning the leaf level because it may contain too many
+ pages and doing so is useless when combined with the random dives -
+ if we are to scan the leaf level, this means a full scan and we can
+ simply do that instead of fiddling with picking random records higher
+ in the tree and to dive below them. At the start of the analyzing
+ we may decide to do full scan of the leaf level, but then this
+ structure is not used in that code path. */
+ ulint level;
+
+ /** Number of records on the level where the descend through the btree
+ stopped. When we scan the btree from the root, we stop at some mid
+ level, choose some records from it and dive below them towards a leaf
+ page to analyze. */
+ ib_uint64_t n_recs_on_level;
+
+ /** Number of different key values that were found on the mid level. */
+ ib_uint64_t n_diff_on_level;
+
+ /** Number of leaf pages that are analyzed. This is also the same as
+ the number of records that we pick from the mid level and dive below
+ them. */
+ ib_uint64_t n_leaf_pages_to_analyze;
+
+ /** Cumulative sum of the number of different key values that were
+ found on all analyzed pages. */
+ ib_uint64_t n_diff_all_analyzed_pages;
+
+ /** Cumulative sum of the number of external pages (stored outside of
+ the btree but in the same file segment). */
+ ib_uint64_t n_external_pages_sum;
+};
+
+/** Estimate the number of different key values in an index when looking at
+the first n_prefix columns. For a given level in an index select
+n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
+them to the corresponding leaf pages, then scan those leaf pages and save the
+sampling results in n_diff_data->n_diff_all_analyzed_pages.
+@param[in] index index
+@param[in] n_prefix look at first 'n_prefix' columns when
+comparing records
+@param[in] boundaries a vector that contains
+n_diff_data->n_diff_on_level integers each of which represents the index (on
+level 'level', counting from left/smallest to right/biggest from 0) of the
+last record from each group of distinct keys
+@param[in,out] n_diff_data n_diff_all_analyzed_pages and
+n_external_pages_sum in this structure will be set by this function. The
+members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
+caller in advance - they are used by some calculations inside this function
+@param[in,out] mtr mini-transaction */
static
void
dict_stats_analyze_index_for_n_prefix(
-/*==================================*/
- dict_index_t* index, /*!< in/out: index */
- ulint level, /*!< in: level, must be >= 1 */
- ib_uint64_t total_recs_on_level,
- /*!< in: total number of
- records on the given level */
- ulint n_prefix, /*!< in: look at first
- n_prefix columns when
- comparing records */
- ib_uint64_t n_diff_for_this_prefix,
- /*!< in: number of distinct
- records on the given level,
- when looking at the first
- n_prefix columns */
- boundaries_t* boundaries, /*!< in: array that contains
- n_diff_for_this_prefix
- integers each of which
- represents the index (on the
- level, counting from
- left/smallest to right/biggest
- from 0) of the last record
- from each group of distinct
- keys */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ dict_index_t* index,
+ ulint n_prefix,
+ const boundaries_t* boundaries,
+ n_diff_data_t* n_diff_data,
+ mtr_t* mtr)
{
btr_pcur_t pcur;
const page_t* page;
ib_uint64_t rec_idx;
- ib_uint64_t last_idx_on_level;
- ib_uint64_t n_recs_to_dive_below;
- ib_uint64_t n_diff_sum_of_all_analyzed_pages;
ib_uint64_t i;
#if 0
DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu, "
- "n_diff_for_this_prefix=" UINT64PF ")\n",
+ "n_diff_on_level=" UINT64PF ")\n",
__func__, index->table->name, index->name, level,
- n_prefix, n_diff_for_this_prefix);
+ n_prefix, n_diff_data->n_diff_on_level);
#endif
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_S_LOCK));
- /* if some of those is 0 then this means that there is exactly one
- page in the B-tree and it is empty and we should have done full scan
- and should not be here */
- ut_ad(total_recs_on_level > 0);
- ut_ad(n_diff_for_this_prefix > 0);
-
- /* this must be at least 1 */
- ut_ad(N_SAMPLE_PAGES(index) > 0);
-
/* Position pcur on the leftmost record on the leftmost page
on the desired level. */
btr_pcur_open_at_index_side(
true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
- &pcur, true, level, mtr);
+ &pcur, true, n_diff_data->level, mtr);
btr_pcur_move_to_next_on_page(&pcur);
page = btr_pcur_get_page(&pcur);
+ const rec_t* first_rec = btr_pcur_get_rec(&pcur);
+
+ /* We shouldn't be scanning the leaf level. The caller of this function
+ should have stopped the descend on level 1 or higher. */
+ ut_ad(n_diff_data->level > 0);
+ ut_ad(!page_is_leaf(page));
+
/* The page must not be empty, except when
it is the root page (and the whole index is empty). */
- ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
- ut_ad(btr_pcur_get_rec(&pcur)
- == page_rec_get_next_const(page_get_infimum_rec(page)));
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+ ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));
/* check that we are indeed on the desired level */
- ut_a(btr_page_get_level(page, mtr) == level);
+ ut_a(btr_page_get_level(page, mtr) == n_diff_data->level);
/* there should not be any pages on the left */
ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
/* check whether the first record on the leftmost page is marked
- as such, if we are on a non-leaf level */
- ut_a((level == 0)
- == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- btr_pcur_get_rec(&pcur), page_is_comp(page))));
+ as such; we are on a non-leaf level */
+ ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
+ & REC_INFO_MIN_REC_FLAG);
- last_idx_on_level = boundaries->at(
- static_cast<unsigned int>(n_diff_for_this_prefix - 1));
+ const ib_uint64_t last_idx_on_level = boundaries->at(
+ static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));
rec_idx = 0;
- n_diff_sum_of_all_analyzed_pages = 0;
+ n_diff_data->n_diff_all_analyzed_pages = 0;
+ n_diff_data->n_external_pages_sum = 0;
- n_recs_to_dive_below = ut_min(N_SAMPLE_PAGES(index),
- n_diff_for_this_prefix);
-
- for (i = 0; i < n_recs_to_dive_below; i++) {
- ib_uint64_t left;
- ib_uint64_t right;
- ib_uint64_t rnd;
- ib_uint64_t dive_below_idx;
-
- /* there are n_diff_for_this_prefix elements
+ for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
+ /* there are n_diff_on_level elements
in 'boundaries' and we divide those elements
- into n_recs_to_dive_below segments, for example:
+ into n_leaf_pages_to_analyze segments, for example:
- let n_diff_for_this_prefix=100, n_recs_to_dive_below=4, then:
+ let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
segment i=0: [0, 24]
segment i=1: [25, 49]
segment i=2: [50, 74]
segment i=3: [75, 99] or
- let n_diff_for_this_prefix=1, n_recs_to_dive_below=1, then:
+ let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
segment i=0: [0, 0] or
- let n_diff_for_this_prefix=2, n_recs_to_dive_below=2, then:
+ let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
segment i=0: [0, 0]
segment i=1: [1, 1] or
- let n_diff_for_this_prefix=13, n_recs_to_dive_below=7, then:
+ let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
segment i=0: [0, 0]
segment i=1: [1, 2]
segment i=2: [3, 4]
@@ -1663,9 +1716,12 @@ dict_stats_analyze_index_for_n_prefix(
then we select a random record from each segment and dive
below it */
- left = n_diff_for_this_prefix * i / n_recs_to_dive_below;
- right = n_diff_for_this_prefix * (i + 1)
- / n_recs_to_dive_below - 1;
+ const ib_uint64_t n_diff = n_diff_data->n_diff_on_level;
+ const ib_uint64_t n_pick
+ = n_diff_data->n_leaf_pages_to_analyze;
+
+ const ib_uint64_t left = n_diff * i / n_pick;
+ const ib_uint64_t right = n_diff * (i + 1) / n_pick - 1;
ut_a(left <= right);
ut_a(right <= last_idx_on_level);
@@ -1673,11 +1729,11 @@ dict_stats_analyze_index_for_n_prefix(
/* we do not pass (left, right) because we do not want to ask
ut_rnd_interval() to work with too big numbers since
ib_uint64_t could be bigger than ulint */
- rnd = static_cast<ib_uint64_t>(
- ut_rnd_interval(0, static_cast<ulint>(right - left)));
+ const ulint rnd = ut_rnd_interval(
+ 0, static_cast<ulint>(right - left));
- dive_below_idx = boundaries->at(
- static_cast<unsigned int>(left + rnd));
+ const ib_uint64_t dive_below_idx
+ = boundaries->at(static_cast<unsigned>(left + rnd));
#if 0
DEBUG_PRINTF(" %s(): dive below record with index="
@@ -1713,9 +1769,13 @@ dict_stats_analyze_index_for_n_prefix(
ut_a(rec_idx == dive_below_idx);
ib_uint64_t n_diff_on_leaf_page;
+ ib_uint64_t n_external_pages;
- n_diff_on_leaf_page = dict_stats_analyze_index_below_cur(
- btr_pcur_get_btr_cur(&pcur), n_prefix, mtr);
+ dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
+ n_prefix,
+ &n_diff_on_leaf_page,
+ &n_external_pages,
+ mtr);
/* We adjust n_diff_on_leaf_page here to avoid counting
one record twice - once as the last on some page and once
@@ -1735,37 +1795,86 @@ dict_stats_analyze_index_for_n_prefix(
n_diff_on_leaf_page--;
}
- n_diff_sum_of_all_analyzed_pages += n_diff_on_leaf_page;
- }
-
- /* n_diff_sum_of_all_analyzed_pages can be 0 here if all the leaf
- pages sampled contained only delete-marked records. In this case
- we should assign 0 to index->stat_n_diff_key_vals[n_prefix - 1], which
- the formula below does. */
-
- /* See REF01 for an explanation of the algorithm */
- index->stat_n_diff_key_vals[n_prefix - 1]
- = index->stat_n_leaf_pages
-
- * n_diff_for_this_prefix
- / total_recs_on_level
+ n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;
- * n_diff_sum_of_all_analyzed_pages
- / n_recs_to_dive_below;
+ n_diff_data->n_external_pages_sum += n_external_pages;
+ }
- index->stat_n_sample_sizes[n_prefix - 1] = n_recs_to_dive_below;
+ btr_pcur_close(&pcur);
+}
- DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu "
- "(%lu"
- " * " UINT64PF " / " UINT64PF
- " * " UINT64PF " / " UINT64PF ")\n",
- __func__, index->stat_n_diff_key_vals[n_prefix - 1],
- n_prefix,
- index->stat_n_leaf_pages,
- n_diff_for_this_prefix, total_recs_on_level,
- n_diff_sum_of_all_analyzed_pages, n_recs_to_dive_below);
+/** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
+@param[in] n_diff_data input data to use to derive the results
+@param[in,out] index index whose stat_n_diff_key_vals[] to set */
+UNIV_INLINE
+void
+dict_stats_index_set_n_diff(
+ const n_diff_data_t* n_diff_data,
+ dict_index_t* index)
+{
+ for (ulint n_prefix = dict_index_get_n_unique(index);
+ n_prefix >= 1;
+ n_prefix--) {
+ /* n_diff_all_analyzed_pages can be 0 here if
+ all the leaf pages sampled contained only
+ delete-marked records. In this case we should assign
+ 0 to index->stat_n_diff_key_vals[n_prefix - 1], which
+ the formula below does. */
+
+ const n_diff_data_t* data = &n_diff_data[n_prefix - 1];
+
+ ut_ad(data->n_leaf_pages_to_analyze > 0);
+ ut_ad(data->n_recs_on_level > 0);
+
+ ulint n_ordinary_leaf_pages;
+
+ if (data->level == 1) {
+ /* If we know the number of records on level 1, then
+ this number is the same as the number of pages on
+ level 0 (leaf). */
+ n_ordinary_leaf_pages = data->n_recs_on_level;
+ } else {
+ /* If we analyzed D ordinary leaf pages and found E
+ external pages in total linked from those D ordinary
+ leaf pages, then this means that the ratio
+ ordinary/external is D/E. Then the ratio ordinary/total
+ is D / (D + E). Knowing that the total number of pages
+ is T (including ordinary and external) then we estimate
+ that the total number of ordinary leaf pages is
+ T * D / (D + E). */
+ n_ordinary_leaf_pages
+ = index->stat_n_leaf_pages
+ * data->n_leaf_pages_to_analyze
+ / (data->n_leaf_pages_to_analyze
+ + data->n_external_pages_sum);
+ }
- btr_pcur_close(&pcur);
+ /* See REF01 for an explanation of the algorithm */
+ index->stat_n_diff_key_vals[n_prefix - 1]
+ = n_ordinary_leaf_pages
+
+ * data->n_diff_on_level
+ / data->n_recs_on_level
+
+ * data->n_diff_all_analyzed_pages
+ / data->n_leaf_pages_to_analyze;
+
+ index->stat_n_sample_sizes[n_prefix - 1]
+ = data->n_leaf_pages_to_analyze;
+
+ DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu"
+ " (%lu"
+ " * " UINT64PF " / " UINT64PF
+ " * " UINT64PF " / " UINT64PF ")\n",
+ __func__,
+ index->stat_n_diff_key_vals[n_prefix - 1],
+ n_prefix,
+ index->stat_n_leaf_pages,
+ data->n_diff_on_level,
+ data->n_recs_on_level,
+ data->n_diff_all_analyzed_pages,
+ data->n_leaf_pages_to_analyze);
+ }
}
/*********************************************************************//**
@@ -1783,10 +1892,8 @@ dict_stats_analyze_index(
bool level_is_analyzed;
ulint n_uniq;
ulint n_prefix;
- ib_uint64_t* n_diff_on_level;
ib_uint64_t total_recs;
ib_uint64_t total_pages;
- boundaries_t* n_diff_boundaries;
mtr_t mtr;
ulint size;
DBUG_ENTER("dict_stats_analyze_index");
@@ -1872,11 +1979,18 @@ dict_stats_analyze_index(
DBUG_VOID_RETURN;
}
- /* set to zero */
- n_diff_on_level = reinterpret_cast<ib_uint64_t*>
- (mem_zalloc(n_uniq * sizeof(ib_uint64_t)));
+ /* For each level that is being scanned in the btree, this contains the
+ number of different key values for all possible n-column prefixes. */
+ ib_uint64_t* n_diff_on_level = new ib_uint64_t[n_uniq];
+
+ /* For each level that is being scanned in the btree, this contains the
+ index of the last record from each group of equal records (when
+ comparing only the first n columns, n=1..n_uniq). */
+ boundaries_t* n_diff_boundaries = new boundaries_t[n_uniq];
- n_diff_boundaries = new boundaries_t[n_uniq];
+ /* For each n-column prefix this array contains the input data that is
+ used to calculate dict_index_t::stat_n_diff_key_vals[]. */
+ n_diff_data_t* n_diff_data = new n_diff_data_t[n_uniq];
/* total_recs is also used to estimate the number of pages on one
level below, so at the start we have 1 page (the root) */
@@ -1988,12 +2102,12 @@ dict_stats_analyze_index(
level_is_analyzed = true;
- if (n_diff_on_level[n_prefix - 1]
- >= N_DIFF_REQUIRED(index)
- || level == 1) {
- /* we found a good level with many distinct
- records or we have reached the last level we
- could scan */
+ if (level == 1
+ || n_diff_on_level[n_prefix - 1]
+ >= N_DIFF_REQUIRED(index)) {
+ /* we have reached the last level we could scan
+ or we found a good level with many distinct
+ records */
break;
}
@@ -2006,7 +2120,6 @@ found_level:
" distinct records for n_prefix=%lu\n",
__func__, level, n_diff_on_level[n_prefix - 1],
n_prefix);
-
/* here we are either on level 1 or the level that we are on
contains >= N_DIFF_REQUIRED distinct keys or we did not scan
deeper levels because they would contain too many pages */
@@ -2015,20 +2128,47 @@ found_level:
ut_ad(level_is_analyzed);
+	/* if any of these is 0 then there is exactly one page in the
+	B-tree and it is empty and we should have done a full scan and
+	should not be here */
+ ut_ad(total_recs > 0);
+ ut_ad(n_diff_on_level[n_prefix - 1] > 0);
+
+ ut_ad(N_SAMPLE_PAGES(index) > 0);
+
+ n_diff_data_t* data = &n_diff_data[n_prefix - 1];
+
+ data->level = level;
+
+ data->n_recs_on_level = total_recs;
+
+ data->n_diff_on_level = n_diff_on_level[n_prefix - 1];
+
+ data->n_leaf_pages_to_analyze = std::min(
+ N_SAMPLE_PAGES(index),
+ n_diff_on_level[n_prefix - 1]);
+
/* pick some records from this level and dive below them for
the given n_prefix */
dict_stats_analyze_index_for_n_prefix(
- index, level, total_recs, n_prefix,
- n_diff_on_level[n_prefix - 1],
- &n_diff_boundaries[n_prefix - 1], &mtr);
+ index, n_prefix, &n_diff_boundaries[n_prefix - 1],
+ data, &mtr);
}
mtr_commit(&mtr);
delete[] n_diff_boundaries;
- mem_free(n_diff_on_level);
+ delete[] n_diff_on_level;
+
+ /* n_prefix == 0 means that the above loop did not end up prematurely
+ due to tree being changed and so n_diff_data[] is set up. */
+ if (n_prefix == 0) {
+ dict_stats_index_set_n_diff(n_diff_data, index);
+ }
+
+ delete[] n_diff_data;
dict_stats_assert_initialized_index(index);
DBUG_VOID_RETURN;
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index fb975c64f11..f4e5721caa7 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -758,7 +758,7 @@ fil_node_open_file(
fprintf(stderr,
"InnoDB: Error: the size of single-table"
" tablespace file %s\n"
- "InnoDB: is only "UINT64PF","
+ "InnoDB: is only " UINT64PF ","
" should be at least %lu!\n",
node->name,
size_bytes,
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index 4a667686795..f503cc487b7 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -44,6 +44,13 @@ Full Text Search interface
/** Column name from the FTS config table */
#define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
+/** Verify if an aux table name is an obsolete table
+by looking up the key word in the obsolete table names */
+#define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \
+ (strstr((table_name), "DOC_ID") != NULL \
+ || strstr((table_name), "ADDED") != NULL \
+ || strstr((table_name), "STOPWORDS") != NULL)
+
/** This is maximum FTS cache for each table and would be
a configurable variable */
UNIV_INTERN ulong fts_max_cache_size;
@@ -5837,6 +5844,12 @@ fts_is_aux_table_name(
}
}
+ /* Could be obsolete common tables. */
+ if (strncmp(ptr, "ADDED", len) == 0
+ || strncmp(ptr, "STOPWORDS", len) == 0) {
+ return(true);
+ }
+
/* Try and read the index id. */
if (!fts_read_object_id(&table->index_id, ptr)) {
return(FALSE);
@@ -6433,6 +6446,56 @@ fts_check_and_drop_orphaned_tables(
mem_free(path);
}
+ } else {
+ if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
+
+ /* Current table could be one of the three
+ obsolete tables, in this case, we should
+ always try to drop it but not rename it.
+ This could happen when we try to upgrade
+ from older server to later one, which doesn't
+ contain these obsolete tables. */
+ drop = true;
+
+ dberr_t err;
+ trx_t* trx_drop =
+ trx_allocate_for_background();
+
+ trx_drop->op_info = "Drop obsolete aux tables";
+ trx_drop->dict_operation_lock_mode = RW_X_LATCH;
+
+ trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
+
+ err = row_drop_table_for_mysql(
+ aux_table->name, trx_drop, false, true);
+
+ trx_drop->dict_operation_lock_mode = 0;
+
+ if (err != DB_SUCCESS) {
+ /* We don't need to worry about the
+ failure, since server would try to
+ drop it on next restart, even if
+ the table was broken. */
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Fail to drop obsolete aux"
+ " table '%s', which is"
+ " harmless. will try to drop"
+ " it on next restart.",
+ aux_table->name);
+
+ fts_sql_rollback(trx_drop);
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Dropped obsolete aux"
+ " table '%s'.",
+ aux_table->name);
+
+ fts_sql_commit(trx_drop);
+ }
+
+ trx_free_for_background(trx_drop);
+ }
}
#ifdef _WIN32
if (!drop && rename) {
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
index a9f3a25530d..910a00cd521 100644
--- a/storage/innobase/fts/fts0opt.cc
+++ b/storage/innobase/fts/fts0opt.cc
@@ -95,7 +95,7 @@ enum fts_msg_type_t {
/** Compressed list of words that have been read from FTS INDEX
that needs to be optimized. */
struct fts_zip_t {
- ulint status; /*!< Status of (un)/zip operation */
+ lint status; /*!< Status of (un)/zip operation */
ulint n_words; /*!< Number of words compressed */
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 2d15eaba296..19137dabe24 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -394,7 +394,7 @@ static PSI_rwlock_info all_innodb_rwlocks[] = {
{&trx_purge_latch_key, "trx_purge_latch", 0},
{&index_tree_rw_lock_key, "index_tree_rw_lock", 0},
{&index_online_log_key, "index_online_log", 0},
- {&dict_table_stats_latch_key, "dict_table_stats", 0},
+ {&dict_table_stats_key, "dict_table_stats", 0},
{&hash_table_rw_lock_key, "hash_table_locks", 0}
};
# endif /* UNIV_PFS_RWLOCK */
@@ -4170,7 +4170,7 @@ innobase_close_connection(
sql_print_warning(
"MySQL is closing a connection that has an active "
- "InnoDB transaction. "TRX_ID_FMT" row modifications "
+ "InnoDB transaction. " TRX_ID_FMT " row modifications "
"will roll back.",
trx->undo_no);
}
@@ -7321,7 +7321,7 @@ calc_row_difference(
if (doc_id < prebuilt->table->fts->cache->next_doc_id) {
fprintf(stderr,
"InnoDB: FTS Doc ID must be larger than"
- " "IB_ID_FMT" for table",
+ " " IB_ID_FMT " for table",
innodb_table->fts->cache->next_doc_id
- 1);
ut_print_name(stderr, trx,
@@ -7333,9 +7333,9 @@ calc_row_difference(
- prebuilt->table->fts->cache->next_doc_id)
>= FTS_DOC_ID_MAX_STEP) {
fprintf(stderr,
- "InnoDB: Doc ID "UINT64PF" is too"
+ "InnoDB: Doc ID " UINT64PF " is too"
" big. Its difference with largest"
- " Doc ID used "UINT64PF" cannot"
+ " Doc ID used " UINT64PF " cannot"
" exceed or equal to %d\n",
doc_id,
prebuilt->table->fts->cache->next_doc_id - 1,
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 833166e783c..f1e4406fcf7 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -576,6 +576,17 @@ void
btr_estimate_number_of_different_key_vals(
/*======================================*/
dict_index_t* index); /*!< in: index */
+
+/** Gets the externally stored size of a record, in units of a database page.
+@param[in] rec record
+@param[in] offsets array returned by rec_get_offsets()
+@return externally stored part, in units of a database page */
+
+ulint
+btr_rec_get_externally_stored_len(
+ const rec_t* rec,
+ const ulint* offsets);
+
/*******************************************************************//**
Marks non-updated off-page fields as disowned by this record. The ownership
must be transferred to the updated record which is inserted elsewhere in the
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index f43ba11e74d..026187b2000 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -1438,6 +1438,28 @@ UNIV_INTERN
void
dict_mutex_exit_for_mysql(void);
/*===========================*/
+
+/** Create a dict_table_t's stats latch or delay for lazy creation.
+This function is only called from either single threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out] table table whose stats latch to create
+@param[in] enabled if false then the latch is disabled
+and dict_table_stats_lock()/unlock() become noop on this table. */
+
+void
+dict_table_stats_latch_create(
+ dict_table_t* table,
+ bool enabled);
+
+/** Destroy a dict_table_t's stats latch.
+This function is only called from either single threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out] table table whose stats latch to destroy */
+
+void
+dict_table_stats_latch_destroy(
+ dict_table_t* table);
+
/**********************************************************************//**
Lock the appropriate latch to protect a given table's statistics.
table->id is used to pick the corresponding latch from a global array of
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 58de2c2b3f0..f1975456ab1 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
@@ -46,6 +46,7 @@ Created 1/8/1996 Heikki Tuuri
#include "hash0hash.h"
#include "trx0types.h"
#include "fts0fts.h"
+#include "os0once.h"
/* Forward declaration. */
struct ib_rbt_t;
@@ -842,6 +843,10 @@ struct dict_table_t{
initialized in dict_table_add_to_cache() */
/** Statistics for query optimization */
/* @{ */
+
+ volatile os_once::state_t stats_latch_created;
+ /*!< Creation state of 'stats_latch'. */
+
rw_lock_t* stats_latch; /*!< this latch protects:
dict_table_t::stat_initialized
dict_table_t::stat_n_rows (*)
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
new file mode 100644
index 00000000000..a8bbaf1d2d4
--- /dev/null
+++ b/storage/innobase/include/os0once.h
@@ -0,0 +1,125 @@
+/*****************************************************************************
+
+Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0once.h
+A class that aids executing a given function exactly once in a multi-threaded
+environment.
+
+Created Feb 20, 2014 Vasil Dimov
+*******************************************************/
+
+#ifndef os0once_h
+#define os0once_h
+
+#include "univ.i"
+
+#include "os0sync.h"
+#include "ut0ut.h"
+
+/** Execute a given function exactly once in a multi-threaded environment
+or wait for the function to be executed by another thread.
+
+Example usage:
+First the user must create a control variable of type os_once::state_t and
+assign it os_once::NEVER_DONE.
+Then the user must pass this variable, together with a function to be
+executed to os_once::do_or_wait_for_done().
+
+Multiple threads can call os_once::do_or_wait_for_done() simultaneously with
+the same (os_once::state_t) control variable. The provided function will be
+called exactly once and when os_once::do_or_wait_for_done() returns then this
+function has completed execution, by this or another thread. In other words
+os_once::do_or_wait_for_done() will either execute the provided function or
+will wait for its execution to complete if it is already called by another
+thread or will do nothing if the function has already completed its execution
+earlier.
+
+This mimics pthread_once(3), but unfortunately pthread_once(3) does not
+support passing arguments to the init_routine() function. We should use
+std::call_once() when we start compiling with C++11 enabled. */
+class os_once {
+public:
+ /** Control variables' state type */
+ typedef ib_uint32_t state_t;
+
+ /** Not yet executed. */
+ static const state_t NEVER_DONE = 0;
+
+ /** Currently being executed by this or another thread. */
+ static const state_t IN_PROGRESS = 1;
+
+ /** Finished execution. */
+ static const state_t DONE = 2;
+
+#ifdef HAVE_ATOMIC_BUILTINS
+	/** Call a given function or wait for its execution to complete
+	if it is already being called by another thread.
+ @param[in,out] state control variable
+ @param[in] do_func function to call
+ @param[in,out] do_func_arg an argument to pass to do_func(). */
+ static
+ void
+ do_or_wait_for_done(
+ volatile state_t* state,
+ void (*do_func)(void*),
+ void* do_func_arg)
+ {
+ /* Avoid calling os_compare_and_swap_uint32() in the most
+ common case. */
+ if (*state == DONE) {
+ return;
+ }
+
+ if (os_compare_and_swap_uint32(state,
+ NEVER_DONE, IN_PROGRESS)) {
+ /* We are the first. Call the function. */
+
+ do_func(do_func_arg);
+
+ const bool swapped = os_compare_and_swap_uint32(
+ state, IN_PROGRESS, DONE);
+
+ ut_a(swapped);
+ } else {
+			/* The state is not NEVER_DONE, so either it is
+			IN_PROGRESS (somebody is calling the function right
+			now) or DONE (it has already been called and completed).
+			Wait for it to become DONE. */
+ for (;;) {
+ const state_t s = *state;
+
+ switch (s) {
+ case DONE:
+ return;
+ case IN_PROGRESS:
+ break;
+ case NEVER_DONE:
+ /* fall through */
+ default:
+ ut_error;
+ }
+
+ UT_RELAX_CPU();
+ }
+ }
+ }
+#endif /* HAVE_ATOMIC_BUILTINS */
+};
+
+#endif /* os0once_h */
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
index f4fcead7412..6d3dd850e08 100644
--- a/storage/innobase/include/os0sync.h
+++ b/storage/innobase/include/os0sync.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -580,7 +580,8 @@ Returns true if swapped, ptr is pointer to target, old_val is value to
compare to, new_val is the value to swap in. */
# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
- (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val)
+ (InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), \
+ new_val, old_val) == old_val)
# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
(win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val)
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index 80ad2dd1554..b36e04f2810 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -135,7 +135,7 @@ extern mysql_pfs_key_t trx_i_s_cache_lock_key;
extern mysql_pfs_key_t trx_purge_latch_key;
extern mysql_pfs_key_t index_tree_rw_lock_key;
extern mysql_pfs_key_t index_online_log_key;
-extern mysql_pfs_key_t dict_table_stats_latch_key;
+extern mysql_pfs_key_t dict_table_stats_key;
extern mysql_pfs_key_t trx_sys_rw_lock_key;
extern mysql_pfs_key_t hash_table_rw_lock_key;
#endif /* UNIV_PFS_RWLOCK */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 98c5512bd0b..bc359746a0b 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 5
#define INNODB_VERSION_MINOR 6
-#define INNODB_VERSION_BUGFIX 17
+#define INNODB_VERSION_BUGFIX 19
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
@@ -439,10 +439,10 @@ typedef unsigned __int64 ib_uint64_t;
typedef unsigned __int32 ib_uint32_t;
#else
/* Use the integer types and formatting strings defined in the C99 standard. */
-# define UINT32PF "%"PRIu32
-# define INT64PF "%"PRId64
-# define UINT64PF "%"PRIu64
-# define UINT64PFx "%016"PRIx64
+# define UINT32PF "%" PRIu32
+# define INT64PF "%" PRId64
+# define UINT64PF "%" PRIu64
+# define UINT64PFx "%016" PRIx64
# define DBUG_LSN_PF UINT64PF
typedef int64_t ib_int64_t;
typedef uint64_t ib_uint64_t;
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 6380e881d6f..3972ef4afe9 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -2659,7 +2659,7 @@ try_again:
}
ib_logf(IB_LOG_LEVEL_ERROR,
- "Tried to read "ULINTPF" bytes at offset " UINT64PF". "
+ "Tried to read " ULINTPF " bytes at offset " UINT64PF ". "
"Was only able to read %ld.", n, offset, (lint) ret);
#endif /* __WIN__ */
#ifdef __WIN__
@@ -3004,7 +3004,7 @@ retry:
fprintf(stderr,
" InnoDB: Error: Write to file %s failed"
- " at offset "UINT64PF".\n"
+ " at offset " UINT64PF ".\n"
"InnoDB: %lu bytes should have been written,"
" only %ld were written.\n"
"InnoDB: Operating system error number %lu.\n"
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 65a6c433f5c..834dfe6f8eb 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -151,35 +151,37 @@ row_ins_alloc_sys_fields(
ut_ad(row && table && heap);
ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
- /* 1. Allocate buffer for row id */
+ /* allocate buffer to hold the needed system created hidden columns. */
+ uint len = DATA_ROW_ID_LEN + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+ ptr = static_cast<byte*>(mem_heap_zalloc(heap, len));
+ /* 1. Populate row-id */
col = dict_table_get_sys_col(table, DATA_ROW_ID);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = static_cast<byte*>(mem_heap_zalloc(heap, DATA_ROW_ID_LEN));
-
dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
node->row_id_buf = ptr;
- /* 3. Allocate buffer for trx id */
+ ptr += DATA_ROW_ID_LEN;
+ /* 2. Populate trx id */
col = dict_table_get_sys_col(table, DATA_TRX_ID);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = static_cast<byte*>(mem_heap_zalloc(heap, DATA_TRX_ID_LEN));
dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
node->trx_id_buf = ptr;
- /* 4. Allocate buffer for roll ptr */
+ ptr += DATA_TRX_ID_LEN;
+
+ /* 3. Populate roll ptr */
col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
- ptr = static_cast<byte*>(mem_heap_zalloc(heap, DATA_ROLL_PTR_LEN));
dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
}
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index 56cf9f1943c..86b47c9f3bd 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -786,7 +786,7 @@ row_merge_read(
if (UNIV_UNLIKELY(!success)) {
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: failed to read merge block at "UINT64PF"\n",
+ " InnoDB: failed to read merge block at " UINT64PF "\n",
ofs);
}
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 93d13ea49ee..dd7af8a3526 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -1359,7 +1359,7 @@ error_exit:
if (doc_id < next_doc_id) {
fprintf(stderr,
"InnoDB: FTS Doc ID must be large than"
- " "UINT64PF" for table",
+ " " UINT64PF " for table",
next_doc_id - 1);
ut_print_name(stderr, trx, TRUE, table->name);
putc('\n', stderr);
@@ -1374,9 +1374,9 @@ error_exit:
if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) {
fprintf(stderr,
- "InnoDB: Doc ID "UINT64PF" is too"
+ "InnoDB: Doc ID " UINT64PF " is too"
" big. Its difference with largest"
- " used Doc ID "UINT64PF" cannot"
+ " used Doc ID " UINT64PF " cannot"
" exceed or equal to %d\n",
doc_id, next_doc_id - 1,
FTS_DOC_ID_MAX_STEP);
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index ea346566e57..64417b1e5fb 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -41,8 +41,8 @@ Created 12/9/2009 Jimmy Yang
/* Macro to standardize the counter names for counters in the
"monitor_buf_page" module as they have very structured defines */
#define MONITOR_BUF_PAGE(name, description, code, op, op_code) \
- {"buffer_page_"op"_"name, "buffer_page_io", \
- "Number of "description" Pages "op, \
+ {"buffer_page_" op "_" name, "buffer_page_io", \
+ "Number of " description " Pages " op, \
MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START, \
MONITOR_##code##_##op_code}
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 0c04fba421a..1c2bfcbd920 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -2197,9 +2197,9 @@ innobase_start_or_create_for_mysql(void)
} else if (size != srv_log_file_size) {
ib_logf(IB_LOG_LEVEL_ERROR,
"Log file %s is"
- " of different size "UINT64PF" bytes"
+ " of different size " UINT64PF " bytes"
" than other log"
- " files "UINT64PF" bytes!",
+ " files " UINT64PF " bytes!",
logfilename,
size << UNIV_PAGE_SIZE_SHIFT,
(os_offset_t) srv_log_file_size
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index 5dc2f69b9d3..54018471abc 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -1555,12 +1555,12 @@ sync_print_wait_info(
FILE* file) /*!< in: file where to print */
{
fprintf(file,
- "Mutex spin waits "UINT64PF", rounds "UINT64PF", "
- "OS waits "UINT64PF"\n"
- "RW-shared spins "UINT64PF", rounds "UINT64PF", "
- "OS waits "UINT64PF"\n"
- "RW-excl spins "UINT64PF", rounds "UINT64PF", "
- "OS waits "UINT64PF"\n",
+ "Mutex spin waits " UINT64PF ", rounds " UINT64PF ", "
+ "OS waits " UINT64PF "\n"
+ "RW-shared spins " UINT64PF ", rounds " UINT64PF ", "
+ "OS waits " UINT64PF "\n"
+ "RW-excl spins " UINT64PF ", rounds " UINT64PF ", "
+ "OS waits " UINT64PF "\n",
(ib_uint64_t) mutex_spin_wait_count,
(ib_uint64_t) mutex_spin_round_count,
(ib_uint64_t) mutex_os_wait_count,
diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc
index f6360562ae7..01ccfb8a6d0 100644
--- a/storage/innobase/trx/trx0i_s.cc
+++ b/storage/innobase/trx/trx0i_s.cc
@@ -1639,7 +1639,7 @@ trx_i_s_create_lock_id(
} else {
/* table lock */
res_len = ut_snprintf(lock_id, lock_id_size,
- TRX_ID_FMT":"UINT64PF,
+ TRX_ID_FMT":" UINT64PF,
row->lock_trx_id,
row->lock_table_id);
}
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 4f8e4ad4487..796ac316d4b 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -1803,7 +1803,7 @@ state_ok:
if (trx->undo_no != 0) {
newline = TRUE;
- fprintf(f, ", undo log entries "TRX_ID_FMT, trx->undo_no);
+ fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
}
if (newline) {
@@ -2095,7 +2095,7 @@ trx_recover_for_mysql(
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Transaction contains changes"
- " to "TRX_ID_FMT" rows\n",
+ " to " TRX_ID_FMT " rows\n",
trx->undo_no);
count++;