bzr merge -r4346 maria/10.0 (maria-10.0.13)

author: Nirbhay Choubey <nirbhay@skysql.com> 2014-08-11 23:55:41 -0400
committer: Nirbhay Choubey <nirbhay@skysql.com> 2014-08-11 23:55:41 -0400
commit: 8358dd53b7406deaa9f50ad09b16a86b7e367632 (patch)
tree: ef8995ad0e400cb6a1842649c3c886c7b3474835 /storage/innobase
parent: e06e12f5b8dfe0ab2e5976eec1b27b25d318441b (diff)
parent: 4105cbf4a230c82ea7dee31d4d2262b798fad9f4 (diff)
download: mariadb-git-8358dd53b7406deaa9f50ad09b16a86b7e367632.tar.gz
36 files changed, 1325 insertions, 503 deletions
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 1d2f313a07c..34a72f360be 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -202,15 +202,6 @@ btr_rec_free_externally_stored_fields(
 	mtr_t*		mtr);	/*!< in: mini-transaction handle which contains
 				an X-latch to record page and to the index
 				tree */
-/***********************************************************//**
-Gets the externally stored size of a record, in units of a database page.
-@return	externally stored part, in units of a database page */
-static
-ulint
-btr_rec_get_externally_stored_len(
-/*==============================*/
-	const rec_t*	rec,	/*!< in: record */
-	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
 #endif /* !UNIV_HOTBACKUP */
 
 /******************************************************//**
@@ -271,6 +262,7 @@ btr_cur_latch_leaves(
 	case BTR_MODIFY_TREE:
 		/* x-latch also brothers from left to right */
 		left_page_no = btr_page_get_prev(page, mtr);
+		mode = latch_mode;
 
 		if (left_page_no != FIL_NULL) {
 			get_block = btr_block_get(
@@ -4043,15 +4035,15 @@ btr_rec_get_field_ref_offs(
 #define btr_rec_get_field_ref(rec, offsets, n)			\
 	((rec) + btr_rec_get_field_ref_offs(offsets, n))
 
-/***********************************************************//**
-Gets the externally stored size of a record, in units of a database page.
+/** Gets the externally stored size of a record, in units of a database page.
+@param[in]	rec	record
+@param[in]	offsets	array returned by rec_get_offsets()
 @return	externally stored part, in units of a database page */
-static
+
 ulint
 btr_rec_get_externally_stored_len(
-/*==============================*/
-	const rec_t*	rec,	/*!< in: record */
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
+	const rec_t*	rec,
+	const ulint*	offsets)
 {
 	ulint	n_fields;
 	ulint	total_extern_len = 0;
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 3cce75abe74..fa2edb90b8e 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -2183,6 +2183,10 @@ af_get_pct_for_dirty()
 {
 	ulint dirty_pct = buf_get_modified_ratio_pct();
 
+	if (dirty_pct > 0 && srv_max_buf_pool_modified_pct == 0) {
+		return(100);
+	}
+
 	ut_a(srv_max_dirty_pages_pct_lwm
 	     <= srv_max_buf_pool_modified_pct);
 
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index ec30c063a72..64409e1993d 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -2263,6 +2263,24 @@ buf_LRU_block_remove_hashed(
 			" in the hash table\n",
 			(ulong) bpage->space,
 			(ulong) bpage->offset);
+#ifdef UNIV_DEBUG
+		fprintf(stderr,
+			"InnoDB: in_page_hash %lu in_zip_hash %lu\n"
+			" in_free_list %lu in_flush_list %lu in_LRU_list %lu\n"
+			" zip.data %p zip_size %lu page_state %d\n",
+			bpage->in_page_hash, bpage->in_zip_hash,
+			bpage->in_free_list, bpage->in_flush_list,
+			bpage->in_LRU_list, bpage->zip.data,
+			buf_page_get_zip_size(bpage),
+			buf_page_get_state(bpage));
+#else
+		fprintf(stderr,
+			"InnoDB: zip.data %p zip_size %lu page_state %d\n",
+			bpage->zip.data,
+			buf_page_get_zip_size(bpage),
+			buf_page_get_state(bpage));
+#endif
+
 		if (hashed_bpage) {
 			fprintf(stderr,
 				"InnoDB: In hash table we find block"
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 86a903d925e..c53f7e82f58 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -50,6 +50,7 @@ UNIV_INTERN dict_index_t*	dict_ind_compact;
 #include "btr0btr.h"
 #include "btr0cur.h"
 #include "btr0sea.h"
+#include "os0once.h"
 #include "page0zip.h"
 #include "page0page.h"
 #include "pars0pars.h"
@@ -102,7 +103,7 @@ UNIV_INTERN ulong	zip_pad_max = 50;
 UNIV_INTERN mysql_pfs_key_t	dict_operation_lock_key;
 UNIV_INTERN mysql_pfs_key_t	index_tree_rw_lock_key;
 UNIV_INTERN mysql_pfs_key_t	index_online_log_key;
-UNIV_INTERN mysql_pfs_key_t	dict_table_stats_latch_key;
+UNIV_INTERN mysql_pfs_key_t	dict_table_stats_key;
 #endif /* UNIV_PFS_RWLOCK */
 
 #ifdef UNIV_PFS_MUTEX
@@ -121,6 +122,11 @@ UNIV_INTERN mysql_pfs_key_t	dict_foreign_err_mutex_key;
 /** Identifies generated InnoDB foreign key names */
 static char	dict_ibfk[] = "_ibfk_";
 
+bool		innodb_table_stats_not_found = false;
+bool		innodb_index_stats_not_found = false;
+static bool	innodb_table_stats_not_found_reported = false;
+static bool	innodb_index_stats_not_found_reported = false;
+
 /*******************************************************************//**
 Tries to find column names for the index and sets the col field of the
 index.
@@ -319,6 +325,82 @@ dict_mutex_exit_for_mysql(void)
 	mutex_exit(&(dict_sys->mutex));
 }
 
+/** Allocate and init a dict_table_t's stats latch.
+This function must not be called concurrently on the same table object.
+@param[in,out]	table_void	table whose stats latch to create */
+static
+void
+dict_table_stats_latch_alloc(
+	void*	table_void)
+{
+	dict_table_t*	table = static_cast<dict_table_t*>(table_void);
+
+	table->stats_latch = new(std::nothrow) rw_lock_t;
+
+	ut_a(table->stats_latch != NULL);
+
+	rw_lock_create(dict_table_stats_key, table->stats_latch,
+		       SYNC_INDEX_TREE);
+}
+
+/** Deinit and free a dict_table_t's stats latch.
+This function must not be called concurrently on the same table object.
+@param[in,out]	table	table whose stats latch to free */
+static
+void
+dict_table_stats_latch_free(
+	dict_table_t*	table)
+{
+	rw_lock_free(table->stats_latch);
+	delete table->stats_latch;
+}
+
+/** Create a dict_table_t's stats latch or delay for lazy creation.
+This function is only called from either a single-threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out]	table	table whose stats latch to create
+@param[in]	enabled	if false then the latch is disabled
+and dict_table_stats_lock()/unlock() become noop on this table. */
+
+void
+dict_table_stats_latch_create(
+	dict_table_t*	table,
+	bool		enabled)
+{
+	if (!enabled) {
+		table->stats_latch = NULL;
+		table->stats_latch_created = os_once::DONE;
+		return;
+	}
+
+#ifdef HAVE_ATOMIC_BUILTINS
+	/* We create this lazily the first time it is used. */
+	table->stats_latch = NULL;
+	table->stats_latch_created = os_once::NEVER_DONE;
+#else /* HAVE_ATOMIC_BUILTINS */
+
+	dict_table_stats_latch_alloc(table);
+
+	table->stats_latch_created = os_once::DONE;
+#endif /* HAVE_ATOMIC_BUILTINS */
+}
+
+/** Destroy a dict_table_t's stats latch.
+This function is only called from either a single-threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out]	table	table whose stats latch to destroy */
+
+void
+dict_table_stats_latch_destroy(
+	dict_table_t*	table)
+{
+	if (table->stats_latch_created == os_once::DONE
+	    && table->stats_latch != NULL) {
+
+		dict_table_stats_latch_free(table);
+	}
+}
+
 /**********************************************************************//**
 Lock the appropriate latch to protect a given table's statistics. */
 UNIV_INTERN
@@ -331,6 +413,14 @@ dict_table_stats_lock(
 	ut_ad(table != NULL);
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
+#ifdef HAVE_ATOMIC_BUILTINS
+	os_once::do_or_wait_for_done(
+		&table->stats_latch_created,
+		dict_table_stats_latch_alloc, table);
+#else /* HAVE_ATOMIC_BUILTINS */
+	ut_ad(table->stats_latch_created == os_once::DONE);
+#endif /* HAVE_ATOMIC_BUILTINS */
+
 	if (table->stats_latch == NULL) {
 		/* This is a dummy table object that is private in the current
 		thread and is not shared between multiple threads, thus we
@@ -5212,8 +5302,6 @@ dict_table_print(
 		index = UT_LIST_GET_NEXT(indexes, index);
 	}
 
-	table->stat_initialized = FALSE;
-
 	dict_table_stats_unlock(table, RW_X_LATCH);
 
 	foreign = UT_LIST_GET_FIRST(table->foreign_list);
@@ -6016,14 +6104,34 @@ dict_table_schema_check(
 	table = dict_table_get_low(req_schema->table_name);
 
 	if (table == NULL) {
+		bool should_print=true;
 		/* no such table */
 
-		ut_snprintf(errstr, errstr_sz,
-			    "Table %s not found.",
-			    ut_format_name(req_schema->table_name,
-					   TRUE, buf, sizeof(buf)));
+		if (innobase_strcasecmp(req_schema->table_name, "mysql/innodb_table_stats") == 0) {
+			if (innodb_table_stats_not_found_reported == false) {
+				innodb_table_stats_not_found = true;
+				innodb_table_stats_not_found_reported = true;
+			} else {
+				should_print = false;
+			}
+		} else if (innobase_strcasecmp(req_schema->table_name, "mysql/innodb_index_stats") == 0 ) {
+			if (innodb_index_stats_not_found_reported == false) {
+				innodb_index_stats_not_found = true;
+				innodb_index_stats_not_found_reported = true;
+			} else {
+				should_print = false;
+			}
+		}
 
-		return(DB_TABLE_NOT_FOUND);
+		if (should_print) {
+			ut_snprintf(errstr, errstr_sz,
+				"Table %s not found.",
+				ut_format_name(req_schema->table_name,
+					TRUE, buf, sizeof(buf)));
+			return(DB_TABLE_NOT_FOUND);
+		} else {
+			return(DB_STATS_DO_NOT_EXIST);
+		}
 	}
 
 	if (table->ibd_file_missing) {
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index 60daeea3a96..6310b2fd225 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -95,9 +95,9 @@ dict_mem_table_create(
 
 	ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
 
-	table->stats_latch = new rw_lock_t;
-	rw_lock_create(dict_table_stats_latch_key, table->stats_latch,
-		       SYNC_INDEX_TREE);
+	/* true means that the stats latch will be enabled -
+	dict_table_stats_lock() will not be noop. */
+	dict_table_stats_latch_create(table, true);
 
 #ifndef UNIV_HOTBACKUP
 	table->autoinc_lock = static_cast<ib_lock_t*>(
@@ -154,8 +154,7 @@ dict_mem_table_free(
 	mutex_free(&(table->autoinc_mutex));
 #endif /* UNIV_HOTBACKUP */
 
-	rw_lock_free(table->stats_latch);
-	delete table->stats_latch;
+	dict_table_stats_latch_destroy(table);
 
 	ut_free(table->name);
 	mem_heap_free(table->heap);
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index 928bdb3f2ef..1eac9e0df51 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -46,6 +46,7 @@ Created Jan 06, 2010 Vasil Dimov
 #include "ut0rnd.h" /* ut_rnd_interval() */
 #include "ut0ut.h" /* ut_format_name(), ut_time() */
 
+#include <algorithm>
 #include <map>
 #include <vector>
 
@@ -127,10 +128,11 @@ where n=1..n_uniq.
 #endif /* UNIV_STATS_DEBUG */
 
 /* Gets the number of leaf pages to sample in persistent stats estimation */
-#define N_SAMPLE_PAGES(index)				\
-	((index)->table->stats_sample_pages != 0 ?	\
-	 (index)->table->stats_sample_pages :		\
-	 srv_stats_persistent_sample_pages)
+#define N_SAMPLE_PAGES(index)					\
+	static_cast<ib_uint64_t>(				\
+		(index)->table->stats_sample_pages != 0		\
+		? (index)->table->stats_sample_pages		\
+		: srv_stats_persistent_sample_pages)
 
 /* number of distinct records on a given level that are required to stop
 descending to lower levels and fetch N_SAMPLE_PAGES(index) records
@@ -268,10 +270,12 @@ dict_stats_persistent_storage_check(
 		mutex_exit(&(dict_sys->mutex));
 	}
 
-	if (ret != DB_SUCCESS) {
+	if (ret != DB_SUCCESS && ret != DB_STATS_DO_NOT_EXIST) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr, " InnoDB: Error: %s\n", errstr);
 		return(false);
+	} else if (ret == DB_STATS_DO_NOT_EXIST) {
+		return false;
 	}
 	/* else */
 
@@ -430,9 +434,9 @@ dict_stats_table_clone_create(
 	t->corrupted = table->corrupted;
 
 	/* This private object "t" is not shared with other threads, so
-	we do not need the stats_latch. The lock/unlock routines will do
-	nothing if stats_latch is NULL. */
-	t->stats_latch = NULL;
+	we do not need the stats_latch (thus we pass false below). The
+	dict_table_stats_lock()/unlock() routines will do nothing. */
+	dict_table_stats_latch_create(t, false);
 
 	UT_LIST_INIT(t->indexes);
 
@@ -508,6 +512,7 @@ dict_stats_table_clone_free(
 /*========================*/
 	dict_table_t*	t)	/*!< in: dummy table object to free */
 {
+	dict_table_stats_latch_destroy(t);
 	mem_heap_free(t->heap);
 }
 
@@ -1283,35 +1288,40 @@ enum page_scan_method_t {
 };
 /* @} */
 
-/*********************************************************************//**
-Scan a page, reading records from left to right and counting the number
-of distinct records on that page (looking only at the first n_prefix
-columns). If scan_method is QUIT_ON_FIRST_NON_BORING then the function
+/** Scan a page, reading records from left to right and counting the number
+of distinct records (looking only at the first n_prefix
+columns) and the number of external pages pointed to by records from this page.
+If scan_method is QUIT_ON_FIRST_NON_BORING then the function
 will return as soon as it finds a record that does not match its neighbor
 to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
 returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
 equal) or 2 (the function found a non-boring record and returned).
+@param[out]	out_rec			record, or NULL
+@param[out]	offsets1		rec_get_offsets() working space (must
+be big enough)
+@param[out]	offsets2		rec_get_offsets() working space (must
+be big enough)
+@param[in]	index			index of the page
+@param[in]	page			the page to scan
+@param[in]	n_prefix		look at the first n_prefix columns
+@param[in]	scan_method		scan to the end of the page or not
+@param[out]	n_diff			number of distinct records encountered
+@param[out]	n_external_pages	if this is non-NULL then it will be set
+to the number of externally stored pages which were encountered
 @return offsets1 or offsets2 (the offsets of *out_rec),
 or NULL if the page is empty and does not contain user records. */
-UNIV_INLINE __attribute__((nonnull))
+UNIV_INLINE
 ulint*
 dict_stats_scan_page(
-/*=================*/
-	const rec_t**		out_rec,	/*!< out: record, or NULL */
-	ulint*			offsets1,	/*!< out: rec_get_offsets()
-						working space (must be big
-						enough) */
-	ulint*			offsets2,	/*!< out: rec_get_offsets()
-						working space (must be big
-						enough) */
-	dict_index_t*		index,		/*!< in: index of the page */
-	const page_t*		page,		/*!< in: the page to scan */
-	ulint			n_prefix,	/*!< in: look at the first
-						n_prefix columns */
-	page_scan_method_t	scan_method,	/*!< in: scan to the end of
-						the page or not */
-	ib_uint64_t*		n_diff)		/*!< out: number of distinct
-						records encountered */
+	const rec_t**		out_rec,
+	ulint*			offsets1,
+	ulint*			offsets2,
+	dict_index_t*		index,
+	const page_t*		page,
+	ulint			n_prefix,
+	page_scan_method_t	scan_method,
+	ib_uint64_t*		n_diff,
+	ib_uint64_t*		n_external_pages)
 {
 	ulint*		offsets_rec		= offsets1;
 	ulint*		offsets_next_rec	= offsets2;
@@ -1329,6 +1339,12 @@ dict_stats_scan_page(
 		get_next = page_rec_get_next_const;
 	}
 
+	const bool	should_count_external_pages = n_external_pages != NULL;
+
+	if (should_count_external_pages) {
+		*n_external_pages = 0;
+	}
+
 	rec = get_next(page_get_infimum_rec(page));
 
 	if (page_rec_is_supremum(rec)) {
@@ -1341,6 +1357,11 @@ dict_stats_scan_page(
 	offsets_rec = rec_get_offsets(rec, index, offsets_rec,
 				      ULINT_UNDEFINED, &heap);
 
+	if (should_count_external_pages) {
+		*n_external_pages += btr_rec_get_externally_stored_len(
+			rec, offsets_rec);
+	}
+
 	next_rec = get_next(rec);
 
 	*n_diff = 1;
@@ -1391,6 +1412,11 @@ dict_stats_scan_page(
 			offsets_next_rec = offsets_tmp;
 		}
 
+		if (should_count_external_pages) {
+			*n_external_pages += btr_rec_get_externally_stored_len(
+				rec, offsets_rec);
+		}
+
 		next_rec = get_next(next_rec);
 	}
 
@@ -1401,19 +1427,25 @@ func_exit:
 	return(offsets_rec);
 }
 
-/*********************************************************************//**
-Dive below the current position of a cursor and calculate the number of
+/** Dive below the current position of a cursor and calculate the number of
 distinct records on the leaf page, when looking at the fist n_prefix
-columns.
+columns. Also calculate the number of external pages pointed to by records
+on the leaf page.
+@param[in]	cur			cursor
+@param[in]	n_prefix		look at the first n_prefix columns
+when comparing records
+@param[out]	n_diff			number of distinct records
+@param[out]	n_external_pages	number of external pages
+@param[in,out]	mtr			mini-transaction
 @return number of distinct records on the leaf page */
 static
-ib_uint64_t
+void
 dict_stats_analyze_index_below_cur(
-/*===============================*/
-	const btr_cur_t*cur,		/*!< in: cursor */
-	ulint		n_prefix,	/*!< in: look at the first n_prefix
-					columns when comparing records */
-	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+	const btr_cur_t*	cur,
+	ulint			n_prefix,
+	ib_uint64_t*		n_diff,
+	ib_uint64_t*		n_external_pages,
+	mtr_t*			mtr)
 {
 	dict_index_t*	index;
 	ulint		space;
@@ -1426,7 +1458,6 @@ dict_stats_analyze_index_below_cur(
 	ulint*		offsets1;
 	ulint*		offsets2;
 	ulint*		offsets_rec;
-	ib_uint64_t	n_diff; /* the result */
 	ulint		size;
 
 	index = btr_cur_get_index(cur);
@@ -1462,6 +1493,10 @@ dict_stats_analyze_index_below_cur(
 
 	page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
 
+	/* assume no external pages by default - in case we quit from this
+	function without analyzing any leaf pages */
+	*n_external_pages = 0;
+
 	/* descend to the leaf level on the B-tree */
 	for (;;) {
 
@@ -1480,20 +1515,24 @@ dict_stats_analyze_index_below_cur(
 		/* search for the first non-boring record on the page */
 		offsets_rec = dict_stats_scan_page(
 			&rec, offsets1, offsets2, index, page, n_prefix,
-			QUIT_ON_FIRST_NON_BORING, &n_diff);
+			QUIT_ON_FIRST_NON_BORING, n_diff, NULL);
 
 		/* pages on level > 0 are not allowed to be empty */
 		ut_a(offsets_rec != NULL);
 		/* if page is not empty (offsets_rec != NULL) then n_diff must
 		be > 0, otherwise there is a bug in dict_stats_scan_page() */
-		ut_a(n_diff > 0);
+		ut_a(*n_diff > 0);
 
-		if (n_diff == 1) {
+		if (*n_diff == 1) {
 			/* page has all keys equal and the end of the page
 			was reached by dict_stats_scan_page(), no need to
 			descend to the leaf level */
 			mem_heap_free(heap);
-			return(1);
+			/* can't get an estimate for n_external_pages here
+			because we do not dive to the leaf level, assume no
+			external pages (*n_external_pages was assigned to 0
+			above). */
+			return;
 		}
 		/* else */
 
@@ -1501,7 +1540,7 @@ dict_stats_analyze_index_below_cur(
 		first non-boring record it finds, then the returned n_diff
 		can either be 0 (empty page), 1 (page has all keys equal) or
 		2 (non-boring record was found) */
-		ut_a(n_diff == 2);
+		ut_a(*n_diff == 2);
 
 		/* we have a non-boring record in rec, descend below it */
 
@@ -1512,11 +1551,14 @@ dict_stats_analyze_index_below_cur(
 	ut_ad(btr_page_get_level(page, mtr) == 0);
 
 	/* scan the leaf page and find the number of distinct keys,
-	when looking only at the first n_prefix columns */
+	when looking only at the first n_prefix columns; also estimate
+	the number of externally stored pages pointed to by records on this
+	page */
 
 	offsets_rec = dict_stats_scan_page(
 		&rec, offsets1, offsets2, index, page, n_prefix,
-		COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, &n_diff);
+		COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff,
+		n_external_pages);
 
 #if 0
 	DEBUG_PRINTF("      %s(): n_diff below page_no=%lu: " UINT64PF "\n",
@@ -1524,133 +1566,146 @@ dict_stats_analyze_index_below_cur(
 #endif
 
 	mem_heap_free(heap);
-
-	return(n_diff);
 }
 
-/*********************************************************************//**
-For a given level in an index select N_SAMPLE_PAGES(index)
-(or less) records from that level and dive below them to the corresponding
-leaf pages, then scan those leaf pages and save the sampling results in
-index->stat_n_diff_key_vals[n_prefix - 1] and the number of pages scanned in
-index->stat_n_sample_sizes[n_prefix - 1]. */
+/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
+for each n-columns prefix (n from 1 to n_uniq). */
+struct n_diff_data_t {
+	/** Index of the level on which the descent through the btree
+	stopped. level 0 is the leaf level. This is >= 1 because we
+	avoid scanning the leaf level because it may contain too many
+	pages and doing so is useless when combined with the random dives -
+	if we are to scan the leaf level, this means a full scan and we can
+	simply do that instead of fiddling with picking random records higher
+	in the tree and to dive below them. At the start of the analyzing
+	we may decide to do full scan of the leaf level, but then this
+	structure is not used in that code path. */
+	ulint		level;
+
+	/** Number of records on the level where the descent through the btree
+	stopped. When we scan the btree from the root, we stop at some mid
+	level, choose some records from it and dive below them towards a leaf
+	page to analyze. */
+	ib_uint64_t	n_recs_on_level;
+
+	/** Number of different key values that were found on the mid level. */
+	ib_uint64_t	n_diff_on_level;
+
+	/** Number of leaf pages that are analyzed. This is also the same as
+	the number of records that we pick from the mid level and dive below
+	them. */
+	ib_uint64_t	n_leaf_pages_to_analyze;
+
+	/** Cumulative sum of the number of different key values that were
+	found on all analyzed pages. */
+	ib_uint64_t	n_diff_all_analyzed_pages;
+
+	/** Cumulative sum of the number of external pages (stored outside of
+	the btree but in the same file segment). */
+	ib_uint64_t	n_external_pages_sum;
+};
+
+/** Estimate the number of different key values in an index when looking at
+the first n_prefix columns. For a given level in an index select
+n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
+them to the corresponding leaf pages, then scan those leaf pages and save the
+sampling results in n_diff_data->n_diff_all_analyzed_pages.
+@param[in]	index			index
+@param[in]	n_prefix		look at first 'n_prefix' columns when
+comparing records
+@param[in]	boundaries		a vector that contains
+n_diff_data->n_diff_on_level integers each of which represents the index (on
+level 'level', counting from left/smallest to right/biggest from 0) of the
+last record from each group of distinct keys
+@param[in,out]	n_diff_data		n_diff_all_analyzed_pages and
+n_external_pages_sum in this structure will be set by this function. The
+members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
+caller in advance - they are used by some calculations inside this function
+@param[in,out]	mtr			mini-transaction */
 static
 void
 dict_stats_analyze_index_for_n_prefix(
-/*==================================*/
-	dict_index_t*	index,		/*!< in/out: index */
-	ulint		level,		/*!< in: level, must be >= 1 */
-	ib_uint64_t	total_recs_on_level,
-					/*!< in: total number of
-					records on the given level */
-	ulint		n_prefix,	/*!< in: look at first
-					n_prefix columns when
-					comparing records */
-	ib_uint64_t	n_diff_for_this_prefix,
-					/*!< in: number of distinct
-					records on the given level,
-					when looking at the first
-					n_prefix columns */
-	boundaries_t*	boundaries,	/*!< in: array that contains
-					n_diff_for_this_prefix
-					integers each of which
-					represents the index (on the
-					level, counting from
-					left/smallest to right/biggest
-					from 0) of the last record
-					from each group of distinct
-					keys */
-	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+	dict_index_t*		index,
+	ulint			n_prefix,
+	const boundaries_t*	boundaries,
+	n_diff_data_t*		n_diff_data,
+	mtr_t*			mtr)
 {
 	btr_pcur_t	pcur;
 	const page_t*	page;
 	ib_uint64_t	rec_idx;
-	ib_uint64_t	last_idx_on_level;
-	ib_uint64_t	n_recs_to_dive_below;
-	ib_uint64_t	n_diff_sum_of_all_analyzed_pages;
 	ib_uint64_t	i;
 
 #if 0
 	DEBUG_PRINTF("    %s(table=%s, index=%s, level=%lu, n_prefix=%lu, "
-		     "n_diff_for_this_prefix=" UINT64PF ")\n",
+		     "n_diff_on_level=" UINT64PF ")\n",
 		     __func__, index->table->name, index->name, level,
-		     n_prefix, n_diff_for_this_prefix);
+		     n_prefix, n_diff_data->n_diff_on_level);
 #endif
 
 	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
 				MTR_MEMO_S_LOCK));
 
-	/* if some of those is 0 then this means that there is exactly one
-	page in the B-tree and it is empty and we should have done full scan
-	and should not be here */
-	ut_ad(total_recs_on_level > 0);
-	ut_ad(n_diff_for_this_prefix > 0);
-
-	/* this must be at least 1 */
-	ut_ad(N_SAMPLE_PAGES(index) > 0);
-
 	/* Position pcur on the leftmost record on the leftmost page
 	on the desired level. */
 
 	btr_pcur_open_at_index_side(
 		true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
-		&pcur, true, level, mtr);
+		&pcur, true, n_diff_data->level, mtr);
 	btr_pcur_move_to_next_on_page(&pcur);
 
 	page = btr_pcur_get_page(&pcur);
 
+	const rec_t*	first_rec = btr_pcur_get_rec(&pcur);
+
+	/* We shouldn't be scanning the leaf level. The caller of this function
+	should have stopped the descent on level 1 or higher. */
+	ut_ad(n_diff_data->level > 0);
+	ut_ad(!page_is_leaf(page));
+
 	/* The page must not be empty, except when
 	it is the root page (and the whole index is empty). */
-	ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
-	ut_ad(btr_pcur_get_rec(&pcur)
-	      == page_rec_get_next_const(page_get_infimum_rec(page)));
+	ut_ad(btr_pcur_is_on_user_rec(&pcur));
+	ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));
 
 	/* check that we are indeed on the desired level */
-	ut_a(btr_page_get_level(page, mtr) == level);
+	ut_a(btr_page_get_level(page, mtr) == n_diff_data->level);
 
 	/* there should not be any pages on the left */
 	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
 
 	/* check whether the first record on the leftmost page is marked
-	as such, if we are on a non-leaf level */
-	ut_a((level == 0)
-	     == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
-			  btr_pcur_get_rec(&pcur), page_is_comp(page))));
+	as such; we are on a non-leaf level */
+	ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
+	     & REC_INFO_MIN_REC_FLAG);
 
-	last_idx_on_level = boundaries->at(
-		static_cast<unsigned int>(n_diff_for_this_prefix - 1));
+	const ib_uint64_t	last_idx_on_level = boundaries->at(
+		static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));
 
 	rec_idx = 0;
 
-	n_diff_sum_of_all_analyzed_pages = 0;
-
-	n_recs_to_dive_below = ut_min(N_SAMPLE_PAGES(index),
-				      n_diff_for_this_prefix);
-
-	for (i = 0; i < n_recs_to_dive_below; i++) {
-		ib_uint64_t	left;
-		ib_uint64_t	right;
-		ib_uint64_t	rnd;
-		ib_uint64_t	dive_below_idx;
+	n_diff_data->n_diff_all_analyzed_pages = 0;
+	n_diff_data->n_external_pages_sum = 0;
 
-		/* there are n_diff_for_this_prefix elements
+	for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
+		/* there are n_diff_on_level elements
 		in 'boundaries' and we divide those elements
-		into n_recs_to_dive_below segments, for example:
+		into n_leaf_pages_to_analyze segments, for example:
 
-		let n_diff_for_this_prefix=100, n_recs_to_dive_below=4, then:
+		let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
 		segment i=0:  [0, 24]
 		segment i=1: [25, 49]
 		segment i=2: [50, 74]
 		segment i=3: [75, 99] or
 
-		let n_diff_for_this_prefix=1, n_recs_to_dive_below=1, then:
+		let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
 		segment i=0: [0, 0] or
 
-		let n_diff_for_this_prefix=2, n_recs_to_dive_below=2, then:
+		let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
 		segment i=0: [0, 0]
 		segment i=1: [1, 1] or
 
-		let n_diff_for_this_prefix=13, n_recs_to_dive_below=7, then:
+		let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
 		segment i=0:  [0,  0]
 		segment i=1:  [1,  2]
 		segment i=2:  [3,  4]
@@ -1661,9 +1716,12 @@ dict_stats_analyze_index_for_n_prefix(
 
 		then we select a random record from each segment and dive
 		below it */
-		left = n_diff_for_this_prefix * i / n_recs_to_dive_below;
-		right = n_diff_for_this_prefix * (i + 1)
-			/ n_recs_to_dive_below - 1;
+		const ib_uint64_t	n_diff = n_diff_data->n_diff_on_level;
+		const ib_uint64_t	n_pick
+			= n_diff_data->n_leaf_pages_to_analyze;
+
+		const ib_uint64_t	left = n_diff * i / n_pick;
+		const ib_uint64_t	right = n_diff * (i + 1) / n_pick - 1;
 
 		ut_a(left <= right);
 		ut_a(right <= last_idx_on_level);
@@ -1671,11 +1729,11 @@ dict_stats_analyze_index_for_n_prefix(
 		/* we do not pass (left, right) because we do not want to ask
 		ut_rnd_interval() to work with too big numbers since
 		ib_uint64_t could be bigger than ulint */
-		rnd = static_cast<ib_uint64_t>(
-			ut_rnd_interval(0, static_cast<ulint>(right - left)));
+		const ulint	rnd = ut_rnd_interval(
+			0, static_cast<ulint>(right - left));
 
-		dive_below_idx = boundaries->at(
-			static_cast<unsigned int>(left + rnd));
+		const ib_uint64_t	dive_below_idx
+			= boundaries->at(static_cast<unsigned>(left + rnd));
 
 #if 0
 		DEBUG_PRINTF("    %s(): dive below record with index="
@@ -1711,9 +1769,13 @@ dict_stats_analyze_index_for_n_prefix(
 		ut_a(rec_idx == dive_below_idx);
 
 		ib_uint64_t	n_diff_on_leaf_page;
+		ib_uint64_t	n_external_pages;
 
-		n_diff_on_leaf_page = dict_stats_analyze_index_below_cur(
-			btr_pcur_get_btr_cur(&pcur), n_prefix, mtr);
+		dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
+						   n_prefix,
+						   &n_diff_on_leaf_page,
+						   &n_external_pages,
+						   mtr);
 
 		/* We adjust n_diff_on_leaf_page here to avoid counting
 		one record twice - once as the last on some page and once
@@ -1733,37 +1795,86 @@ dict_stats_analyze_index_for_n_prefix(
 			n_diff_on_leaf_page--;
 		}
 
-		n_diff_sum_of_all_analyzed_pages += n_diff_on_leaf_page;
-	}
-
-	/* n_diff_sum_of_all_analyzed_pages can be 0 here if all the leaf
-	pages sampled contained only delete-marked records. In this case
-	we should assign 0 to index->stat_n_diff_key_vals[n_prefix - 1], which
-	the formula below does. */
+		n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;
 
-	/* See REF01 for an explanation of the algorithm */
-	index->stat_n_diff_key_vals[n_prefix - 1]
-		= index->stat_n_leaf_pages
-
-		* n_diff_for_this_prefix
-		/ total_recs_on_level
-
-		* n_diff_sum_of_all_analyzed_pages
-		/ n_recs_to_dive_below;
+		n_diff_data->n_external_pages_sum += n_external_pages;
+	}
 
-	index->stat_n_sample_sizes[n_prefix - 1] = n_recs_to_dive_below;
+	btr_pcur_close(&pcur);
+}
 
-	DEBUG_PRINTF("    %s(): n_diff=" UINT64PF " for n_prefix=%lu "
-		     "(%lu"
-		     " * " UINT64PF " / " UINT64PF
-		     " * " UINT64PF " / " UINT64PF ")\n",
-		     __func__, index->stat_n_diff_key_vals[n_prefix - 1],
-		     n_prefix,
-		     index->stat_n_leaf_pages,
-		     n_diff_for_this_prefix, total_recs_on_level,
-		     n_diff_sum_of_all_analyzed_pages, n_recs_to_dive_below);
+/** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
+@param[in]	n_diff_data	per-prefix input data, indexed by n_prefix - 1
+@param[in,out]	index		index whose stat_n_diff_key_vals[] to set */
+UNIV_INLINE
+void
+dict_stats_index_set_n_diff(
+	const n_diff_data_t*	n_diff_data,
+	dict_index_t*		index)
+{
+	for (ulint n_prefix = dict_index_get_n_unique(index);
+	     n_prefix >= 1;
+	     n_prefix--) {
+		/* n_diff_all_analyzed_pages can be 0 here if
+		all the leaf pages sampled contained only
+		delete-marked records. In this case we should assign
+		0 to index->stat_n_diff_key_vals[n_prefix - 1], which
+		the formula below does. */
+
+		const n_diff_data_t*	data = &n_diff_data[n_prefix - 1];
+
+		ut_ad(data->n_leaf_pages_to_analyze > 0);
+		ut_ad(data->n_recs_on_level > 0);
+
+		ulint	n_ordinary_leaf_pages;
+
+		if (data->level == 1) {
+			/* If we know the number of records on level 1, then
+			this number is the same as the number of pages on
+			level 0 (leaf). */
+			n_ordinary_leaf_pages = data->n_recs_on_level;
+		} else {
+			/* If we analyzed D ordinary leaf pages and found E
+			external pages in total linked from those D ordinary
+			leaf pages, then this means that the ratio
+			ordinary/external is D/E. Then the ratio ordinary/total
+			is D / (D + E). Knowing that the total number of pages
+			is T (including ordinary and external) then we estimate
+			that the total number of ordinary leaf pages is
+			T * D / (D + E). */
+			n_ordinary_leaf_pages
+				= index->stat_n_leaf_pages
+				* data->n_leaf_pages_to_analyze
+				/ (data->n_leaf_pages_to_analyze
+				   + data->n_external_pages_sum);
+		}
 
-	btr_pcur_close(&pcur);
+		/* See REF01 for an explanation of the algorithm */
+		index->stat_n_diff_key_vals[n_prefix - 1]
+			= n_ordinary_leaf_pages
+
+			* data->n_diff_on_level
+			/ data->n_recs_on_level
+
+			* data->n_diff_all_analyzed_pages
+			/ data->n_leaf_pages_to_analyze;
+
+		index->stat_n_sample_sizes[n_prefix - 1]
+			= data->n_leaf_pages_to_analyze;
+
+		/* Print the factors exactly as used in the formula above */
+		DEBUG_PRINTF("    %s(): n_diff=" UINT64PF " for n_prefix=%lu"
+			     " (%lu"
+			     " * " UINT64PF " / " UINT64PF
+			     " * " UINT64PF " / " UINT64PF ")\n",
+			     __func__,
+			     index->stat_n_diff_key_vals[n_prefix - 1],
+			     n_prefix,
+			     n_ordinary_leaf_pages,
+			     data->n_diff_on_level, data->n_recs_on_level,
+			     data->n_diff_all_analyzed_pages,
+			     data->n_leaf_pages_to_analyze);
+	}
 }
 
 /*********************************************************************//**
@@ -1781,10 +1892,8 @@ dict_stats_analyze_index(
 	bool		level_is_analyzed;
 	ulint		n_uniq;
 	ulint		n_prefix;
-	ib_uint64_t*	n_diff_on_level;
 	ib_uint64_t	total_recs;
 	ib_uint64_t	total_pages;
-	boundaries_t*	n_diff_boundaries;
 	mtr_t		mtr;
 	ulint		size;
 	DBUG_ENTER("dict_stats_analyze_index");
@@ -1870,11 +1979,18 @@ dict_stats_analyze_index(
 		DBUG_VOID_RETURN;
 	}
 
-	/* set to zero */
-	n_diff_on_level = reinterpret_cast<ib_uint64_t*>
-		(mem_zalloc(n_uniq * sizeof(ib_uint64_t)));
+	/* For each level that is being scanned in the btree, this contains the
+	number of different key values for all possible n-column prefixes. */
+	ib_uint64_t*		n_diff_on_level = new ib_uint64_t[n_uniq];
 
-	n_diff_boundaries = new boundaries_t[n_uniq];
+	/* For each level that is being scanned in the btree, this contains the
+	index of the last record from each group of equal records (when
+	comparing only the first n columns, n=1..n_uniq). */
+	boundaries_t*		n_diff_boundaries = new boundaries_t[n_uniq];
+
+	/* For each n-column prefix this array contains the input data that is
+	used to calculate dict_index_t::stat_n_diff_key_vals[]. */
+	n_diff_data_t*		n_diff_data = new n_diff_data_t[n_uniq];
 
 	/* total_recs is also used to estimate the number of pages on one
 	level below, so at the start we have 1 page (the root) */
@@ -1986,12 +2102,12 @@ dict_stats_analyze_index(
 
 			level_is_analyzed = true;
 
-			if (n_diff_on_level[n_prefix - 1]
-			    >= N_DIFF_REQUIRED(index)
-			    || level == 1) {
-				/* we found a good level with many distinct
-				records or we have reached the last level we
-				could scan */
+			if (level == 1
+			    || n_diff_on_level[n_prefix - 1]
+			    >= N_DIFF_REQUIRED(index)) {
+				/* we have reached the last level we could scan
+				or we found a good level with many distinct
+				records */
 				break;
 			}
 
@@ -2004,7 +2120,6 @@ found_level:
 			     " distinct records for n_prefix=%lu\n",
 			     __func__, level, n_diff_on_level[n_prefix - 1],
 			     n_prefix);
-
 		/* here we are either on level 1 or the level that we are on
 		contains >= N_DIFF_REQUIRED distinct keys or we did not scan
 		deeper levels because they would contain too many pages */
@@ -2013,20 +2128,47 @@ found_level:
 
 		ut_ad(level_is_analyzed);
 
+		/* if any of these is 0 then there is exactly one page in the
+		B-tree and it is empty and we should have done full scan and
+		should not be here */
+		ut_ad(total_recs > 0);
+		ut_ad(n_diff_on_level[n_prefix - 1] > 0);
+
+		ut_ad(N_SAMPLE_PAGES(index) > 0);
+
+		n_diff_data_t*	data = &n_diff_data[n_prefix - 1];
+
+		data->level = level;
+
+		data->n_recs_on_level = total_recs;
+
+		data->n_diff_on_level = n_diff_on_level[n_prefix - 1];
+
+		data->n_leaf_pages_to_analyze = std::min(
+			N_SAMPLE_PAGES(index),
+			n_diff_on_level[n_prefix - 1]);
+
 		/* pick some records from this level and dive below them for
 		the given n_prefix */
 
 		dict_stats_analyze_index_for_n_prefix(
-			index, level, total_recs, n_prefix,
-			n_diff_on_level[n_prefix - 1],
-			&n_diff_boundaries[n_prefix - 1], &mtr);
+			index, n_prefix, &n_diff_boundaries[n_prefix - 1],
+			data, &mtr);
 	}
 
 	mtr_commit(&mtr);
 
 	delete[] n_diff_boundaries;
 
-	mem_free(n_diff_on_level);
+	delete[] n_diff_on_level;
+
+	/* n_prefix == 0 means that the above loop did not end prematurely
+	due to the tree being changed, so n_diff_data[] is set up. */
+	if (n_prefix == 0) {
+		dict_stats_index_set_n_diff(n_diff_data, index);
+	}
+
+	delete[] n_diff_data;
 
 	dict_stats_assert_initialized_index(index);
 	DBUG_VOID_RETURN;
@@ -2201,17 +2343,21 @@ dict_stats_save_index_stat(
 		"END;", trx);
 
 	if (ret != DB_SUCCESS) {
-		char	buf_table[MAX_FULL_NAME_LEN];
-		char	buf_index[MAX_FULL_NAME_LEN];
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Cannot save index statistics for table "
-			"%s, index %s, stat name \"%s\": %s\n",
-			ut_format_name(index->table->name, TRUE,
-				       buf_table, sizeof(buf_table)),
-			ut_format_name(index->name, FALSE,
-				       buf_index, sizeof(buf_index)),
-			stat_name, ut_strerr(ret));
+		if (innodb_index_stats_not_found == false &&
+		    index->stats_error_printed == false) {
+			char	buf_table[MAX_FULL_NAME_LEN];
+			char	buf_index[MAX_FULL_NAME_LEN];
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				" InnoDB: Cannot save index statistics for table "
+				"%s, index %s, stat name \"%s\": %s\n",
+				ut_format_name(index->table->name, TRUE,
+					buf_table, sizeof(buf_table)),
+				ut_format_name(index->name, FALSE,
+					buf_index, sizeof(buf_index)),
+				stat_name, ut_strerr(ret));
+			index->stats_error_printed = true;
+		}
 	}
 
 	return(ret);
@@ -2900,20 +3046,24 @@ dict_stats_update_for_index(
 		}
 		/* else */
 
-		/* Fall back to transient stats since the persistent
-		storage is not present or is corrupted */
-		char	buf_table[MAX_FULL_NAME_LEN];
-		char	buf_index[MAX_FULL_NAME_LEN];
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Recalculation of persistent statistics "
-			"requested for table %s index %s but the required "
-			"persistent statistics storage is not present or is "
-			"corrupted. Using transient stats instead.\n",
-			ut_format_name(index->table->name, TRUE,
-				       buf_table, sizeof(buf_table)),
-			ut_format_name(index->name, FALSE,
-				       buf_index, sizeof(buf_index)));
+		if (innodb_index_stats_not_found == false &&
+		    index->stats_error_printed == false) {
+			/* The persistent storage is not present or is corrupted;
+			report the fallback to transient stats once per index */
+			char	buf_table[MAX_FULL_NAME_LEN];
+			char	buf_index[MAX_FULL_NAME_LEN];
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				" InnoDB: Recalculation of persistent statistics "
+				"requested for table %s index %s but the required "
+				"persistent statistics storage is not present or is "
+				"corrupted. Using transient stats instead.\n",
+				ut_format_name(index->table->name, TRUE,
+					buf_table, sizeof(buf_table)),
+				ut_format_name(index->name, FALSE,
+					buf_index, sizeof(buf_index)));
+			index->stats_error_printed = true;
+		}
 	}
 
 	dict_table_stats_lock(index->table, RW_X_LATCH);
@@ -2998,13 +3148,17 @@ dict_stats_update(
 		/* Fall back to transient stats since the persistent
 		storage is not present or is corrupted */
 
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Recalculation of persistent statistics "
-			"requested for table %s but the required persistent "
-			"statistics storage is not present or is corrupted. "
-			"Using transient stats instead.\n",
-			ut_format_name(table->name, TRUE, buf, sizeof(buf)));
+		if (innodb_table_stats_not_found == false &&
+		    table->stats_error_printed == false) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+				" InnoDB: Recalculation of persistent statistics "
+				"requested for table %s but the required persistent "
+				"statistics storage is not present or is corrupted. "
+				"Using transient stats instead.\n",
+				ut_format_name(table->name, TRUE, buf, sizeof(buf)));
+			table->stats_error_printed = true;
+		}
 
 		goto transient;
 
@@ -3048,17 +3202,21 @@ dict_stats_update(
 			/* persistent statistics storage does not exist
 			or is corrupted, calculate the transient stats */
 
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Error: Fetch of persistent "
-				"statistics requested for table %s but the "
-				"required system tables %s and %s are not "
-				"present or have unexpected structure. "
-				"Using transient stats instead.\n",
-				ut_format_name(table->name, TRUE,
-					       buf, sizeof(buf)),
-				TABLE_STATS_NAME_PRINT,
-				INDEX_STATS_NAME_PRINT);
+			if (innodb_table_stats_not_found == false &&
+			    table->stats_error_printed == false) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+					" InnoDB: Error: Fetch of persistent "
+					"statistics requested for table %s but the "
+					"required system tables %s and %s are not "
+					"present or have unexpected structure. "
+					"Using transient stats instead.\n",
+					ut_format_name(table->name, TRUE,
+						buf, sizeof(buf)),
+					TABLE_STATS_NAME_PRINT,
+					INDEX_STATS_NAME_PRINT);
+				table->stats_error_printed = true;
+			}
 
 			goto transient;
 		}
@@ -3128,16 +3286,19 @@ dict_stats_update(
 
 			dict_stats_table_clone_free(t);
 
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Error fetching persistent statistics "
-				"for table %s from %s and %s: %s. "
-				"Using transient stats method instead.\n",
-				ut_format_name(table->name, TRUE, buf,
-					       sizeof(buf)),
-				TABLE_STATS_NAME,
-				INDEX_STATS_NAME,
-				ut_strerr(err));
+			if (innodb_table_stats_not_found == false &&
+			    table->stats_error_printed == false) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+					" InnoDB: Error fetching persistent statistics "
+					"for table %s from %s and %s: %s. "
+					"Using transient stats method instead.\n",
+					ut_format_name(table->name, TRUE, buf,
+						sizeof(buf)),
+					TABLE_STATS_NAME,
+					INDEX_STATS_NAME,
+					ut_strerr(err));
+			}
 
 			goto transient;
 		}
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 81fcba47812..f4e5721caa7 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -758,7 +758,7 @@ fil_node_open_file(
 			fprintf(stderr,
 				"InnoDB: Error: the size of single-table"
 				" tablespace file %s\n"
-				"InnoDB: is only "UINT64PF","
+				"InnoDB: is only " UINT64PF ","
 				" should be at least %lu!\n",
 				node->name,
 				size_bytes,
@@ -5725,7 +5725,7 @@ fil_io(
 	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
 		     offset, len, node, message);
 #endif /* UNIV_HOTBACKUP */
-	ut_a(ret);
+
 
 	if (mode == OS_AIO_SYNC) {
 		/* The i/o operation is already completed when we return from
@@ -5740,7 +5740,10 @@ fil_io(
 		ut_ad(fil_validate_skip());
 	}
 
-	return(DB_SUCCESS);
+	/* ret holds the outcome of the i/o request issued above; a failed
+	request is reported to the caller as DB_OUT_OF_FILE_SPACE. */
+
+	return(ret ? DB_SUCCESS : DB_OUT_OF_FILE_SPACE);
 }
 
 #ifndef UNIV_HOTBACKUP
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index 4a667686795..f503cc487b7 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -44,6 +44,13 @@ Full Text Search interface
 /** Column name from the FTS config table */
 #define FTS_MAX_CACHE_SIZE_IN_MB	"cache_size_in_mb"
 
+/** Check whether an aux table name denotes an obsolete table, i.e. whether
+it contains "DOC_ID", "ADDED" or "STOPWORDS" as a substring */
+#define FTS_IS_OBSOLETE_AUX_TABLE(table_name)			\
+	(strstr((table_name), "DOC_ID") != NULL			\
+	 || strstr((table_name), "ADDED") != NULL		\
+	 || strstr((table_name), "STOPWORDS") != NULL)
+
 /** This is maximum FTS cache for each table and would be
 a configurable variable */
 UNIV_INTERN ulong	fts_max_cache_size;
@@ -5837,6 +5844,12 @@ fts_is_aux_table_name(
 			}
 		}
 
+		/* Could be obsolete common tables. */
+		if (strncmp(ptr, "ADDED", len) == 0
+		    || strncmp(ptr, "STOPWORDS", len) == 0) {
+			return(true);
+		}
+
 		/* Try and read the index id. */
 		if (!fts_read_object_id(&table->index_id, ptr)) {
 			return(FALSE);
@@ -6433,6 +6446,56 @@ fts_check_and_drop_orphaned_tables(
 
 				mem_free(path);
 			}
+		} else {
+			if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
+
+				/* Current table could be one of the three
+				obsolete tables. In that case we should
+				always try to drop it rather than rename it.
+				This can happen when upgrading from an older
+				server to a later one, which does not
+				contain these obsolete tables. */
+				drop = true;
+
+				dberr_t	err;
+				trx_t*	trx_drop =
+					trx_allocate_for_background();
+
+				trx_drop->op_info = "Drop obsolete aux tables";
+				trx_drop->dict_operation_lock_mode = RW_X_LATCH;
+
+				trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
+
+				err = row_drop_table_for_mysql(
+					aux_table->name, trx_drop, false, true);
+
+				trx_drop->dict_operation_lock_mode = 0;
+
+				if (err != DB_SUCCESS) {
+					/* We don't need to worry about the
+					failure, since server would try to
+					drop it on next restart, even if
+					the table was broken. */
+
+					ib_logf(IB_LOG_LEVEL_WARN,
+						"Fail to drop obsolete aux"
+						" table '%s', which is"
+						" harmless. will try to drop"
+						" it on next restart.",
+						aux_table->name);
+
+					fts_sql_rollback(trx_drop);
+				} else {
+					ib_logf(IB_LOG_LEVEL_INFO,
+						"Dropped obsolete aux"
+						" table '%s'.",
+						aux_table->name);
+
+					fts_sql_commit(trx_drop);
+				}
+
+				trx_free_for_background(trx_drop);
+			}
 		}
 #ifdef _WIN32
 		if (!drop && rename) {
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
index a9f3a25530d..910a00cd521 100644
--- a/storage/innobase/fts/fts0opt.cc
+++ b/storage/innobase/fts/fts0opt.cc
@@ -95,7 +95,7 @@ enum fts_msg_type_t {
 /** Compressed list of words that have been read from FTS INDEX
 that needs to be optimized. */
 struct fts_zip_t {
-	ulint		status;		/*!< Status of (un)/zip operation */
+	lint		status;		/*!< Status of (un)/zip operation */
 
 	ulint		n_words;	/*!< Number of words compressed */
 
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 79c994a78a0..a33d9a1d5bb 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -4,6 +4,7 @@ Copyright (c) 2000, 2014, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, 2009 Google Inc.
 Copyright (c) 2009, Percona Inc.
 Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, 2014 SkySQL Ab. All Rights Reserved.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -432,7 +433,7 @@ static PSI_rwlock_info all_innodb_rwlocks[] = {
 	{&trx_purge_latch_key, "trx_purge_latch", 0},
 	{&index_tree_rw_lock_key, "index_tree_rw_lock", 0},
 	{&index_online_log_key, "index_online_log", 0},
-	{&dict_table_stats_latch_key, "dict_table_stats", 0},
+	{&dict_table_stats_key, "dict_table_stats", 0},
 	{&hash_table_rw_lock_key, "hash_table_locks", 0}
 };
 # endif /* UNIV_PFS_RWLOCK */
@@ -3504,6 +3505,14 @@ innobase_end(
 
 	if (innodb_inited) {
 
+		THD *thd= current_thd;
+		if (thd) { // may be UNINSTALL PLUGIN statement
+		 	trx_t* trx = thd_to_trx(thd);
+		 	if (trx) {
+		 		trx_free_for_mysql(trx);
+		 	}
+		}
+
 		srv_fast_shutdown = (ulint) innobase_fast_shutdown;
 
 		innodb_inited = 0;
@@ -4254,7 +4263,7 @@ innobase_close_connection(
 
 		sql_print_warning(
 			"MySQL is closing a connection that has an active "
-			"InnoDB transaction.  "TRX_ID_FMT" row modifications "
+			"InnoDB transaction.  " TRX_ID_FMT " row modifications "
 			"will roll back.",
 			trx->undo_no);
 	}
@@ -4317,16 +4326,23 @@ innobase_kill_query(
 #endif /* WITH_WSREP */
 	trx = thd_to_trx(thd);
 
-        if (trx)
-        {
-          /* Cancel a pending lock request. */
-          lock_mutex_enter();
-          trx_mutex_enter(trx);
-          if (trx->lock.wait_lock)
-            lock_cancel_waiting_and_release(trx->lock.wait_lock);
-          trx_mutex_exit(trx);
-          lock_mutex_exit();
-        }
+	if (trx) {
+		THD *cur = current_thd;
+		THD *owner = trx->current_lock_mutex_owner;
+
+		/* Cancel a pending lock request. */
+		if (owner != cur) {
+			lock_mutex_enter();
+		}
+		trx_mutex_enter(trx);
+		if (trx->lock.wait_lock) {
+			lock_cancel_waiting_and_release(trx->lock.wait_lock);
+		}
+		trx_mutex_exit(trx);
+		if (owner != cur) {
+			lock_mutex_exit();
+		}
+	}
 
 	DBUG_VOID_RETURN;
 }
@@ -4373,14 +4389,11 @@ handler::Table_flags
 ha_innobase::table_flags() const
 /*============================*/
 {
-	THD *thd = ha_thd();
 	/* Need to use tx_isolation here since table flags is (also)
 	called before prebuilt is inited. */
-	ulong const tx_isolation = thd_tx_isolation(thd);
+	ulong const tx_isolation = thd_tx_isolation(ha_thd());
 
-	if (tx_isolation <= ISO_READ_COMMITTED &&
-	    !(tx_isolation == ISO_READ_COMMITTED &&
-	      thd_rpl_is_parallel(thd))) {
+	if (tx_isolation <= ISO_READ_COMMITTED) {
 		return(int_table_flags);
 	}
 
@@ -7871,7 +7884,7 @@ calc_row_difference(
 			if (doc_id < prebuilt->table->fts->cache->next_doc_id) {
 				fprintf(stderr,
 					"InnoDB: FTS Doc ID must be larger than"
-					" "IB_ID_FMT" for table",
+					" " IB_ID_FMT " for table",
 					innodb_table->fts->cache->next_doc_id
 					- 1);
 				ut_print_name(stderr, trx,
@@ -7883,9 +7896,9 @@ calc_row_difference(
 				    - prebuilt->table->fts->cache->next_doc_id)
 				   >= FTS_DOC_ID_MAX_STEP) {
 				fprintf(stderr,
-					"InnoDB: Doc ID "UINT64PF" is too"
+					"InnoDB: Doc ID " UINT64PF " is too"
 					" big. Its difference with largest"
-					" Doc ID used "UINT64PF" cannot"
+					" Doc ID used " UINT64PF " cannot"
 					" exceed or equal to %d\n",
 					doc_id,
 					prebuilt->table->fts->cache->next_doc_id - 1,
@@ -8625,6 +8638,29 @@ ha_innobase::innobase_get_index(
 		index = innobase_index_lookup(share, keynr);
 
 		if (index) {
+			if (!key || ut_strcmp(index->name, key->name) != 0) {
+				fprintf(stderr, "InnoDB: [Error] Index for key no %u"
+					" mysql name %s , InnoDB name %s for table %s\n",
+					keynr, key ? key->name : "NULL",
+					index->name,
+					prebuilt->table->name);
+
+				/* Dump all key/index pairs via locals, keeping index
+				and key intact for the assertion below. */
+				for (uint i = 0; i < table->s->keys; i++) {
+					const dict_index_t*	ib_index
+						= innobase_index_lookup(share, i);
+					const KEY*	my_key = table->key_info + i;
+
+					if (ib_index) {
+						fprintf(stderr, "InnoDB: [Note] Index for key no %u"
+							" mysql name %s , InnoDB name %s for table %s\n",
+							i, my_key->name, ib_index->name,
+							prebuilt->table->name);
+					}
+				}
+			}
+
 			ut_a(ut_strcmp(index->name, key->name) == 0);
 		} else {
 			/* Can't find index with keynr in the translation
@@ -12501,6 +12537,34 @@ ha_innobase::info_low(
 					break;
 				}
 
+	DBUG_EXECUTE_IF("ib_ha_innodb_stat_not_initialized",
+					index->table->stat_initialized = FALSE;);
+
+				if (!ib_table->stat_initialized ||
+					(index->table != ib_table ||
+						!index->table->stat_initialized)) {
+					fprintf(stderr,
+						"InnoDB: Warning: Index %s points to table %s"									        " and ib_table %s statistics is initialized %d "
+						" but index table %s initialized %d "
+					        " mysql table is %s. Have you mixed "
+						"up .frm files from different "
+					       	"installations? "
+						"See " REFMAN
+						"innodb-troubleshooting.html\n",
+						index->name,
+						index->table->name,
+						ib_table->name,
+						ib_table->stat_initialized,
+						index->table->name,
+						index->table->stat_initialized,
+						table->s->table_name.str
+						);
+
+					/* Initializing the statistics here is better
+					than hitting an assert in the function below */
+					dict_stats_init(index->table);
+				}
+
 				rec_per_key = innodb_rec_per_key(
 					index, j, stats.records);
 
@@ -18191,6 +18255,11 @@ static MYSQL_SYSVAR_ULONG(saved_page_number_debug,
   NULL, innodb_save_page_no, 0, 0, UINT_MAX32, 0);
 #endif /* UNIV_DEBUG */
 
+static MYSQL_SYSVAR_UINT(simulate_comp_failures, srv_simulate_comp_failures,
+  PLUGIN_VAR_NOCMDARG,
+  "Simulate compression failures.",
+  NULL, NULL, 0, 0, 99, 0);
+
 static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(additional_mem_pool_size),
   MYSQL_SYSVAR(api_trx_level),
@@ -18351,6 +18420,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(fil_make_page_dirty_debug),
   MYSQL_SYSVAR(saved_page_number_debug),
 #endif /* UNIV_DEBUG */
+  MYSQL_SYSVAR(simulate_comp_failures),
   NULL
 };
 
@@ -18680,7 +18750,7 @@ ib_senderrf(
 
 	va_start(args, code);
 
-	myf	l;
+	myf	l=0;
 
 	switch(level) {
 	case IB_LOG_LEVEL_INFO:
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 833166e783c..f1e4406fcf7 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -576,6 +576,17 @@ void
 btr_estimate_number_of_different_key_vals(
 /*======================================*/
 	dict_index_t*	index);	/*!< in: index */
+
+/** Gets the externally stored size of a record, in units of a database page.
+@param[in]	rec	record
+@param[in]	offsets	array returned by rec_get_offsets()
+@return externally stored part, in units of a database page */
+
+ulint
+btr_rec_get_externally_stored_len(
+	const rec_t*	rec,
+	const ulint*	offsets);
+
 /*******************************************************************//**
 Marks non-updated off-page fields as disowned by this record. The ownership
 must be transferred to the updated record which is inserted elsewhere in the
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index ce709a2e912..026187b2000 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -43,6 +43,9 @@ Created 1/8/1996 Heikki Tuuri
 #include "trx0types.h"
 #include "row0types.h"
 
+extern bool innodb_table_stats_not_found;
+extern bool innodb_index_stats_not_found;
+
 #ifndef UNIV_HOTBACKUP
 # include "sync0sync.h"
 # include "sync0rw.h"
@@ -1435,6 +1438,28 @@ UNIV_INTERN
 void
 dict_mutex_exit_for_mysql(void);
 /*===========================*/
+
+/** Create a dict_table_t's stats latch or delay for lazy creation.
+This function is only called from either single threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out]	table	table whose stats latch to create
+@param[in]	enabled	if false then the latch is disabled
+and dict_table_stats_lock()/unlock() become noop on this table. */
+
+void
+dict_table_stats_latch_create(
+	dict_table_t*	table,
+	bool		enabled);
+
+/** Destroy a dict_table_t's stats latch.
+This function is only called from either single threaded environment
+or from a thread that has not shared the table object with other threads.
+@param[in,out]	table	table whose stats latch to destroy */
+
+void
+dict_table_stats_latch_destroy(
+	dict_table_t*	table);
+
 /**********************************************************************//**
 Lock the appropriate latch to protect a given table's statistics.
 table->id is used to pick the corresponding latch from a global array of
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index c5ed8d92cb0..0e3981a2946 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -46,6 +46,7 @@ Created 1/8/1996 Heikki Tuuri
 #include "hash0hash.h"
 #include "trx0types.h"
 #include "fts0fts.h"
+#include "os0once.h"
 
 /* Forward declaration. */
 struct ib_rbt_t;
@@ -627,6 +628,9 @@ struct dict_index_t{
 	ulint		stat_n_leaf_pages;
 				/*!< approximate number of leaf pages in the
 				index tree */
+	bool		stats_error_printed;
+				/*!< has persistent statistics error printed
+				for this index ? */
 	/* @} */
 	rw_lock_t	lock;	/*!< read-write lock protecting the
 				upper levels of the index tree */
@@ -842,6 +846,10 @@ struct dict_table_t{
 				initialized in dict_table_add_to_cache() */
 				/** Statistics for query optimization */
 				/* @{ */
+
+	volatile os_once::state_t	stats_latch_created;
+				/*!< Creation state of 'stats_latch'. */
+
 	rw_lock_t*	stats_latch; /*!< this latch protects:
 				dict_table_t::stat_initialized
 				dict_table_t::stat_n_rows (*)
@@ -950,6 +958,9 @@ struct dict_table_t{
 				/*!< see BG_STAT_* above.
 				Writes are covered by dict_sys->mutex.
 				Dirty reads are possible. */
+	bool		stats_error_printed;
+				/*!< Has a persistent stats error already
+				been printed for this table ? */
 				/* @} */
 	/*----------------------*/
 				/**!< The following fields are used by the
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 385853bdb68..88246afebdc 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -289,7 +289,7 @@ lock_rec_insert_check_and_lock(
 				inserted record maybe should inherit
 				LOCK_GAP type locks from the successor
 				record */
-	__attribute__((nonnull, warn_unused_result));
+	__attribute__((nonnull(2,3,4,6,7), warn_unused_result));
 /*********************************************************************//**
 Checks if locks of other transactions prevent an immediate modify (update,
 delete mark, or delete unmark) of a clustered index record. If they do,
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
new file mode 100644
index 00000000000..a8bbaf1d2d4
--- /dev/null
+++ b/storage/innobase/include/os0once.h
@@ -0,0 +1,125 @@
+/*****************************************************************************
+
+Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0once.h
+A class that aids executing a given function exactly once in a multi-threaded
+environment.
+
+Created Feb 20, 2014 Vasil Dimov
+*******************************************************/
+
+#ifndef os0once_h
+#define os0once_h
+
+#include "univ.i"
+
+#include "os0sync.h"
+#include "ut0ut.h"
+
+/** Execute a given function exactly once in a multi-threaded environment
+or wait for the function to be executed by another thread.
+
+Example usage:
+First the user must create a control variable of type os_once::state_t and
+assign it os_once::NEVER_DONE.
+Then the user must pass this variable, together with a function to be
+executed to os_once::do_or_wait_for_done().
+
+Multiple threads can call os_once::do_or_wait_for_done() simultaneously with
+the same (os_once::state_t) control variable. The provided function will be
+called exactly once and when os_once::do_or_wait_for_done() returns then this
+function has completed execution, by this or another thread. In other words
+os_once::do_or_wait_for_done() will either execute the provided function or
+will wait for its execution to complete if it is already called by another
+thread or will do nothing if the function has already completed its execution
+earlier.
+
+This mimics pthread_once(3), but unfortunately pthread_once(3) does not
+support passing arguments to the init_routine() function. We should use
+std::call_once() when we start compiling with C++11 enabled. */
+class os_once {
+public:
+	/** Control variables' state type */
+	typedef ib_uint32_t	state_t;
+
+	/** Not yet executed. */
+	static const state_t	NEVER_DONE = 0;
+
+	/** Currently being executed by this or another thread. */
+	static const state_t	IN_PROGRESS = 1;
+
+	/** Finished execution. */
+	static const state_t	DONE = 2;
+
+#ifdef HAVE_ATOMIC_BUILTINS
+	/** Call a given function or wait for its execution to complete if it is
+	already called by another thread.
+	@param[in,out]	state		control variable
+	@param[in]	do_func		function to call
+	@param[in,out]	do_func_arg	an argument to pass to do_func(). */
+	static
+	void
+	do_or_wait_for_done(
+		volatile state_t*	state,
+		void			(*do_func)(void*),
+		void*			do_func_arg)
+	{
+		/* Avoid calling os_compare_and_swap_uint32() in the most
+		common case. */
+		if (*state == DONE) {
+			return;
+		}
+
+		if (os_compare_and_swap_uint32(state,
+					       NEVER_DONE, IN_PROGRESS)) {
+			/* We are the first. Call the function. */
+
+			do_func(do_func_arg);
+
+			const bool	swapped = os_compare_and_swap_uint32(
+				state, IN_PROGRESS, DONE);
+
+			ut_a(swapped);
+		} else {
+			/* The state is not NEVER_DONE, so either it is
+			IN_PROGRESS (somebody is calling the function right
+			now) or DONE (it has already been called and completed).
+			Wait for it to become DONE. */
+			for (;;) {
+				const state_t	s = *state;
+
+				switch (s) {
+				case DONE:
+					return;
+				case IN_PROGRESS:
+					break;
+				case NEVER_DONE:
+					/* fall through */
+				default:
+					ut_error;
+				}
+
+				UT_RELAX_CPU();
+			}
+		}
+	}
+#endif /* HAVE_ATOMIC_BUILTINS */
+};
+
+#endif /* os0once_h */
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
index 9b4ce2343c5..6d3dd850e08 100644
--- a/storage/innobase/include/os0sync.h
+++ b/storage/innobase/include/os0sync.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -434,6 +434,9 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
 # define os_atomic_test_and_set_ulint(ptr, new_val) \
 	__sync_lock_test_and_set(ptr, new_val)
 
+# define os_atomic_lock_release_byte(ptr) \
+	__sync_lock_release(ptr)
+
 #elif defined(HAVE_IB_SOLARIS_ATOMICS)
 
 # define HAVE_ATOMIC_BUILTINS
@@ -515,6 +518,9 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
 # define os_atomic_test_and_set_ulint(ptr, new_val) \
 	atomic_swap_ulong(ptr, new_val)
 
+# define os_atomic_lock_release_byte(ptr) \
+	(void) atomic_swap_uchar(ptr, 0)
+
 #elif defined(HAVE_WINDOWS_ATOMICS)
 
 # define HAVE_ATOMIC_BUILTINS
@@ -574,7 +580,8 @@ Returns true if swapped, ptr is pointer to target, old_val is value to
 compare to, new_val is the value to swap in. */
 
 # define os_compare_and_swap_uint32(ptr, old_val, new_val) \
-	(win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val)
+	(InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), \
+				    new_val, old_val) == old_val)
 
 # define os_compare_and_swap_ulint(ptr, old_val, new_val) \
 	(win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val)
@@ -637,6 +644,9 @@ clobbered */
 # define os_atomic_test_and_set_ulong(ptr, new_val) \
 	InterlockedExchange(ptr, new_val)
 
+# define os_atomic_lock_release_byte(ptr) \
+	(void) InterlockedExchange(ptr, 0)
+
 #else
 # define IB_ATOMICS_STARTUP_MSG \
 	"Mutexes and rw_locks use InnoDB's own implementation"
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 7922b14cc86..2b58e0717fb 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -3,6 +3,7 @@
 Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
 Copyright (c) 2008, 2009, Google Inc.
 Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -453,6 +454,9 @@ extern struct export_var_t export_vars;
 /** Global counters */
 extern srv_stats_t	srv_stats;
 
+/** Simulate compression failures. */
+extern uint srv_simulate_comp_failures;
+
 # ifdef UNIV_PFS_THREAD
 /* Keys to register InnoDB threads with performance schema */
 extern mysql_pfs_key_t	buf_page_cleaner_thread_key;
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index 34cd8ef4bd6..b36e04f2810 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -108,14 +108,8 @@ extern ib_mutex_t		rw_lock_list_mutex;
 #ifdef UNIV_SYNC_DEBUG
 /* The global mutex which protects debug info lists of all rw-locks.
 To modify the debug info list of an rw-lock, this mutex has to be
-
 acquired in addition to the mutex protecting the lock. */
-extern ib_mutex_t		rw_lock_debug_mutex;
-extern os_event_t	rw_lock_debug_event;	/*!< If deadlock detection does
-					not get immediately the mutex it
-					may wait for this event */
-extern ibool		rw_lock_debug_waiters;	/*!< This is set to TRUE, if
-					there may be waiters for the event */
+extern os_fast_mutex_t		rw_lock_debug_mutex;
 #endif /* UNIV_SYNC_DEBUG */
 
 /** Counters for RW locks. */
@@ -141,7 +135,7 @@ extern	mysql_pfs_key_t	trx_i_s_cache_lock_key;
 extern	mysql_pfs_key_t	trx_purge_latch_key;
 extern	mysql_pfs_key_t	index_tree_rw_lock_key;
 extern	mysql_pfs_key_t	index_online_log_key;
-extern	mysql_pfs_key_t	dict_table_stats_latch_key;
+extern	mysql_pfs_key_t	dict_table_stats_key;
 extern  mysql_pfs_key_t trx_sys_rw_lock_key;
 extern  mysql_pfs_key_t hash_table_rw_lock_key;
 #endif /* UNIV_PFS_RWLOCK */
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
index f34f3f90b63..cb6f6efbed8 100644
--- a/storage/innobase/include/sync0sync.ic
+++ b/storage/innobase/include/sync0sync.ic
@@ -108,10 +108,7 @@ mutex_reset_lock_word(
 	ib_mutex_t*	mutex)	/*!< in: mutex */
 {
 #if defined(HAVE_ATOMIC_BUILTINS)
-	/* In theory __sync_lock_release should be used to release the lock.
-	Unfortunately, it does not work properly alone. The workaround is
-	that more conservative __sync_lock_test_and_set is used instead. */
-	os_atomic_test_and_set_byte(&mutex->lock_word, 0);
+	os_atomic_lock_release_byte(&mutex->lock_word);
 #else
 	mutex->lock_word = 0;
 
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index a30bbdbebb2..7c92445b796 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -992,6 +992,11 @@ struct trx_t{
 					count of tables being flushed. */
 
 	/*------------------------------*/
+	THD*		current_lock_mutex_owner;
+					/*!< If this is equal to current_thd,
+					then in innobase_kill_query() we know we
+					already hold the lock_sys->mutex. */
+	/*------------------------------*/
 #ifdef UNIV_DEBUG
 	ulint		start_line;	/*!< Track where it was started from */
 	const char*	start_file;	/*!< Filename where it was started */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 98c5512bd0b..bc359746a0b 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri
 
 #define INNODB_VERSION_MAJOR	5
 #define INNODB_VERSION_MINOR	6
-#define INNODB_VERSION_BUGFIX	17
+#define INNODB_VERSION_BUGFIX	19
 
 /* The following is the InnoDB version as shown in
 SELECT plugin_version FROM information_schema.plugins;
@@ -439,10 +439,10 @@ typedef unsigned __int64 ib_uint64_t;
 typedef unsigned __int32 ib_uint32_t;
 #else
 /* Use the integer types and formatting strings defined in the C99 standard. */
-# define UINT32PF	"%"PRIu32
-# define INT64PF	"%"PRId64
-# define UINT64PF	"%"PRIu64
-# define UINT64PFx	"%016"PRIx64
+# define UINT32PF	"%" PRIu32
+# define INT64PF	"%" PRId64
+# define UINT64PF	"%" PRIu64
+# define UINT64PFx	"%016" PRIx64
 # define DBUG_LSN_PF    UINT64PF
 typedef int64_t ib_int64_t;
 typedef uint64_t ib_uint64_t;
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index f99c34294cd..659b2e5b62a 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -49,6 +49,7 @@ Created 5/7/1996 Heikki Tuuri
 #include "btr0btr.h"
 #include "dict0boot.h"
 #include <set>
+#include "mysql/plugin.h"
 
 #ifdef WITH_WSREP
 extern my_bool wsrep_debug;
@@ -378,6 +379,11 @@ struct lock_stack_t {
 	ulint		heap_no;		/*!< heap number if rec lock */
 };
 
+extern "C" void thd_report_wait_for(const MYSQL_THD thd, MYSQL_THD other_thd);
+extern "C" int thd_need_wait_for(const MYSQL_THD thd);
+extern "C"
+int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
+
 /** Stack to use during DFS search. Currently only a single stack is required
 because there is no parallel deadlock check. This stack is protected by
 the lock_sys_t::mutex. */
@@ -393,6 +399,14 @@ UNIV_INTERN mysql_pfs_key_t	lock_sys_mutex_key;
 UNIV_INTERN mysql_pfs_key_t	lock_sys_wait_mutex_key;
 #endif /* UNIV_PFS_MUTEX */
 
+/* Buffer to collect THDs to report waits for. */
+struct thd_wait_reports {
+	struct thd_wait_reports *next;	/*!< List link */
+	ulint used;			/*!< How many elements in waitees[] */
+	trx_t *waitees[64];		/*!< Trxs for thd_report_wait_for() */
+};
+
+
 #ifdef UNIV_DEBUG
 UNIV_INTERN ibool	lock_print_waits	= FALSE;
 
@@ -1023,6 +1037,32 @@ lock_rec_has_to_wait(
 			return(FALSE);
 		}
 
+		if ((type_mode & LOCK_GAP || lock_rec_get_gap(lock2)) &&
+		    !thd_need_ordering_with(trx->mysql_thd,
+					    lock2->trx->mysql_thd)) {
+			/* If the upper server layer has already decided on the
+			commit order between the transaction requesting the
+			lock and the transaction owning the lock, we do not
+			need to wait for gap locks. Such ordering by the upper
+			server layer happens in parallel replication, where the
+			commit order is fixed to match the original order on the
+			master.
+
+			Such gap locks are mainly needed to get serialisability
+			between transactions so that they will be binlogged in
+			the correct order so that statement-based replication
+			will give the correct results. Since the right order
+			was already determined on the master, we do not need
+			to enforce it again here.
+
+			Skipping the locks is not essential for correctness,
+			since in case of deadlock we will just kill the later
+			transaction and retry it. But it can save some
+			unnecessary rollbacks and retries. */
+
+			return (FALSE);
+		}
+
 #ifdef WITH_WSREP
 		/* if BF thread is locking and has conflict with another BF
 		   thread, we need to look at trx ordering and lock types */
@@ -4069,7 +4109,8 @@ static
 trx_id_t
 lock_deadlock_search(
 /*=================*/
-	lock_deadlock_ctx_t*	ctx)	/*!< in/out: deadlock context */
+	lock_deadlock_ctx_t*	ctx,	/*!< in/out: deadlock context */
+	struct thd_wait_reports*waitee_ptr) /*!< in/out: list of waitees */
 {
 	const lock_t*	lock;
 	ulint		heap_no;
@@ -4149,38 +4190,59 @@ lock_deadlock_search(
 			/* Select the joining transaction as the victim. */
 			return(ctx->start->id);
 
-		} else if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+		} else {
+			/* We do not need to report autoinc locks to the upper
+			layer. These locks are released before commit, so they
+			can not cause deadlocks with binlog-fixed commit
+			order. */
+			if (waitee_ptr &&
+			    (lock_get_type_low(lock) != LOCK_TABLE ||
+			     lock_get_mode(lock) != LOCK_AUTO_INC)) {
+				if (waitee_ptr->used ==
+				    sizeof(waitee_ptr->waitees) /
+				    sizeof(waitee_ptr->waitees[0])) {
+					waitee_ptr->next =
+						(struct thd_wait_reports *)
+						mem_alloc(sizeof(*waitee_ptr));
+					waitee_ptr = waitee_ptr->next;
+					if (!waitee_ptr) {
+						ctx->too_deep = TRUE;
+						return(ctx->start->id);
+					}
+					waitee_ptr->next = NULL;
+					waitee_ptr->used = 0;
+				}
+				waitee_ptr->waitees[waitee_ptr->used++] = lock->trx;
+			}
 
-			/* Another trx ahead has requested a lock in an
-			incompatible mode, and is itself waiting for a lock. */
+			if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
 
-			++ctx->cost;
+				/* Another trx ahead has requested a lock in an
+				incompatible mode, and is itself waiting for a lock. */
 
-			/* Save current search state. */
-			if (!lock_deadlock_push(ctx, lock, heap_no)) {
+				++ctx->cost;
 
-				/* Unable to save current search state, stack
-				size not big enough. */
+				/* Save current search state. */
+				if (!lock_deadlock_push(ctx, lock, heap_no)) {
 
-				ctx->too_deep = TRUE;
+					/* Unable to save current search state, stack
+					size not big enough. */
+
+					ctx->too_deep = TRUE;
 
-#ifdef WITH_WSREP
-				if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE))
-					return(lock->trx->id);
-				else
-#endif /* WITH_WSREP */
 					return(ctx->start->id);
-			}
+				}
 
-			ctx->wait_lock = lock->trx->lock.wait_lock;
-			lock = lock_get_first_lock(ctx, &heap_no);
+				ctx->wait_lock = lock->trx->lock.wait_lock;
+				lock = lock_get_first_lock(ctx, &heap_no);
 
-			if (lock->trx->lock.deadlock_mark > ctx->mark_start) {
+				if (lock->trx->lock.deadlock_mark > ctx->mark_start) {
+					lock = lock_get_next_lock(ctx, lock, heap_no);
+				}
+
+			} else {
 				lock = lock_get_next_lock(ctx, lock, heap_no);
 			}
-
-		} else {
-			lock = lock_get_next_lock(ctx, lock, heap_no);
 		}
 	}
 
@@ -4245,6 +4307,48 @@ lock_deadlock_trx_rollback(
 	trx_mutex_exit(trx);
 }
 
+static
+void
+lock_report_waiters_to_mysql(
+/*=======================*/
+	struct thd_wait_reports*	waitee_buf_ptr,	/*!< in: set of trxs */
+	THD*				mysql_thd,	/*!< in: THD */
+	trx_id_t			victim_trx_id)	/*!< in: Trx selected
+							as deadlock victim, if
+							any */
+{
+	struct thd_wait_reports*	p;
+	struct thd_wait_reports*	q;
+	ulint				i;
+
+	p = waitee_buf_ptr;
+	while (p) {
+		i = 0;
+		while (i < p->used) {
+			trx_t *w_trx = p->waitees[i];
+			/*  There is no need to report waits to a trx already
+			selected as a victim. */
+			if (w_trx->id != victim_trx_id) {
+				/* If thd_report_wait_for() decides to kill the
+				transaction, then we will get a call back into
+				innobase_kill_query. We mark this by setting
+				current_lock_mutex_owner, so we can avoid trying
+				to recursively take lock_sys->mutex. */
+				w_trx->current_lock_mutex_owner = mysql_thd;
+				thd_report_wait_for(mysql_thd, w_trx->mysql_thd);
+				w_trx->current_lock_mutex_owner = NULL;
+			}
+			++i;
+		}
+		q = p->next;
+		if (p != waitee_buf_ptr) {
+			mem_free(p);
+		}
+		p = q;
+	}
+}
+
+
 /********************************************************************//**
 Checks if a joining lock request results in a deadlock. If a deadlock is
 found this function will resolve the dadlock by choosing a victim transaction
@@ -4260,13 +4364,23 @@ lock_deadlock_check_and_resolve(
 	const lock_t*	lock,	/*!< in: lock the transaction is requesting */
 	const trx_t*	trx)	/*!< in: transaction */
 {
-	trx_id_t	victim_trx_id;
+	trx_id_t		victim_trx_id;
+	struct thd_wait_reports	waitee_buf;
+	struct thd_wait_reports*waitee_buf_ptr;
+	THD*			start_mysql_thd;
 
 	ut_ad(trx != NULL);
 	ut_ad(lock != NULL);
 	ut_ad(lock_mutex_own());
 	assert_trx_in_list(trx);
 
+	start_mysql_thd = trx->mysql_thd;
+	if (start_mysql_thd && thd_need_wait_for(start_mysql_thd)) {
+		waitee_buf_ptr = &waitee_buf;
+	} else {
+		waitee_buf_ptr = NULL;
+	}
+
 	/* Try and resolve as many deadlocks as possible. */
 	do {
 		lock_deadlock_ctx_t	ctx;
@@ -4279,7 +4393,19 @@ lock_deadlock_check_and_resolve(
 		ctx.wait_lock = lock;
 		ctx.mark_start = lock_mark_counter;
 
-		victim_trx_id = lock_deadlock_search(&ctx);
+		if (waitee_buf_ptr) {
+			waitee_buf_ptr->next = NULL;
+			waitee_buf_ptr->used = 0;
+		}
+
+		victim_trx_id = lock_deadlock_search(&ctx, waitee_buf_ptr);
+
+		/* Report waits to upper layer, as needed. */
+		if (waitee_buf_ptr) {
+			lock_report_waiters_to_mysql(waitee_buf_ptr,
+						     start_mysql_thd,
+						     victim_trx_id);
+		}
 
 		/* Search too deep, we rollback the joining transaction. */
 		if (ctx.too_deep) {
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 992b1e79b58..1ec08da8a83 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -2679,7 +2679,7 @@ try_again:
 	}
 
 	ib_logf(IB_LOG_LEVEL_ERROR,
-		"Tried to read "ULINTPF" bytes at offset " UINT64PF". "
+		"Tried to read " ULINTPF " bytes at offset " UINT64PF ". "
 		"Was only able to read %ld.", n, offset, (lint) ret);
 #endif /* __WIN__ */
 #ifdef __WIN__
@@ -2866,6 +2866,7 @@ os_file_write_func(
 	DWORD		high;
 	ulint		n_retries	= 0;
 	ulint		err;
+	DWORD		saved_error = 0;
 #ifndef UNIV_HOTBACKUP
 	ulint		i;
 #endif /* !UNIV_HOTBACKUP */
@@ -2955,8 +2956,10 @@ retry:
 	}
 
 	if (!os_has_said_disk_full) {
+		char *winmsg = NULL;
 
-		err = (ulint) GetLastError();
+		saved_error = GetLastError();
+		err = (ulint) saved_error;
 
 		ut_print_timestamp(stderr);
 
@@ -2973,6 +2976,23 @@ retry:
 			name, offset,
 			(ulong) n, (ulong) len, (ulong) err);
 
+		/* Ask Windows to prepare a standard message for a
+		GetLastError() */
+
+		FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+			FORMAT_MESSAGE_FROM_SYSTEM |
+			FORMAT_MESSAGE_IGNORE_INSERTS,
+			NULL, saved_error,
+			MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+			(LPSTR)&winmsg, 0, NULL);
+
+		if (winmsg) {
+			fprintf(stderr,
+				"InnoDB: FormatMessage: Error number %lu means '%s'.\n",
+				(ulong) saved_error, winmsg);
+			LocalFree(winmsg);
+		}
+
 		if (strerror((int) err) != NULL) {
 			fprintf(stderr,
 				"InnoDB: Error number %lu means '%s'.\n",
@@ -3001,12 +3021,11 @@ retry:
 	}
 
 	if (!os_has_said_disk_full) {
-
 		ut_print_timestamp(stderr);
 
 		fprintf(stderr,
 			" InnoDB: Error: Write to file %s failed"
-			" at offset "UINT64PF".\n"
+			" at offset " UINT64PF ".\n"
 			"InnoDB: %lu bytes should have been written,"
 			" only %ld were written.\n"
 			"InnoDB: Operating system error number %lu.\n"
@@ -4592,11 +4611,16 @@ os_aio_func(
 	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
 	mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
 
+	DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
+			mode = OS_AIO_SYNC;);
+
 	if (mode == OS_AIO_SYNC
 #ifdef WIN_ASYNC_IO
 	    && !srv_use_native_aio
 #endif /* WIN_ASYNC_IO */
 	    ) {
+		ibool ret;
+
 		/* This is actually an ordinary synchronous read or write:
 		no need to use an i/o-handler thread. NOTE that if we use
 		Windows async i/o, Windows does not allow us to use
@@ -4611,13 +4635,23 @@ os_aio_func(
 		and os_file_write_func() */
 
 		if (type == OS_FILE_READ) {
-			return(os_file_read_func(file, buf, offset, n));
+			ret = os_file_read_func(file, buf, offset, n);
+		} else {
+
+			ut_ad(!srv_read_only_mode);
+			ut_a(type == OS_FILE_WRITE);
+
+			ret = os_file_write_func(name, file, buf, offset, n);
 		}
 
-		ut_ad(!srv_read_only_mode);
-		ut_a(type == OS_FILE_WRITE);
+		DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
+			os_has_said_disk_full = FALSE;);
+		DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
+			ret = 0;);
+		DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
+			errno = 28;);
 
-		return(os_file_write_func(name, file, buf, offset, n));
+		return ret;
 	}
 
 try_again:
@@ -5442,7 +5476,13 @@ consecutive_loop:
 			aio_slot->offset, total_len);
 	}
 
-	ut_a(ret);
+	DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28_2",
+		os_has_said_disk_full = FALSE;);
+	DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28_2",
+			ret = 0;);
+	DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28_2",
+			errno = 28;);
+
 	srv_set_io_thread_op_info(global_segment, "file i/o done");
 
 	if (aio_slot->type == OS_FILE_READ && n_consecutive > 1) {
diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc
index ab7a19795a3..4b19a35925e 100644
--- a/storage/innobase/page/page0zip.cc
+++ b/storage/innobase/page/page0zip.cc
@@ -1309,6 +1309,30 @@ page_zip_compress(
 
 	MONITOR_INC(MONITOR_PAGE_COMPRESS);
 
+	/* Simulate a compression failure with a probability determined by
+	innodb_simulate_comp_failures, only if the page has 2 or more
+	records. */
+
+	if (srv_simulate_comp_failures
+	    && !dict_index_is_ibuf(index)
+	    && page_get_n_recs(page) >= 2
+	    && ((ulint)(rand() % 100) < srv_simulate_comp_failures)
+	    && strcasecmp(index->table_name, "IBUF_DUMMY") != 0) {
+
+#ifdef UNIV_DEBUG
+		fprintf(stderr,
+			"InnoDB: Simulating a compression failure"
+			" for table %s, index %s, page %lu (%s)\n",
+			index->table_name,
+			index->name,
+			page_get_page_no(page),
+			page_is_leaf(page) ? "leaf" : "non-leaf");
+
+#endif
+
+		goto err_exit;
+	}
+
 	heap = mem_heap_create(page_zip_get_size(page_zip)
 			       + n_fields * (2 + sizeof(ulint))
 			       + REC_OFFS_HEADER_SIZE
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index e6487730a77..c144ca890f8 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -151,35 +151,37 @@ row_ins_alloc_sys_fields(
 	ut_ad(row && table && heap);
 	ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
 
-	/* 1. Allocate buffer for row id */
+	/* allocate buffer to hold the needed system created hidden columns. */
+	uint len = DATA_ROW_ID_LEN + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
+	ptr = static_cast<byte*>(mem_heap_zalloc(heap, len));
 
+	/* 1. Populate row-id */
 	col = dict_table_get_sys_col(table, DATA_ROW_ID);
 
 	dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
 
-	ptr = static_cast<byte*>(mem_heap_zalloc(heap, DATA_ROW_ID_LEN));
-
 	dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
 
 	node->row_id_buf = ptr;
 
-	/* 3. Allocate buffer for trx id */
+	ptr += DATA_ROW_ID_LEN;
 
+	/* 2. Populate trx id */
 	col = dict_table_get_sys_col(table, DATA_TRX_ID);
 
 	dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
-	ptr = static_cast<byte*>(mem_heap_zalloc(heap, DATA_TRX_ID_LEN));
 
 	dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
 
 	node->trx_id_buf = ptr;
 
-	/* 4. Allocate buffer for roll ptr */
+	ptr += DATA_TRX_ID_LEN;
+
+	/* 3. Populate roll ptr */
 
 	col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
 
 	dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
-	ptr = static_cast<byte*>(mem_heap_zalloc(heap, DATA_ROLL_PTR_LEN));
 
 	dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
 }
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index 56cf9f1943c..86b47c9f3bd 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -786,7 +786,7 @@ row_merge_read(
 	if (UNIV_UNLIKELY(!success)) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
-			"  InnoDB: failed to read merge block at "UINT64PF"\n",
+			"  InnoDB: failed to read merge block at " UINT64PF "\n",
 			ofs);
 	}
 
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 93d13ea49ee..dd7af8a3526 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -1359,7 +1359,7 @@ error_exit:
 			if (doc_id < next_doc_id) {
 				fprintf(stderr,
 					"InnoDB: FTS Doc ID must be large than"
-					" "UINT64PF" for table",
+					" " UINT64PF " for table",
 					next_doc_id - 1);
 				ut_print_name(stderr, trx, TRUE, table->name);
 				putc('\n', stderr);
@@ -1374,9 +1374,9 @@ error_exit:
 
 			if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) {
 				fprintf(stderr,
-					"InnoDB: Doc ID "UINT64PF" is too"
+					"InnoDB: Doc ID " UINT64PF " is too"
 					" big. Its difference with largest"
-					" used Doc ID "UINT64PF" cannot"
+					" used Doc ID " UINT64PF " cannot"
 					" exceed or equal to %d\n",
 					doc_id, next_doc_id - 1,
 					FTS_DOC_ID_MAX_STEP);
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 359ae3f2c21..e5a7694cb93 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -877,16 +877,15 @@ row_sel_get_clust_rec(
 
 	if (!node->read_view) {
 		/* Try to place a lock on the index record */
-
-		/* If innodb_locks_unsafe_for_binlog option is used
-		or this session is using READ COMMITTED isolation level
-		we lock only the record, i.e., next-key locking is
-		not used. */
 		ulint	lock_type;
 		trx_t*	trx;
 
 		trx = thr_get_trx(thr);
 
+		/* If innodb_locks_unsafe_for_binlog option is used
+		or this session is using READ COMMITTED or lower isolation level
+		we lock only the record, i.e., next-key locking is
+		not used. */
 		if (srv_locks_unsafe_for_binlog
 		    || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
 			lock_type = LOCK_REC_NOT_GAP;
@@ -1502,12 +1501,6 @@ rec_loop:
 		search result set, resulting in the phantom problem. */
 
 		if (!consistent_read) {
-
-			/* If innodb_locks_unsafe_for_binlog option is used
-			or this session is using READ COMMITTED isolation
-			level, we lock only the record, i.e., next-key
-			locking is not used. */
-
 			rec_t*	next_rec = page_rec_get_next(rec);
 			ulint	lock_type;
 			trx_t*	trx;
@@ -1517,6 +1510,10 @@ rec_loop:
 			offsets = rec_get_offsets(next_rec, index, offsets,
 						  ULINT_UNDEFINED, &heap);
 
+			/* If innodb_locks_unsafe_for_binlog option is used
+			or this session is using READ COMMITTED or lower isolation
+			level, we lock only the record, i.e., next-key
+			locking is not used. */
 			if (srv_locks_unsafe_for_binlog
 			    || trx->isolation_level
 			    <= TRX_ISO_READ_COMMITTED) {
@@ -1565,12 +1562,6 @@ skip_lock:
 
 	if (!consistent_read) {
 		/* Try to place a lock on the index record */
-
-		/* If innodb_locks_unsafe_for_binlog option is used
-		or this session is using READ COMMITTED isolation level,
-		we lock only the record, i.e., next-key locking is
-		not used. */
-
 		ulint	lock_type;
 		trx_t*	trx;
 
@@ -1579,6 +1570,10 @@ skip_lock:
 
 		trx = thr_get_trx(thr);
 
+		/* If innodb_locks_unsafe_for_binlog option is used
+		or this session is using READ COMMITTED or lower isolation level,
+		we lock only the record, i.e., next-key locking is
+		not used. */
 		if (srv_locks_unsafe_for_binlog
 		    || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
 
@@ -4227,7 +4222,7 @@ rec_loop:
 			/* Try to place a lock on the index record */
 
 			/* If innodb_locks_unsafe_for_binlog option is used
-			or this session is using a READ COMMITTED isolation
+			or this session is using a READ COMMITTED or lower isolation
 			level we do not lock gaps. Supremum record is really
 			a gap and therefore we do not set locks there. */
 
@@ -4369,7 +4364,7 @@ wrong_offs:
 				/* Try to place a gap lock on the index
 				record only if innodb_locks_unsafe_for_binlog
 				option is not set or this session is not
-				using a READ COMMITTED isolation level. */
+				using a READ COMMITTED or lower isolation level. */
 
 				err = sel_set_rec_lock(
 					btr_pcur_get_block(pcur),
@@ -4418,7 +4413,7 @@ wrong_offs:
 				/* Try to place a gap lock on the index
 				record only if innodb_locks_unsafe_for_binlog
 				option is not set or this session is not
-				using a READ COMMITTED isolation level. */
+				using a READ COMMITTED or lower isolation level. */
 
 				err = sel_set_rec_lock(
 					btr_pcur_get_block(pcur),
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index ea346566e57..64417b1e5fb 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -41,8 +41,8 @@ Created 12/9/2009 Jimmy Yang
 /* Macro to standardize the counter names for counters in the
 "monitor_buf_page" module as they have very structured defines */
 #define	MONITOR_BUF_PAGE(name, description, code, op, op_code)	\
-	{"buffer_page_"op"_"name, "buffer_page_io",		\
-	 "Number of "description" Pages "op,			\
+	{"buffer_page_" op "_" name, "buffer_page_io",		\
+	 "Number of " description " Pages " op,			\
 	 MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START,		\
 	 MONITOR_##code##_##op_code}
 
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 6a410285f2b..6e03f715f28 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -3,6 +3,7 @@
 Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, 2009 Google Inc.
 Copyright (c) 2009, Percona Inc.
+Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -473,6 +474,9 @@ current_time % 5 != 0. */
 #endif /* MEM_PERIODIC_CHECK */
 # define	SRV_MASTER_DICT_LRU_INTERVAL		(47)
 
+/** Simulate compression failures. */
+UNIV_INTERN uint srv_simulate_comp_failures = 0;
+
 /** Acquire the system_mutex. */
 #define srv_sys_mutex_enter() do {			\
 	mutex_enter(&srv_sys->mutex);			\
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 0c04fba421a..1c2bfcbd920 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -2197,9 +2197,9 @@ innobase_start_or_create_for_mysql(void)
 			} else if (size != srv_log_file_size) {
 				ib_logf(IB_LOG_LEVEL_ERROR,
 					"Log file %s is"
-					" of different size "UINT64PF" bytes"
+					" of different size " UINT64PF " bytes"
 					" than other log"
-					" files "UINT64PF" bytes!",
+					" files " UINT64PF " bytes!",
 					logfilename,
 					size << UNIV_PAGE_SIZE_SHIFT,
 					(os_offset_t) srv_log_file_size
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
index 2cfb693f8ba..986010039f9 100644
--- a/storage/innobase/sync/sync0arr.cc
+++ b/storage/innobase/sync/sync0arr.cc
@@ -182,6 +182,33 @@ sync_array_get_nth_cell(
 }
 
 /******************************************************************//**
+Looks for a cell with the given thread id.
+@return	pointer to cell or NULL if not found */
+static
+sync_cell_t*
+sync_array_find_thread(
+/*===================*/
+	sync_array_t*	arr,	/*!< in: wait array */
+	os_thread_id_t	thread)	/*!< in: thread id */
+{
+	ulint		i;
+	sync_cell_t*	cell;
+
+	for (i = 0; i < arr->n_cells; i++) {
+
+		cell = sync_array_get_nth_cell(arr, i);
+
+		if (cell->wait_object != NULL
+		    && os_thread_eq(cell->thread, thread)) {
+
+			return(cell);	/* Found */
+		}
+	}
+
+	return(NULL);	/* Not found */
+}
+
+/******************************************************************//**
 Reserves the mutex semaphore protecting a sync array. */
 static
 void
@@ -432,8 +459,10 @@ static
 void
 sync_array_cell_print(
 /*==================*/
-	FILE*		file,	/*!< in: file where to print */
-	sync_cell_t*	cell)	/*!< in: sync cell */
+	FILE*		file,		/*!< in: file where to print */
+	sync_cell_t*	cell,		/*!< in: sync cell */
+	os_thread_id_t* reserver)	/*!< out: writer thread id of the
+					rw-lock; unchanged if no writer */
 {
 	ib_mutex_t*	mutex;
 	rw_lock_t*	rwlock;
@@ -454,19 +483,21 @@ sync_array_cell_print(
 		been freed meanwhile */
 		mutex = cell->old_wait_mutex;
 
-		fprintf(file,
-			"Mutex at %p created file %s line %lu, lock var %lu\n"
+		if (mutex) {
+			fprintf(file,
+				"Mutex at %p created file %s line %lu, lock var %lu\n"
 #ifdef UNIV_SYNC_DEBUG
-			"Last time reserved in file %s line %lu, "
+				"Last time reserved in file %s line %lu, "
 #endif /* UNIV_SYNC_DEBUG */
-			"waiters flag %lu\n",
-			(void*) mutex, innobase_basename(mutex->cfile_name),
-			(ulong) mutex->cline,
-			(ulong) mutex->lock_word,
+				"waiters flag %lu\n",
+				(void*) mutex, innobase_basename(mutex->cfile_name),
+				(ulong) mutex->cline,
+				(ulong) mutex->lock_word,
 #ifdef UNIV_SYNC_DEBUG
-			mutex->file_name, (ulong) mutex->line,
+				mutex->file_name, (ulong) mutex->line,
 #endif /* UNIV_SYNC_DEBUG */
-			(ulong) mutex->waiters);
+				(ulong) mutex->waiters);
+		}
 
 	} else if (type == RW_LOCK_EX
 		   || type == RW_LOCK_WAIT_EX
@@ -478,33 +509,36 @@ sync_array_cell_print(
 
 		rwlock = cell->old_wait_rw_lock;
 
-		fprintf(file,
-			" RW-latch at %p created in file %s line %lu\n",
-			(void*) rwlock, innobase_basename(rwlock->cfile_name),
-			(ulong) rwlock->cline);
-		writer = rw_lock_get_writer(rwlock);
-		if (writer != RW_LOCK_NOT_LOCKED) {
+		if (rwlock) {
 			fprintf(file,
-				"a writer (thread id %lu) has"
-				" reserved it in mode %s",
-				(ulong) os_thread_pf(rwlock->writer_thread),
-				writer == RW_LOCK_EX
-				? " exclusive\n"
-				: " wait exclusive\n");
-		}
+				" RW-latch at %p created in file %s line %lu\n",
+				(void*) rwlock, innobase_basename(rwlock->cfile_name),
+				(ulong) rwlock->cline);
+			writer = rw_lock_get_writer(rwlock);
+			if (writer != RW_LOCK_NOT_LOCKED) {
+				fprintf(file,
+					"a writer (thread id %lu) has"
+					" reserved it in mode %s",
+					(ulong) os_thread_pf(rwlock->writer_thread),
+					writer == RW_LOCK_EX
+					? " exclusive\n"
+					: " wait exclusive\n");
+				*reserver = rwlock->writer_thread;
+			}
 
-		fprintf(file,
-			"number of readers %lu, waiters flag %lu, "
-                        "lock_word: %lx\n"
-			"Last time read locked in file %s line %lu\n"
-			"Last time write locked in file %s line %lu\n",
-			(ulong) rw_lock_get_reader_count(rwlock),
-			(ulong) rwlock->waiters,
-			rwlock->lock_word,
-			innobase_basename(rwlock->last_s_file_name),
-			(ulong) rwlock->last_s_line,
-			rwlock->last_x_file_name,
-			(ulong) rwlock->last_x_line);
+			fprintf(file,
+				"number of readers %lu, waiters flag %lu, "
+				"lock_word: %lx\n"
+				"Last time read locked in file %s line %lu\n"
+				"Last time write locked in file %s line %lu\n",
+				(ulong) rw_lock_get_reader_count(rwlock),
+				(ulong) rwlock->waiters,
+				rwlock->lock_word,
+				innobase_basename(rwlock->last_s_file_name),
+				(ulong) rwlock->last_s_line,
+				rwlock->last_x_file_name,
+				(ulong) rwlock->last_x_line);
+		}
 	} else {
 		ut_error;
 	}
@@ -515,32 +549,6 @@ sync_array_cell_print(
 }
 
 #ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Looks for a cell with the given thread id.
-@return	pointer to cell or NULL if not found */
-static
-sync_cell_t*
-sync_array_find_thread(
-/*===================*/
-	sync_array_t*	arr,	/*!< in: wait array */
-	os_thread_id_t	thread)	/*!< in: thread id */
-{
-	ulint		i;
-	sync_cell_t*	cell;
-
-	for (i = 0; i < arr->n_cells; i++) {
-
-		cell = sync_array_get_nth_cell(arr, i);
-
-		if (cell->wait_object != NULL
-		    && os_thread_eq(cell->thread, thread)) {
-
-			return(cell);	/* Found */
-		}
-	}
-
-	return(NULL);	/* Not found */
-}
 
 /******************************************************************//**
 Recursion step for deadlock detection.
@@ -602,6 +610,7 @@ sync_array_detect_deadlock(
 	os_thread_id_t	thread;
 	ibool		ret;
 	rw_lock_debug_t*debug;
+	os_thread_id_t	reserver=0;
 
 	ut_a(arr);
 	ut_a(start);
@@ -637,10 +646,10 @@ sync_array_detect_deadlock(
 						       depth);
 			if (ret) {
 				fprintf(stderr,
-			"Mutex %p owned by thread %lu file %s line %lu\n",
+					"Mutex %p owned by thread %lu file %s line %lu\n",
 					mutex, (ulong) os_thread_pf(mutex->thread_id),
 					mutex->file_name, (ulong) mutex->line);
-				sync_array_cell_print(stderr, cell);
+				sync_array_cell_print(stderr, cell, &reserver);
 
 				return(TRUE);
 			}
@@ -678,7 +687,7 @@ sync_array_detect_deadlock(
 print:
 					fprintf(stderr, "rw-lock %p ",
 						(void*) lock);
-					sync_array_cell_print(stderr, cell);
+					sync_array_cell_print(stderr, cell, &reserver);
 					rw_lock_debug_print(stderr, debug);
 					return(TRUE);
 				}
@@ -921,6 +930,7 @@ sync_array_print_long_waits_low(
 		double		diff;
 		sync_cell_t*	cell;
 		void*		wait_object;
+		os_thread_id_t reserver=0;
 
 		cell = sync_array_get_nth_cell(arr, i);
 
@@ -936,7 +946,7 @@ sync_array_print_long_waits_low(
 		if (diff > SYNC_ARRAY_TIMEOUT) {
 			fputs("InnoDB: Warning: a long semaphore wait:\n",
 			      stderr);
-			sync_array_cell_print(stderr, cell);
+			sync_array_cell_print(stderr, cell, &reserver);
 			*noticed = TRUE;
 		}
 
@@ -951,6 +961,60 @@ sync_array_print_long_waits_low(
 		}
 	}
 
+	/* We found a long semaphore wait; print all threads that are
+	waiting for a semaphore. */
+	if (*noticed) {
+		for (i = 0; i < arr->n_cells; i++) {
+			void*	wait_object;
+			os_thread_id_t reserver=(os_thread_id_t)ULINT_UNDEFINED;
+			sync_cell_t*	cell;
+			ulint loop = 0;
+
+			cell = sync_array_get_nth_cell(arr, i);
+
+			wait_object = cell->wait_object;
+
+			if (wait_object == NULL || !cell->waiting) {
+
+				continue;
+			}
+
+			fputs("InnoDB: Warning: semaphore wait:\n",
+			      stderr);
+			sync_array_cell_print(stderr, cell, &reserver);
+
+			/* Follow the chain of writers, printing the cell each writer is itself waiting on */
+			while (reserver != (os_thread_id_t)ULINT_UNDEFINED) {
+				sync_cell_t* reserver_wait;
+
+				reserver_wait = sync_array_find_thread(arr, reserver);
+
+				if (reserver_wait &&
+					reserver_wait->wait_object != NULL &&
+					reserver_wait->waiting) {
+					fputs("InnoDB: Warning: Writer thread is waiting this semaphore:\n",
+						stderr);
+					reserver = (os_thread_id_t)ULINT_UNDEFINED;
+					sync_array_cell_print(stderr, reserver_wait, &reserver);
+					loop++;
+
+					if (reserver_wait->thread == reserver) {
+						reserver = (os_thread_id_t)ULINT_UNDEFINED;
+					}
+				} else {
+					reserver = (os_thread_id_t)ULINT_UNDEFINED;
+				}
+
+				/* Protect against an endless loop */
+				if (loop > 100) {
+					fputs("InnoDB: Warning: Too many waiting threads.\n", stderr);
+					break;
+				}
+
+			}
+		}
+	}
+
 #undef SYNC_ARRAY_TIMEOUT
 
 	return(fatal);
@@ -1030,6 +1094,7 @@ sync_array_print_info_low(
 {
 	ulint		i;
 	ulint		count = 0;
+	os_thread_id_t	r = 0;
 
 	fprintf(file,
 		"OS WAIT ARRAY INFO: reservation count %ld\n",
@@ -1042,7 +1107,7 @@ sync_array_print_info_low(
 
 		if (cell->wait_object != NULL) {
 			count++;
-			sync_array_cell_print(file, cell);
+			sync_array_cell_print(file, cell, &r);
 		}
 	}
 }
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
index ebf73917702..e129d39fc9d 100644
--- a/storage/innobase/sync/sync0rw.cc
+++ b/storage/innobase/sync/sync0rw.cc
@@ -151,18 +151,12 @@ UNIV_INTERN mysql_pfs_key_t	rw_lock_mutex_key;
 To modify the debug info list of an rw-lock, this mutex has to be
 acquired in addition to the mutex protecting the lock. */
 
-UNIV_INTERN ib_mutex_t		rw_lock_debug_mutex;
+UNIV_INTERN os_fast_mutex_t	rw_lock_debug_mutex;
 
 # ifdef UNIV_PFS_MUTEX
 UNIV_INTERN mysql_pfs_key_t	rw_lock_debug_mutex_key;
 # endif
 
-/* If deadlock detection does not get immediately the mutex,
-it may wait for this event */
-UNIV_INTERN os_event_t		rw_lock_debug_event;
-/* This is set to TRUE, if there may be waiters for the event */
-UNIV_INTERN ibool		rw_lock_debug_waiters;
-
 /******************************************************************//**
 Creates a debug info struct. */
 static
@@ -690,22 +684,7 @@ void
 rw_lock_debug_mutex_enter(void)
 /*===========================*/
 {
-loop:
-	if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
-		return;
-	}
-
-	os_event_reset(rw_lock_debug_event);
-
-	rw_lock_debug_waiters = TRUE;
-
-	if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
-		return;
-	}
-
-	os_event_wait(rw_lock_debug_event);
-
-	goto loop;
+	os_fast_mutex_lock(&rw_lock_debug_mutex);
 }
 
 /******************************************************************//**
@@ -715,12 +694,7 @@ void
 rw_lock_debug_mutex_exit(void)
 /*==========================*/
 {
-	mutex_exit(&rw_lock_debug_mutex);
-
-	if (rw_lock_debug_waiters) {
-		rw_lock_debug_waiters = FALSE;
-		os_event_set(rw_lock_debug_event);
-	}
+	os_fast_mutex_unlock(&rw_lock_debug_mutex);
 }
 
 /******************************************************************//**
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index 5ef8a02fb3f..54018471abc 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -1472,11 +1472,7 @@ sync_init(void)
 		     SYNC_NO_ORDER_CHECK);
 
 #ifdef UNIV_SYNC_DEBUG
-	mutex_create(rw_lock_debug_mutex_key, &rw_lock_debug_mutex,
-		     SYNC_NO_ORDER_CHECK);
-
-	rw_lock_debug_event = os_event_create();
-	rw_lock_debug_waiters = FALSE;
+	os_fast_mutex_init(rw_lock_debug_mutex_key, &rw_lock_debug_mutex);
 #endif /* UNIV_SYNC_DEBUG */
 }
 
@@ -1544,6 +1540,7 @@ sync_close(void)
 	sync_order_checks_on = FALSE;
 
 	sync_thread_level_arrays_free();
+	os_fast_mutex_free(&rw_lock_debug_mutex);
 #endif /* UNIV_SYNC_DEBUG */
 
 	sync_initialized = FALSE;
@@ -1558,12 +1555,12 @@ sync_print_wait_info(
 	FILE*	file)		/*!< in: file where to print */
 {
 	fprintf(file,
-		"Mutex spin waits "UINT64PF", rounds "UINT64PF", "
-		"OS waits "UINT64PF"\n"
-		"RW-shared spins "UINT64PF", rounds "UINT64PF", "
-		"OS waits "UINT64PF"\n"
-		"RW-excl spins "UINT64PF", rounds "UINT64PF", "
-		"OS waits "UINT64PF"\n",
+		"Mutex spin waits " UINT64PF ", rounds " UINT64PF ", "
+		"OS waits " UINT64PF "\n"
+		"RW-shared spins " UINT64PF ", rounds " UINT64PF ", "
+		"OS waits " UINT64PF "\n"
+		"RW-excl spins " UINT64PF ", rounds " UINT64PF ", "
+		"OS waits " UINT64PF "\n",
 		(ib_uint64_t) mutex_spin_wait_count,
 		(ib_uint64_t) mutex_spin_round_count,
 		(ib_uint64_t) mutex_os_wait_count,
diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc
index f6360562ae7..01ccfb8a6d0 100644
--- a/storage/innobase/trx/trx0i_s.cc
+++ b/storage/innobase/trx/trx0i_s.cc
@@ -1639,7 +1639,7 @@ trx_i_s_create_lock_id(
 	} else {
 		/* table lock */
 		res_len = ut_snprintf(lock_id, lock_id_size,
-				      TRX_ID_FMT":"UINT64PF,
+				      TRX_ID_FMT":" UINT64PF,
 				      row->lock_trx_id,
 				      row->lock_table_id);
 	}
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 38b21d8d428..272f8377f68 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -50,6 +50,9 @@ Created 3/26/1996 Heikki Tuuri
 
 #include<set>
 
+extern "C"
+int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
+
 /** Set of table_id */
 typedef std::set<table_id_t>	table_id_set;
 
@@ -1833,7 +1836,7 @@ state_ok:
 
 	if (trx->undo_no != 0) {
 		newline = TRUE;
-		fprintf(f, ", undo log entries "TRX_ID_FMT, trx->undo_no);
+		fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
 	}
 
 	if (newline) {
@@ -1936,9 +1939,8 @@ trx_assert_started(
 #endif /* UNIV_DEBUG */
 
 /*******************************************************************//**
-Compares the "weight" (or size) of two transactions. Transactions that
-have edited non-transactional tables are considered heavier than ones
-that have not.
+Compares the "weight" (or size) of two transactions. The heavier the weight,
+the more reluctant we will be to choose the transaction as a deadlock victim.
 @return	TRUE if weight(a) >= weight(b) */
 UNIV_INTERN
 ibool
@@ -1947,26 +1949,19 @@ trx_weight_ge(
 	const trx_t*	a,	/*!< in: the first transaction to be compared */
 	const trx_t*	b)	/*!< in: the second transaction to be compared */
 {
-	ibool	a_notrans_edit;
-	ibool	b_notrans_edit;
-
-	/* If mysql_thd is NULL for a transaction we assume that it has
-	not edited non-transactional tables. */
-
-	a_notrans_edit = a->mysql_thd != NULL
-		&& thd_has_edited_nontrans_tables(a->mysql_thd);
-
-	b_notrans_edit = b->mysql_thd != NULL
-		&& thd_has_edited_nontrans_tables(b->mysql_thd);
-
-	if (a_notrans_edit != b_notrans_edit) {
+	int pref;
 
-		return(a_notrans_edit);
+	/* First ask the upper server layer whether it has a preference for
+	which transaction to choose as the deadlock victim. */
+	pref= thd_deadlock_victim_preference(a->mysql_thd, b->mysql_thd);
+	if (pref < 0) {
+		return FALSE;
+	} else if (pref > 0) {
+		return TRUE;
 	}
 
-	/* Either both had edited non-transactional tables or both had
-	not, we fall back to comparing the number of altered/locked
-	rows. */
+	/* Upper server layer had no preference, we fall back to comparing the
+	number of altered/locked rows. */
 
 #if 0
 	fprintf(stderr,
@@ -2133,7 +2128,7 @@ trx_recover_for_mysql(
 			ut_print_timestamp(stderr);
 			fprintf(stderr,
 				"  InnoDB: Transaction contains changes"
-				" to "TRX_ID_FMT" rows\n",
+				" to " TRX_ID_FMT " rows\n",
 				trx->undo_no);
 
 			count++;
author	Nirbhay Choubey <nirbhay@skysql.com>	2014-08-11 23:55:41 -0400
committer	Nirbhay Choubey <nirbhay@skysql.com>	2014-08-11 23:55:41 -0400
commit	8358dd53b7406deaa9f50ad09b16a86b7e367632 (patch)
tree	ef8995ad0e400cb6a1842649c3c886c7b3474835 /storage/innobase
parent	e06e12f5b8dfe0ab2e5976eec1b27b25d318441b (diff)
parent	4105cbf4a230c82ea7dee31d4d2262b798fad9f4 (diff)
download	mariadb-git-8358dd53b7406deaa9f50ad09b16a86b7e367632.tar.gz