Diffstat (limited to 'storage/innobase/buf/buf0flu.cc')
-rw-r--r--  storage/innobase/buf/buf0flu.cc | 190
1 file changed, 122 insertions(+), 68 deletions(-)
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index d74b41b0e8e..c2a9cbcfa16 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -870,8 +870,9 @@ static void buf_tmp_reserve_compression_buf(buf_tmp_buffer_t* slot)
static byte* buf_tmp_page_encrypt(ulint offset, const byte* s, byte* d)
{
/* Calculate the start offset in a page */
- uint srclen= srv_page_size - (FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION +
- FIL_PAGE_FCRC32_CHECKSUM);
+ uint srclen= static_cast<uint>(srv_page_size) -
+ (FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION +
+ FIL_PAGE_FCRC32_CHECKSUM);
const byte* src= s + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
byte* dst= d + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
@@ -897,6 +898,10 @@ a page is written to disk.
(may be src_frame or an encrypted/compressed copy of it) */
static byte* buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s)
{
+ if (bpage->status == buf_page_t::FREED) {
+ return s;
+ }
+
ut_ad(space->id == bpage->id.space());
bpage->real_size = srv_page_size;
@@ -1022,6 +1027,57 @@ not_compressed:
return d;
}
+/** Handle a freed page during flushing:
+ i) write zeros to the file asynchronously if scrubbing is enabled;
+ ii) punch a hole in the file synchronously if page_compressed is
+ enabled for the tablespace.
+This function also resets the IO_FIX to IO_NONE and sets the page
+status to NORMAL. It initiates the write to the file only after
+releasing the page from the flush list and its associated mutex.
+@param[in,out] bpage freed buffer page
+@param[in] space tablespace object of the freed page */
+static void buf_flush_freed_page(buf_page_t *bpage, fil_space_t *space)
+{
+ ut_ad(buf_page_in_file(bpage));
+ const bool uncompressed= buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE;
+ BPageMutex *block_mutex= uncompressed
+ ? &reinterpret_cast<buf_block_t*>(bpage)->mutex
+ : &buf_pool->zip_mutex;
+
+ mutex_enter(&buf_pool->mutex);
+ mutex_enter(block_mutex);
+
+ buf_page_set_io_fix(bpage, BUF_IO_NONE);
+ bpage->status= buf_page_t::NORMAL;
+ buf_flush_write_complete(bpage, false);
+
+ if (uncompressed)
+ rw_lock_sx_unlock_gen(&reinterpret_cast<buf_block_t*>(bpage)->lock,
+ BUF_IO_WRITE);
+
+ buf_pool->stat.n_pages_written++;
+ mutex_exit(&buf_pool->mutex);
+ const page_id_t page_id(bpage->id);
+ const auto zip_size= bpage->zip_size();
+ mutex_exit(block_mutex);
+
+ const bool punch_hole=
+#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
+ space->is_compressed() ||
+#endif
+ false;
+
+ ut_ad(space->id == page_id.space());
+ ut_ad(space->zip_size() == zip_size);
+
+ if (punch_hole || srv_immediate_scrub_data_uncompressed)
+ fil_io(IORequestWrite, punch_hole, page_id, zip_size, 0,
+ zip_size ? zip_size : srv_page_size,
+ const_cast<byte*>(field_ref_zero), nullptr, false, punch_hole);
+
+ space->release_for_io();
+}
+
/********************************************************************//**
Does an asynchronous write of a buffer page. NOTE: when the
doublewrite buffer is used, we must call
@@ -1084,6 +1140,12 @@ buf_flush_write_block_low(
frame = ((buf_block_t*) bpage)->frame;
}
+ /* Skip encryption and compression for a
+ freed page */
+ if (bpage->status == buf_page_t::FREED) {
+ break;
+ }
+
byte* page = reinterpret_cast<const buf_block_t*>(bpage)->frame;
if (full_crc32) {
@@ -1111,8 +1173,13 @@ buf_flush_write_block_low(
ut_ad(space->atomic_write_supported);
}
- const bool use_doublewrite = !bpage->init_on_flush
- && space->use_doublewrite();
+ if (bpage->status == buf_page_t::FREED) {
+ buf_flush_freed_page(bpage, space);
+ return;
+ }
+
+ const bool use_doublewrite = bpage->status != buf_page_t::INIT_ON_FLUSH
+ && space->use_doublewrite();
if (!use_doublewrite) {
ulint type = IORequest::WRITE;
@@ -1191,17 +1258,14 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
- bool is_uncompressed;
-
- is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+ bool is_uncompressed = (buf_page_get_state(bpage)
+ == BUF_BLOCK_FILE_PAGE);
ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
- ibool flush;
rw_lock_t* rw_lock;
bool no_fix_count = bpage->buf_fix_count == 0;
if (!is_uncompressed) {
- flush = TRUE;
rw_lock = NULL;
} else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)
|| (!no_fix_count
@@ -1211,61 +1275,55 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
/* For table residing in temporary tablespace sync is done
using IO_FIX and so before scheduling for flush ensure that
page is not fixed. */
- flush = FALSE;
+ return false;
} else {
rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock;
- if (flush_type != BUF_FLUSH_LIST) {
- flush = rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE);
- } else {
- /* Will SX lock later */
- flush = TRUE;
+ if (flush_type != BUF_FLUSH_LIST
+ && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
+ return false;
}
}
- if (flush) {
-
- /* We are committed to flushing by the time we get here */
+ /* We are committed to flushing by the time we get here */
- buf_page_set_io_fix(bpage, BUF_IO_WRITE);
+ buf_page_set_io_fix(bpage, BUF_IO_WRITE);
- buf_page_set_flush_type(bpage, flush_type);
+ buf_page_set_flush_type(bpage, flush_type);
- if (buf_pool->n_flush[flush_type] == 0) {
- os_event_reset(buf_pool->no_flush[flush_type]);
- }
-
- ++buf_pool->n_flush[flush_type];
- ut_ad(buf_pool->n_flush[flush_type] != 0);
+ if (buf_pool->n_flush[flush_type] == 0) {
+ os_event_reset(buf_pool->no_flush[flush_type]);
+ }
- mutex_exit(block_mutex);
+ ++buf_pool->n_flush[flush_type];
+ ut_ad(buf_pool->n_flush[flush_type] != 0);
- mutex_exit(&buf_pool->mutex);
+ mutex_exit(block_mutex);
- if (flush_type == BUF_FLUSH_LIST
- && is_uncompressed
- && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
+ mutex_exit(&buf_pool->mutex);
- if (!fsp_is_system_temporary(bpage->id.space())) {
- /* avoiding deadlock possibility involves
- doublewrite buffer, should flush it, because
- it might hold the another block->lock. */
- buf_dblwr_flush_buffered_writes();
- } else {
- buf_dblwr_sync_datafiles();
- }
+ if (flush_type == BUF_FLUSH_LIST
+ && is_uncompressed
+ && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
- rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE);
+ if (!fsp_is_system_temporary(bpage->id.space())) {
+ /* To avoid a possible deadlock involving
+ the doublewrite buffer, flush it: it might
+ hold another block->lock. */
+ buf_dblwr_flush_buffered_writes();
+ } else {
+ buf_dblwr_sync_datafiles();
}
- /* Even though bpage is not protected by any mutex at this
- point, it is safe to access bpage, because it is io_fixed and
- oldest_modification != 0. Thus, it cannot be relocated in the
- buffer pool or removed from flush_list or LRU_list. */
-
- buf_flush_write_block_low(bpage, flush_type, sync);
+ rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE);
}
- return(flush);
+ /* Even though bpage is not protected by any mutex at this
+ point, it is safe to access bpage, because it is io_fixed and
+ oldest_modification != 0. Thus, it cannot be relocated in the
+ buffer pool or removed from flush_list or LRU_list. */
+
+ buf_flush_write_block_low(bpage, flush_type, sync);
+ return true;
}
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
@@ -2183,9 +2241,9 @@ af_get_pct_for_dirty()
/* 1 + is there to avoid division by zero (in case the buffer
pool (including the flush_list) was emptied while we are
looking at it) */
- double dirty_pct = double(100 * dirty)
- / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
- + UT_LIST_GET_LEN(buf_pool->free));
+ double dirty_pct = 100 * static_cast<double>(dirty)
+ / static_cast<double>(1 + UT_LIST_GET_LEN(buf_pool->LRU)
+ + UT_LIST_GET_LEN(buf_pool->free));
ut_a(srv_max_dirty_pages_pct_lwm
<= srv_max_buf_pool_modified_pct);
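The recast here only avoids implicit-conversion warnings; the computed value is unchanged. For example, 300 dirty pages against an LRU list of 950 pages and 49 free pages give 100 * 300 / (1 + 950 + 49) = 30.0. A standalone restatement, with plain integers standing in for the UT_LIST lengths (dirty_percentage is an illustrative helper, not the InnoDB function):

#include <cstdio>

static double dirty_percentage(unsigned long dirty,
                               unsigned long lru_len,
                               unsigned long free_len)
{
  /* 1 + ... avoids division by zero, as the comment above explains. */
  return 100 * static_cast<double>(dirty)
         / static_cast<double>(1 + lru_len + free_len);
}

int main()
{
  std::printf("%.1f\n", dirty_percentage(300, 950, 49)); /* prints 30.0 */
  return 0;
}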
@@ -2219,8 +2277,9 @@ af_get_pct_for_lsn(
{
lsn_t max_async_age;
lsn_t lsn_age_factor;
- lsn_t af_lwm = (lsn_t) ((srv_adaptive_flushing_lwm
- * log_get_capacity()) / 100);
+ lsn_t af_lwm = static_cast<lsn_t>(
+ srv_adaptive_flushing_lwm
+ * static_cast<double>(log_get_capacity()) / 100);
if (age < af_lwm) {
/* No adaptive flushing. */
@@ -2242,10 +2301,11 @@ af_get_pct_for_lsn(
lsn_age_factor = (age * 100) / max_async_age;
ut_ad(srv_max_io_capacity >= srv_io_capacity);
- return(static_cast<ulint>(
- ((srv_max_io_capacity / srv_io_capacity)
- * (lsn_age_factor * sqrt((double)lsn_age_factor)))
- / 7.5));
+ return static_cast<ulint>(
+ (static_cast<double>(srv_max_io_capacity / srv_io_capacity
+ * lsn_age_factor)
+ * sqrt(static_cast<double>(lsn_age_factor))
+ / 7.5));
}
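The reordering keeps the original integer division srv_max_io_capacity / srv_io_capacity and only moves the casts, so the returned value is unchanged. With srv_max_io_capacity = 2000, srv_io_capacity = 200 and lsn_age_factor = 61, the result is 10 * 61 * sqrt(61) / 7.5 ≈ 635. A standalone restatement (pct_for_lsn is an illustrative helper, not the InnoDB function):

#include <cmath>
#include <cstdio>

static unsigned long pct_for_lsn(unsigned long max_io_capacity,
                                 unsigned long io_capacity,
                                 unsigned long lsn_age_factor)
{
  /* max_io_capacity / io_capacity deliberately remains integer
     division, exactly as in the expression above. */
  return static_cast<unsigned long>(
      static_cast<double>(max_io_capacity / io_capacity * lsn_age_factor)
      * std::sqrt(static_cast<double>(lsn_age_factor)) / 7.5);
}

int main()
{
  std::printf("%lu\n", pct_for_lsn(2000, 200, 61)); /* prints 635 */
  return 0;
}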
/*********************************************************************//**
@@ -2273,14 +2333,7 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
ulint pct_for_lsn = 0;
ulint pct_total = 0;
- cur_lsn = log_get_lsn_nowait();
-
- /* log_get_lsn_nowait tries to get log_sys.mutex with
- mutex_enter_nowait, if this does not succeed function
- returns 0, do not use that value to update stats. */
- if (cur_lsn == 0) {
- return(0);
- }
+ cur_lsn = log_sys.get_lsn();
if (prev_lsn == 0) {
/* First time around. */
@@ -2301,7 +2354,7 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
/* We update our variables every srv_flushing_avg_loops
iterations to smooth out transition in workload. */
if (++n_iterations >= srv_flushing_avg_loops
- || time_elapsed >= srv_flushing_avg_loops) {
+ || time_elapsed >= static_cast<double>(srv_flushing_avg_loops)) {
if (time_elapsed < 1) {
time_elapsed = 1;
@@ -2310,7 +2363,7 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
avg_page_rate = static_cast<ulint>(
((static_cast<double>(sum_pages)
/ time_elapsed)
- + avg_page_rate) / 2);
+ + static_cast<double>(avg_page_rate)) / 2);
/* How much LSN we have generated since last call. */
lsn_rate = static_cast<lsn_t>(
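The added cast makes the smoothing arithmetic all-double: avg_page_rate is an exponential moving average, with weight 1/2, of the page rate observed in this window and the previous average. A minimal standalone sketch (smooth_page_rate is an illustrative name):

#include <cstdio>

static unsigned long smooth_page_rate(unsigned long sum_pages,
                                      double time_elapsed,
                                      unsigned long prev_avg)
{
  /* (pages per second this window + previous average) / 2 */
  return static_cast<unsigned long>(
      (static_cast<double>(sum_pages) / time_elapsed
       + static_cast<double>(prev_avg)) / 2);
}

int main()
{
  /* 1000 pages over 2 s with a previous average of 300 -> 400 */
  std::printf("%lu\n", smooth_page_rate(1000, 2.0, 300));
  return 0;
}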
@@ -2431,7 +2484,8 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
pages_for_lsn = std::min<ulint>(
pages_for_lsn, srv_max_io_capacity * 2);
- n_pages = (PCT_IO(pct_total) + avg_page_rate + pages_for_lsn) / 3;
+ n_pages = (ulint(double(srv_io_capacity) * double(pct_total) / 100.0)
+ + avg_page_rate + pages_for_lsn) / 3;
if (n_pages > srv_max_io_capacity) {
n_pages = srv_max_io_capacity;
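The open-coded expression replaces PCT_IO(pct_total). Assuming PCT_IO(p) expands to (ulint)(srv_io_capacity * (double)p / 100.0), as defined in srv0srv.h, the substitution is behaviour-preserving. A standalone check (pct_io is an illustrative helper):

#include <cstdio>

static unsigned long pct_io(unsigned long io_capacity, unsigned long pct)
{
  return static_cast<unsigned long>(
      static_cast<double>(io_capacity) * static_cast<double>(pct) / 100.0);
}

int main()
{
  /* an io_capacity of 200 at pct_total = 90 contributes 180 pages */
  std::printf("%lu\n", pct_io(200, 90));
  return 0;
}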
@@ -2939,7 +2993,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
} else if (ret_sleep == OS_SYNC_TIME_EXCEEDED) {
/* no activity, slept enough */
- buf_flush_lists(PCT_IO(100), LSN_MAX, &n_flushed);
+ buf_flush_lists(srv_io_capacity, LSN_MAX, &n_flushed);
n_flushed_last += n_flushed;
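By the same expansion, PCT_IO(100) reduces to exactly srv_io_capacity (the factor 100 / 100.0 is 1.0), so writing srv_io_capacity here simplifies the call without changing the idle-time flushing rate.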