Diffstat (limited to 'storage/innobase')
-rw-r--r--  storage/innobase/btr/btr0btr.cc  2
-rw-r--r--  storage/innobase/btr/btr0cur.cc  4
-rw-r--r--  storage/innobase/buf/buf0buf.cc  14
-rw-r--r--  storage/innobase/buf/buf0dblwr.cc  43
-rw-r--r--  storage/innobase/buf/buf0dump.cc  2
-rw-r--r--  storage/innobase/buf/buf0flu.cc  133
-rw-r--r--  storage/innobase/buf/buf0lru.cc  15
-rw-r--r--  storage/innobase/buf/buf0rea.cc  25
-rw-r--r--  storage/innobase/dict/drop.cc  1
-rw-r--r--  storage/innobase/fil/fil0fil.cc  75
-rw-r--r--  storage/innobase/fsp/fsp0file.cc  2
-rw-r--r--  storage/innobase/fsp/fsp0space.cc  24
-rw-r--r--  storage/innobase/fsp/fsp0sysspace.cc  12
-rw-r--r--  storage/innobase/fts/fts0fts.cc  325
-rw-r--r--  storage/innobase/fts/fts0opt.cc  56
-rw-r--r--  storage/innobase/gis/gis0rtree.cc  3
-rw-r--r--  storage/innobase/handler/ha_innodb.cc  93
-rw-r--r--  storage/innobase/handler/handler0alter.cc  8
-rw-r--r--  storage/innobase/handler/i_s.cc  8
-rw-r--r--  storage/innobase/include/buf0buf.h  16
-rw-r--r--  storage/innobase/include/buf0dblwr.h  45
-rw-r--r--  storage/innobase/include/fil0fil.h  4
-rw-r--r--  storage/innobase/include/fts0fts.h  9
-rw-r--r--  storage/innobase/include/fts0types.h  32
-rw-r--r--  storage/innobase/include/ha_prototypes.h  10
-rw-r--r--  storage/innobase/include/log0log.h  10
-rw-r--r--  storage/innobase/include/mtr0mtr.h  14
-rw-r--r--  storage/innobase/include/os0file.h  32
-rw-r--r--  storage/innobase/include/srv0srv.h  3
-rw-r--r--  storage/innobase/include/trx0purge.h  7
-rw-r--r--  storage/innobase/include/trx0rseg.h  3
-rw-r--r--  storage/innobase/include/trx0undo.h  10
-rw-r--r--  storage/innobase/log/log0log.cc  10
-rw-r--r--  storage/innobase/log/log0recv.cc  43
-rw-r--r--  storage/innobase/mtr/mtr0mtr.cc  12
-rw-r--r--  storage/innobase/os/os0file.cc  115
-rw-r--r--  storage/innobase/page/page0zip.cc  5
-rw-r--r--  storage/innobase/rem/rem0cmp.cc  5
-rw-r--r--  storage/innobase/rem/rem0rec.cc  10
-rw-r--r--  storage/innobase/row/row0import.cc  1
-rw-r--r--  storage/innobase/row/row0ins.cc  48
-rw-r--r--  storage/innobase/row/row0merge.cc  5
-rw-r--r--  storage/innobase/row/row0mysql.cc  26
-rw-r--r--  storage/innobase/row/row0purge.cc  5
-rw-r--r--  storage/innobase/row/row0quiesce.cc  2
-rw-r--r--  storage/innobase/row/row0uins.cc  5
-rw-r--r--  storage/innobase/row/row0vers.cc  28
-rw-r--r--  storage/innobase/srv/srv0mon.cc  40
-rw-r--r--  storage/innobase/srv/srv0srv.cc  34
-rw-r--r--  storage/innobase/srv/srv0start.cc  39
-rw-r--r--  storage/innobase/trx/trx0purge.cc  307
-rw-r--r--  storage/innobase/trx/trx0rec.cc  10
-rw-r--r--  storage/innobase/trx/trx0rseg.cc  3
-rw-r--r--  storage/innobase/trx/trx0trx.cc  27
-rw-r--r--  storage/innobase/trx/trx0undo.cc  120
55 files changed, 1116 insertions, 824 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 491e987cac6..06b785b8b39 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -4463,7 +4463,7 @@ n_field_mismatch:
len -= BTR_EXTERN_FIELD_REF_SIZE;
ulint extern_len = mach_read_from_4(
data + len + BTR_EXTERN_LEN + 4);
- if (fixed_size == extern_len) {
+ if (fixed_size == extern_len + len) {
goto next_field;
}
}
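The corrected check above compares the column's declared fixed size against the locally stored prefix plus the off-page length, not against the off-page length alone. A minimal stand-alone sketch of that arithmetic, with made-up lengths (the 20-byte field reference is the only real constant used here):

    // Hedged sketch: reassembling the total length of an externally stored
    // column from its locally stored prefix and the off-page remainder.
    #include <cstdio>

    int main()
    {
      const unsigned BTR_EXTERN_FIELD_REF_SIZE = 20; // size of the BLOB pointer
      unsigned stored_len = 788;   // bytes kept in the clustered index record
      unsigned len = stored_len - BTR_EXTERN_FIELD_REF_SIZE; // local prefix: 768
      unsigned extern_len = 7432;  // bytes stored off-page

      // A fixed-size column is consistent when the local prefix plus the
      // off-page part add up to the declared size (the check in the diff).
      unsigned fixed_size = 8200;
      std::printf("consistent=%d\n", fixed_size == extern_len + len);
      return 0;
    }
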
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 7ecbe165a1e..f72915bce88 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -1017,7 +1017,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
if (lock_intention == BTR_INTENTION_DELETE)
{
compress_limit= BTR_CUR_PAGE_COMPRESS_LIMIT(index());
- if (buf_pool.n_pend_reads &&
+ if (os_aio_pending_reads_approx() &&
trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
{
/* Most delete-intended operations are due to the purge of history.
@@ -1723,7 +1723,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
{
compress_limit= BTR_CUR_PAGE_COMPRESS_LIMIT(index);
- if (buf_pool.n_pend_reads &&
+ if (os_aio_pending_reads_approx() &&
trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
{
mtr_x_lock_index(index, mtr);
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 4de8b4fd175..6a5221cb2c6 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -1311,11 +1311,11 @@ buf_tmp_buffer_t *buf_pool_t::io_buf_t::reserve()
for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
if (s->acquire())
return s;
- os_aio_wait_until_no_pending_writes();
+ os_aio_wait_until_no_pending_writes(true);
for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
if (s->acquire())
return s;
- os_aio_wait_until_no_pending_reads();
+ os_aio_wait_until_no_pending_reads(true);
}
}
@@ -3185,9 +3185,6 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node)
ut_ad(zip_size() == node.space->zip_size());
ut_ad(!!zip.ssize == !!zip.data);
- ut_d(auto n=) buf_pool.n_pend_reads--;
- ut_ad(n > 0);
-
const byte *read_frame= zip.data ? zip.data : frame;
ut_ad(read_frame);
@@ -3476,9 +3473,8 @@ void buf_pool_t::print()
<< ", modified database pages="
<< UT_LIST_GET_LEN(flush_list)
<< ", n pending decompressions=" << n_pend_unzip
- << ", n pending reads=" << n_pend_reads
<< ", n pending flush LRU=" << n_flush()
- << " list=" << buf_dblwr.pending_writes()
+ << " list=" << os_aio_pending_writes()
<< ", pages made young=" << stat.n_pages_made_young
<< ", not young=" << stat.n_pages_not_made_young
<< ", pages read=" << stat.n_pages_read
@@ -3591,11 +3587,11 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool.unzip_LRU);
- pool_info->n_pend_reads = buf_pool.n_pend_reads;
+ pool_info->n_pend_reads = os_aio_pending_reads_approx();
pool_info->n_pending_flush_lru = buf_pool.n_flush();
- pool_info->n_pending_flush_list = buf_dblwr.pending_writes();
+ pool_info->n_pending_flush_list = os_aio_pending_writes();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
current_time = time(NULL);
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 8d8b92ae560..1260145ed1c 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -53,7 +53,6 @@ void buf_dblwr_t::init()
active_slot= &slots[0];
mysql_mutex_init(buf_dblwr_mutex_key, &mutex, nullptr);
pthread_cond_init(&cond, nullptr);
- pthread_cond_init(&write_cond, nullptr);
}
}
@@ -467,7 +466,6 @@ void buf_dblwr_t::close()
ut_ad(!batch_running);
pthread_cond_destroy(&cond);
- pthread_cond_destroy(&write_cond);
for (int i= 0; i < 2; i++)
{
aligned_free(slots[i].write_buf);
@@ -479,38 +477,31 @@ void buf_dblwr_t::close()
}
/** Update the doublewrite buffer on write completion. */
-void buf_dblwr_t::write_completed(bool with_doublewrite)
+void buf_dblwr_t::write_completed()
{
ut_ad(this == &buf_dblwr);
ut_ad(!srv_read_only_mode);
mysql_mutex_lock(&mutex);
- ut_ad(writes_pending);
- if (!--writes_pending)
- pthread_cond_broadcast(&write_cond);
+ ut_ad(is_created());
+ ut_ad(srv_use_doublewrite_buf);
+ ut_ad(batch_running);
+ slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0];
+ ut_ad(flush_slot->reserved);
+ ut_ad(flush_slot->reserved <= flush_slot->first_free);
- if (with_doublewrite)
+ if (!--flush_slot->reserved)
{
- ut_ad(is_created());
- ut_ad(srv_use_doublewrite_buf);
- ut_ad(batch_running);
- slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0];
- ut_ad(flush_slot->reserved);
- ut_ad(flush_slot->reserved <= flush_slot->first_free);
-
- if (!--flush_slot->reserved)
- {
- mysql_mutex_unlock(&mutex);
- /* This will finish the batch. Sync data files to the disk. */
- fil_flush_file_spaces();
- mysql_mutex_lock(&mutex);
+ mysql_mutex_unlock(&mutex);
+ /* This will finish the batch. Sync data files to the disk. */
+ fil_flush_file_spaces();
+ mysql_mutex_lock(&mutex);
- /* We can now reuse the doublewrite memory buffer: */
- flush_slot->first_free= 0;
- batch_running= false;
- pthread_cond_broadcast(&cond);
- }
+ /* We can now reuse the doublewrite memory buffer: */
+ flush_slot->first_free= 0;
+ batch_running= false;
+ pthread_cond_broadcast(&cond);
}
mysql_mutex_unlock(&mutex);
@@ -605,7 +596,6 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
const bool multi_batch= block1 + static_cast<uint32_t>(size) != block2 &&
old_first_free > size;
flushing_buffered_writes= 1 + multi_batch;
- pages_submitted+= old_first_free;
/* Now safe to release the mutex. */
mysql_mutex_unlock(&mutex);
#ifdef UNIV_DEBUG
@@ -754,7 +744,6 @@ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size)
const ulint buf_size= 2 * block_size();
mysql_mutex_lock(&mutex);
- writes_pending++;
for (;;)
{
diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc
index 63a28aa1a37..bb705ec5651 100644
--- a/storage/innobase/buf/buf0dump.cc
+++ b/storage/innobase/buf/buf0dump.cc
@@ -651,7 +651,7 @@ buf_load()
ut_free(dump);
if (i == dump_n) {
- os_aio_wait_until_no_pending_reads();
+ os_aio_wait_until_no_pending_reads(true);
}
ut_sprintf_timestamp(now);
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 0072d0c22b8..57245cbb95e 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -113,7 +113,12 @@ void buf_pool_t::page_cleaner_wakeup(bool for_LRU)
{
ut_d(buf_flush_validate_skip());
if (!page_cleaner_idle())
+ {
+ if (for_LRU)
+ /* Ensure that the page cleaner is not in a timed wait. */
+ pthread_cond_signal(&do_flush_list);
return;
+ }
double dirty_pct= double(UT_LIST_GET_LEN(buf_pool.flush_list)) * 100.0 /
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
double pct_lwm= srv_max_dirty_pages_pct_lwm;
@@ -209,7 +214,7 @@ void buf_flush_remove_pages(uint32_t id)
if (!deferred)
break;
- buf_dblwr.wait_for_page_writes();
+ os_aio_wait_until_no_pending_writes(true);
}
}
@@ -339,9 +344,9 @@ void buf_page_write_complete(const IORequest &request)
if (request.is_LRU())
{
const bool temp= bpage->oldest_modification() == 2;
- if (!temp)
- buf_dblwr.write_completed(state < buf_page_t::WRITE_FIX_REINIT &&
- request.node->space->use_doublewrite());
+ if (!temp && state < buf_page_t::WRITE_FIX_REINIT &&
+ request.node->space->use_doublewrite())
+ buf_dblwr.write_completed();
/* We must hold buf_pool.mutex while releasing the block, so that
no other thread can access it before we have freed it. */
mysql_mutex_lock(&buf_pool.mutex);
@@ -353,8 +358,9 @@ void buf_page_write_complete(const IORequest &request)
}
else
{
- buf_dblwr.write_completed(state < buf_page_t::WRITE_FIX_REINIT &&
- request.node->space->use_doublewrite());
+ if (state < buf_page_t::WRITE_FIX_REINIT &&
+ request.node->space->use_doublewrite())
+ buf_dblwr.write_completed();
bpage->write_complete(false);
}
}
@@ -847,8 +853,6 @@ bool buf_page_t::flush(bool evict, fil_space_t *space)
ut_ad(lsn >= oldest_modification());
log_write_up_to(lsn, true);
}
- if (UNIV_LIKELY(space->purpose != FIL_TYPE_TEMPORARY))
- buf_dblwr.add_unbuffered();
space->io(IORequest{type, this, slot}, physical_offset(), size,
write_frame, this);
}
@@ -1082,13 +1086,11 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
mysql_mutex_unlock(&buf_pool.mutex);
}
- ut_ad(!bpage);
-
- if (auto n= count - 1)
+ if (count > 1)
{
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
MONITOR_FLUSH_NEIGHBOR_COUNT,
- MONITOR_FLUSH_NEIGHBOR_PAGES, n);
+ MONITOR_FLUSH_NEIGHBOR_PAGES, count - 1);
}
return count;
@@ -1218,6 +1220,11 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict,
++n->evicted;
/* fall through */
case 1:
+ if (UNIV_LIKELY(scanned & 31))
+ continue;
+ mysql_mutex_unlock(&buf_pool.mutex);
+ reacquire_mutex:
+ mysql_mutex_lock(&buf_pool.mutex);
continue;
}
@@ -1253,32 +1260,37 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict,
auto p= buf_flush_space(space_id);
space= p.first;
last_space_id= space_id;
+ if (!space)
+ {
+ mysql_mutex_lock(&buf_pool.mutex);
+ goto no_space;
+ }
mysql_mutex_lock(&buf_pool.mutex);
- if (p.second)
- buf_pool.stat.n_pages_written+= p.second;
+ buf_pool.stat.n_pages_written+= p.second;
}
else
+ {
ut_ad(!space);
+ goto no_space;
+ }
}
else if (space->is_stopping())
{
space->release();
space= nullptr;
- }
-
- if (!space)
- {
+ no_space:
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_discard_page(bpage);
+ continue;
}
- else if (neighbors && space->is_rotational())
+
+ if (neighbors && space->is_rotational())
{
mysql_mutex_unlock(&buf_pool.mutex);
n->flushed+= buf_flush_try_neighbors(space, page_id, bpage,
neighbors == 1,
do_evict, n->flushed, max);
-reacquire_mutex:
- mysql_mutex_lock(&buf_pool.mutex);
+ goto reacquire_mutex;
}
else if (n->flushed >= max && !recv_recovery_is_on())
{
@@ -1645,7 +1657,7 @@ done:
space->release();
if (space->purpose == FIL_TYPE_IMPORT)
- os_aio_wait_until_no_pending_writes();
+ os_aio_wait_until_no_pending_writes(true);
else
buf_dblwr.flush_buffered_writes();
@@ -1957,7 +1969,7 @@ static void buf_flush_wait(lsn_t lsn)
break;
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- buf_dblwr.wait_for_page_writes();
+ os_aio_wait_until_no_pending_writes(false);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
}
}
@@ -1978,8 +1990,6 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn)
{
MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
- thd_wait_begin(nullptr, THD_WAIT_DISKIO);
- tpool::tpool_wait_begin();
#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */
if (UNIV_UNLIKELY(!buf_page_cleaner_is_active))
@@ -1994,17 +2004,20 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
MONITOR_FLUSH_SYNC_COUNT,
MONITOR_FLUSH_SYNC_PAGES, n_pages);
}
- buf_dblwr.wait_for_page_writes();
+ os_aio_wait_until_no_pending_writes(false);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
}
while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn);
}
else
#endif
+ {
+ thd_wait_begin(nullptr, THD_WAIT_DISKIO);
+ tpool::tpool_wait_begin();
buf_flush_wait(sync_lsn);
-
- tpool::tpool_wait_end();
- thd_wait_end(nullptr);
+ tpool::tpool_wait_end();
+ thd_wait_end(nullptr);
+ }
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
@@ -2157,10 +2170,12 @@ Based on various factors it decides if there is a need to do flushing.
@return number of pages recommended to be flushed
@param last_pages_in number of pages flushed in previous batch
@param oldest_lsn buf_pool.get_oldest_modification(0)
+@param pct_lwm innodb_max_dirty_pages_pct_lwm, or 0 to ignore it
@param dirty_blocks UT_LIST_GET_LEN(buf_pool.flush_list)
@param dirty_pct 100*flush_list.count / (LRU.count + free.count) */
static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
lsn_t oldest_lsn,
+ double pct_lwm,
ulint dirty_blocks,
double dirty_pct)
{
@@ -2230,11 +2245,17 @@ func_exit:
sum_pages = 0;
}
- const ulint pct_for_dirty = srv_max_dirty_pages_pct_lwm == 0
- ? (dirty_pct >= max_pct ? 100 : 0)
- : static_cast<ulint>
- (max_pct > 0.0 ? dirty_pct / max_pct : dirty_pct);
- ulint pct_total = std::max(pct_for_dirty, pct_for_lsn);
+ MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn);
+
+ double total_ratio;
+ if (pct_lwm == 0.0 || max_pct == 0.0) {
+ total_ratio = 1;
+ } else {
+ total_ratio = std::max(double(pct_for_lsn) / 100,
+ (dirty_pct / max_pct));
+ }
+
+ MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, ulint(total_ratio * 100));
/* Estimate pages to be flushed for the lsn progress */
lsn_t target_lsn = oldest_lsn
@@ -2260,7 +2281,7 @@ func_exit:
pages_for_lsn = 1;
}
- n_pages = (ulint(double(srv_io_capacity) * double(pct_total) / 100.0)
+ n_pages = (ulint(double(srv_io_capacity) * total_ratio)
+ avg_page_rate + pages_for_lsn) / 3;
if (n_pages > srv_max_io_capacity) {
@@ -2273,8 +2294,6 @@ func_exit:
MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate);
MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate);
- MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty);
- MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn);
goto func_exit;
}
@@ -2362,7 +2381,7 @@ static void buf_flush_page_cleaner()
if (!recv_recovery_is_on() &&
!srv_startup_is_before_trx_rollback_phase &&
- srv_operation == SRV_OPERATION_NORMAL)
+ srv_operation <= SRV_OPERATION_EXPORT_RESTORED)
log_checkpoint();
}
while (false);
@@ -2387,7 +2406,7 @@ static void buf_flush_page_cleaner()
soft_lsn_limit= lsn_limit;
}
- bool idle_flush= false;
+ double pct_lwm= 0.0;
ulint n_flushed= 0, n;
if (UNIV_UNLIKELY(soft_lsn_limit != 0))
@@ -2406,7 +2425,7 @@ static void buf_flush_page_cleaner()
mysql_mutex_unlock(&buf_pool.mutex);
last_pages+= n;
- if (!idle_flush)
+ if (pct_lwm == 0.0)
goto end_of_batch;
/* when idle flushing kicks in page_cleaner is marked active.
@@ -2424,7 +2443,8 @@ static void buf_flush_page_cleaner()
guaranteed to be nonempty, and it is a subset of buf_pool.LRU. */
const double dirty_pct= double(dirty_blocks) * 100.0 /
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
- if (srv_max_dirty_pages_pct_lwm != 0.0)
+ pct_lwm= srv_max_dirty_pages_pct_lwm;
+ if (pct_lwm != 0.0)
{
const ulint activity_count= srv_get_activity_count();
if (activity_count != last_activity_count)
@@ -2432,7 +2452,7 @@ static void buf_flush_page_cleaner()
last_activity_count= activity_count;
goto maybe_unemployed;
}
- else if (buf_pool.page_cleaner_idle() && buf_pool.n_pend_reads == 0)
+ else if (buf_pool.page_cleaner_idle() && !os_aio_pending_reads())
{
/* reaching here means 3 things:
- last_activity_count == activity_count: suggesting server is idle
@@ -2441,13 +2461,16 @@ static void buf_flush_page_cleaner()
- there are no pending reads but there are dirty pages to flush */
buf_pool.update_last_activity_count(activity_count);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- idle_flush= true;
goto idle_flush;
}
else
+ {
maybe_unemployed:
- if (dirty_pct < srv_max_dirty_pages_pct_lwm)
+ const bool below{dirty_pct < pct_lwm};
+ pct_lwm= 0.0;
+ if (below)
goto possibly_unemployed;
+ }
}
else if (dirty_pct < srv_max_buf_pool_modified_pct)
possibly_unemployed:
@@ -2478,6 +2501,7 @@ static void buf_flush_page_cleaner()
}
else if ((n= page_cleaner_flush_pages_recommendation(last_pages,
oldest_lsn,
+ pct_lwm,
dirty_blocks,
dirty_pct)) != 0)
{
@@ -2508,7 +2532,7 @@ static void buf_flush_page_cleaner()
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_wait_LRU_batch_end();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- buf_dblwr.wait_for_page_writes();
+ os_aio_wait_until_no_pending_writes(false);
}
mysql_mutex_lock(&buf_pool.flush_list_mutex);
@@ -2534,7 +2558,7 @@ static void buf_flush_page_cleaner()
ATTRIBUTE_COLD void buf_flush_page_cleaner_init()
{
ut_ad(!buf_page_cleaner_is_active);
- ut_ad(srv_operation == SRV_OPERATION_NORMAL ||
+ ut_ad(srv_operation <= SRV_OPERATION_EXPORT_RESTORED ||
srv_operation == SRV_OPERATION_RESTORE ||
srv_operation == SRV_OPERATION_RESTORE_EXPORT);
buf_flush_async_lsn= 0;
@@ -2558,15 +2582,7 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool()
{
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
buf_flush_list(srv_max_io_capacity);
- if (const size_t pending= buf_dblwr.pending_writes())
- {
- timespec abstime;
- service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
- "Waiting to write %zu pages", pending);
- set_timespec(abstime, INNODB_EXTEND_TIMEOUT_INTERVAL / 2);
- buf_dblwr.wait_for_page_writes(abstime);
- }
-
+ os_aio_wait_until_no_pending_writes(false);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"Waiting to flush " ULINTPF " pages",
@@ -2574,20 +2590,17 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool()
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- ut_ad(!buf_pool.any_io_pending());
+ ut_ad(!os_aio_pending_reads());
}
/** Synchronously flush dirty blocks during recv_sys_t::apply().
NOTE: The calling thread is not allowed to hold any buffer page latches! */
void buf_flush_sync_batch(lsn_t lsn)
{
- thd_wait_begin(nullptr, THD_WAIT_DISKIO);
- tpool::tpool_wait_begin();
+ lsn= std::max(lsn, log_sys.get_lsn());
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_wait(lsn);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- tpool::tpool_wait_end();
- thd_wait_end(nullptr);
}
/** Synchronously flush dirty blocks.
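The rewritten recommendation folds the old pct_for_dirty/pct_total integers into a floating-point total_ratio = max(pct_for_lsn/100, dirty_pct/max_pct), falling back to 1 when innodb_max_dirty_pages_pct_lwm or innodb_max_dirty_pages_pct is 0, and averages it with the page-rate and LSN-progress estimates. A worked numeric sketch with made-up inputs, only to make the arithmetic concrete; the formula structure follows the hunks above:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
      double pct_for_lsn   = 30;   // redo-log based urgency, in percent
      double dirty_pct     = 45;   // 100 * flush_list.count / (LRU + free)
      double pct_lwm       = 10;   // innodb_max_dirty_pages_pct_lwm
      double max_pct       = 90;   // innodb_max_dirty_pages_pct
      double io_capacity   = 200;  // innodb_io_capacity
      double avg_page_rate = 120;  // smoothed pages flushed per interval
      double pages_for_lsn = 300;  // pages needed for LSN progress

      double total_ratio = (pct_lwm == 0.0 || max_pct == 0.0)
          ? 1.0
          : std::max(pct_for_lsn / 100, dirty_pct / max_pct);

      unsigned n_pages = (unsigned(io_capacity * total_ratio)
                          + unsigned(avg_page_rate)
                          + unsigned(pages_for_lsn)) / 3;
      // With these inputs: total_ratio = 0.50, n_pages = (100+120+300)/3 = 173.
      std::printf("total_ratio=%.2f n_pages=%u\n", total_ratio, n_pages);
      return 0;
    }
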
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index e4e20e8335f..4c42d3eab54 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -455,15 +455,15 @@ got_block:
mysql_mutex_unlock(&buf_pool.mutex);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
const auto n_flush = buf_pool.n_flush();
+ if (!buf_pool.try_LRU_scan) {
+ buf_pool.page_cleaner_wakeup(true);
+ }
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
mysql_mutex_lock(&buf_pool.mutex);
if (!n_flush) {
goto not_found;
}
if (!buf_pool.try_LRU_scan) {
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
- buf_pool.page_cleaner_wakeup(true);
- mysql_mutex_unlock(&buf_pool.flush_list_mutex);
my_cond_wait(&buf_pool.done_free,
&buf_pool.mutex.m_mutex);
}
@@ -1112,17 +1112,8 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
break;
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
- break;
case FIL_PAGE_INDEX:
case FIL_PAGE_RTREE:
-#if defined UNIV_ZIP_DEBUG && defined BTR_CUR_HASH_ADAPT
- /* During recovery, we only update the
- compressed page, not the uncompressed one. */
- ut_a(recv_recovery_is_on()
- || page_zip_validate(
- &bpage->zip, page,
- ((buf_block_t*) bpage)->index));
-#endif /* UNIV_ZIP_DEBUG && BTR_CUR_HASH_ADAPT */
break;
default:
ib::error() << "The compressed page to be"
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 7dab411dabb..1f7f9bfba0a 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -187,9 +187,6 @@ page_exists:
}
buf_pool.stat.n_pages_read++;
- mysql_mutex_unlock(&buf_pool.mutex);
- buf_pool.n_pend_reads++;
- return bpage;
func_exit:
mysql_mutex_unlock(&buf_pool.mutex);
ut_ad(!bpage || bpage->in_file());
@@ -252,8 +249,6 @@ buf_read_page_low(
page_id.page_no() * len, len, dst, bpage);
if (UNIV_UNLIKELY(fio.err != DB_SUCCESS)) {
- ut_d(auto n=) buf_pool.n_pend_reads--;
- ut_ad(n > 0);
buf_pool.corrupted_evict(bpage, buf_page_t::READ_FIX);
} else if (sync) {
thd_wait_end(NULL);
@@ -305,7 +300,8 @@ ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
/* No read-ahead to avoid thread deadlocks */
return 0;
- if (buf_pool.n_pend_reads > buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT)
+ if (os_aio_pending_reads_approx() >
+ buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT)
return 0;
fil_space_t* space= fil_space_t::get(page_id.space());
@@ -511,7 +507,8 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
/* No read-ahead to avoid thread deadlocks */
return 0;
- if (buf_pool.n_pend_reads > buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT)
+ if (os_aio_pending_reads_approx() >
+ buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT)
return 0;
const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area;
@@ -693,18 +690,8 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
limit += buf_pool.chunks[j].size / 2;
}
- for (ulint count = 0; buf_pool.n_pend_reads >= limit; ) {
- std::this_thread::sleep_for(
- std::chrono::milliseconds(10));
-
- if (!(++count % 1000)) {
-
- ib::error()
- << "Waited for " << count / 100
- << " seconds for "
- << buf_pool.n_pend_reads
- << " pending reads";
- }
+ if (os_aio_pending_reads() >= limit) {
+ os_aio_wait_until_no_pending_reads(false);
}
buf_pool_t::hash_chain& chain =
diff --git a/storage/innobase/dict/drop.cc b/storage/innobase/dict/drop.cc
index 4ea4f98640a..d92c2e12409 100644
--- a/storage/innobase/dict/drop.cc
+++ b/storage/innobase/dict/drop.cc
@@ -66,6 +66,7 @@ before transaction commit and must be rolled back explicitly are as follows:
#include "dict0stats.h"
#include "dict0stats_bg.h"
+#include "ibuf0ibuf.h"
#include "lock0lock.h"
#include "que0que.h"
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index c1fd916be55..53ea91529ec 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -96,7 +96,13 @@ bool fil_space_t::try_to_close(bool print_info)
if (!node->is_open())
continue;
- if (const auto n= space.set_closing())
+ /* Other thread is trying to do fil_delete_tablespace()
+ concurrently for the same tablespace. So ignore this
+ tablespace and try to close the other one */
+ const auto n= space.set_closing();
+ if (n & STOPPING)
+ continue;
+ if (n & (PENDING | NEEDS_FSYNC))
{
if (!print_info)
continue;
@@ -288,6 +294,8 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
uint32_t size, bool is_raw, bool atomic_write,
uint32_t max_pages)
{
+ mysql_mutex_assert_owner(&fil_system.mutex);
+
fil_node_t* node;
ut_ad(name != NULL);
@@ -312,7 +320,6 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
node->atomic_write = atomic_write;
- mysql_mutex_lock(&fil_system.mutex);
this->size += size;
UT_LIST_ADD_LAST(chain, node);
if (node->is_open()) {
@@ -323,7 +330,6 @@ fil_node_t* fil_space_t::add(const char* name, pfs_os_file_t handle,
release();
}
}
- mysql_mutex_unlock(&fil_system.mutex);
return node;
}
@@ -799,8 +805,17 @@ pfs_os_file_t fil_system_t::detach(fil_space_t *space, bool detach_handle)
space_list_t::iterator s= space_list_t::iterator(space);
if (space_list_last_opened == space)
{
- space_list_t::iterator prev= s;
- space_list_last_opened= &*--prev;
+ if (s == space_list.begin())
+ {
+ ut_ad(srv_operation > SRV_OPERATION_EXPORT_RESTORED ||
+ srv_shutdown_state > SRV_SHUTDOWN_NONE);
+ space_list_last_opened= nullptr;
+ }
+ else
+ {
+ space_list_t::iterator prev= s;
+ space_list_last_opened= &*--prev;
+ }
}
space_list.erase(s);
}
@@ -934,6 +949,7 @@ fil_space_t *fil_space_t::create(uint32_t id, uint32_t flags,
{
fil_space_t* space;
+ mysql_mutex_assert_owner(&fil_system.mutex);
ut_ad(fil_system.is_initialised());
ut_ad(fil_space_t::is_valid_flags(flags & ~FSP_FLAGS_MEM_MASK, id));
ut_ad(srv_page_size == UNIV_PAGE_SIZE_ORIG || flags != 0);
@@ -966,8 +982,6 @@ fil_space_t *fil_space_t::create(uint32_t id, uint32_t flags,
space->latch.SRW_LOCK_INIT(fil_space_latch_key);
- mysql_mutex_lock(&fil_system.mutex);
-
if (const fil_space_t *old_space = fil_space_get_by_id(id)) {
ib::error() << "Trying to add tablespace with id " << id
<< " to the cache, but tablespace '"
@@ -975,7 +989,6 @@ fil_space_t *fil_space_t::create(uint32_t id, uint32_t flags,
? old_space->chain.start->name
: "")
<< "' already exists in the cache!";
- mysql_mutex_unlock(&fil_system.mutex);
space->~fil_space_t();
ut_free(space);
return(NULL);
@@ -1022,12 +1035,12 @@ fil_space_t *fil_space_t::create(uint32_t id, uint32_t flags,
if (rotate) {
fil_system.default_encrypt_tables.push_back(*space);
space->is_in_default_encrypt = true;
- }
-
- mysql_mutex_unlock(&fil_system.mutex);
- if (rotate && srv_n_fil_crypt_threads_started) {
- fil_crypt_threads_signal();
+ if (srv_n_fil_crypt_threads_started) {
+ mysql_mutex_unlock(&fil_system.mutex);
+ fil_crypt_threads_signal();
+ mysql_mutex_lock(&fil_system.mutex);
+ }
}
return(space);
@@ -1310,9 +1323,9 @@ void fil_system_t::close()
void fil_system_t::add_opened_last_to_space_list(fil_space_t *space)
{
if (UNIV_LIKELY(space_list_last_opened != nullptr))
- space_list.insert(space_list_t::iterator(space_list_last_opened), *space);
+ space_list.insert(++space_list_t::iterator(space_list_last_opened), *space);
else
- space_list.push_back(*space);
+ space_list.push_front(*space);
space_list_last_opened= space;
}
@@ -1482,7 +1495,10 @@ void fil_space_t::close_all()
for (ulint count= 10000; count--;)
{
- if (!space.set_closing())
+ const auto n= space.set_closing();
+ if (n & STOPPING)
+ goto next;
+ if (!(n & (PENDING | NEEDS_FSYNC)))
{
node->close();
goto next;
@@ -1732,9 +1748,7 @@ pfs_os_file_t fil_delete_tablespace(uint32_t id)
mtr_t mtr;
mtr.start();
mtr.log_file_op(FILE_DELETE, id, space->chain.start->name);
- handle= space->chain.start->handle;
- mtr.commit_file(*space, nullptr);
-
+ mtr.commit_file(*space, nullptr, &handle);
fil_space_free_low(space);
}
@@ -2047,16 +2061,20 @@ err_exit:
DBUG_EXECUTE_IF("checkpoint_after_file_create",
log_make_checkpoint(););
+ mysql_mutex_lock(&fil_system.mutex);
if (fil_space_t* space = fil_space_t::create(space_id, flags,
FIL_TYPE_TABLESPACE,
crypt_data, mode, true)) {
fil_node_t* node = space->add(path, file, size, false, true);
+ mysql_mutex_unlock(&fil_system.mutex);
IF_WIN(node->find_metadata(), node->find_metadata(file, true));
mtr.start();
mtr.set_named_space(space);
ut_a(fsp_header_init(space, size, &mtr) == DB_SUCCESS);
mtr.commit();
return space;
+ } else {
+ mysql_mutex_unlock(&fil_system.mutex);
}
if (space_name.data()) {
@@ -2171,6 +2189,10 @@ func_exit:
must_validate = true;
}
+ const bool operation_not_for_export =
+ srv_operation != SRV_OPERATION_RESTORE_EXPORT
+ && srv_operation != SRV_OPERATION_EXPORT_RESTORED;
+
/* Always look for a file at the default location. But don't log
an error if the tablespace is already open in remote or dict. */
ut_a(df_default.filepath());
@@ -2181,6 +2203,7 @@ func_exit:
drop_garbage_tables_after_restore() a little later. */
const bool strict = validate && !tablespaces_found
+ && operation_not_for_export
&& !(srv_operation == SRV_OPERATION_NORMAL
&& srv_start_after_restore
&& srv_force_recovery < SRV_FORCE_NO_BACKGROUND
@@ -2229,7 +2252,11 @@ func_exit:
goto corrupted;
}
- os_file_get_last_error(true);
+ os_file_get_last_error(operation_not_for_export,
+ !operation_not_for_export);
+ if (!operation_not_for_export) {
+ goto corrupted;
+ }
sql_print_error("InnoDB: Could not find a valid tablespace"
" file for %.*s. %s",
static_cast<int>(name.size()), name.data(),
@@ -2307,8 +2334,10 @@ skip_validate:
first_page)
: NULL;
+ mysql_mutex_lock(&fil_system.mutex);
space = fil_space_t::create(id, flags, purpose, crypt_data);
if (!space) {
+ mysql_mutex_unlock(&fil_system.mutex);
goto error;
}
@@ -2318,6 +2347,7 @@ skip_validate:
space->add(
df_remote.is_open() ? df_remote.filepath() :
df_default.filepath(), OS_FILE_CLOSED, 0, false, true);
+ mysql_mutex_unlock(&fil_system.mutex);
if (must_validate && !srv_read_only_mode) {
df_remote.close();
@@ -2396,6 +2426,7 @@ fil_ibd_discover(
case SRV_OPERATION_RESTORE:
break;
case SRV_OPERATION_NORMAL:
+ case SRV_OPERATION_EXPORT_RESTORED:
size_t len= strlen(db);
if (len <= 4 || strcmp(db + len - 4, dot_ext[IBD])) {
break;
@@ -2602,10 +2633,13 @@ tablespace_check:
return FIL_LOAD_INVALID;
}
+ mysql_mutex_lock(&fil_system.mutex);
+
space = fil_space_t::create(
space_id, flags, FIL_TYPE_TABLESPACE, crypt_data);
if (space == NULL) {
+ mysql_mutex_unlock(&fil_system.mutex);
return(FIL_LOAD_INVALID);
}
@@ -2617,6 +2651,7 @@ tablespace_check:
let fil_node_open() do that task. */
space->add(file.filepath(), OS_FILE_CLOSED, 0, false, false);
+ mysql_mutex_unlock(&fil_system.mutex);
return(FIL_LOAD_OK);
}
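The two fixes in add_opened_last_to_space_list() hinge on insert() placing the new element before the given position: a newly opened tablespace must land after space_list_last_opened (hence the ++ on the iterator), and when no opened tablespace exists yet it becomes the list head, so opened tablespaces appear to stay in a contiguous prefix of space_list. A small std::list analogy (illustrative only; fil_system uses its own intrusive list type):

    #include <list>
    #include <string>
    #include <cassert>
    #include <iterator>

    int main()
    {
      // "opened" spaces first, "closed" ones after them.
      std::list<std::string> space_list{"opened-1", "opened-2", "closed-1"};
      auto last_opened = std::next(space_list.begin()); // points at "opened-2"

      // insert() inserts BEFORE the position, so advance past the last opened
      // element to append the newly opened space to the opened prefix.
      space_list.insert(std::next(last_opened), "opened-3");

      assert((space_list == std::list<std::string>{
          "opened-1", "opened-2", "opened-3", "closed-1"}));

      // If nothing is opened yet, the new space becomes the head of the list
      // (push_front), mirroring the "else" branch in the diff.
      return 0;
    }
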
diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc
index 0a4119587f8..e93ca4bc177 100644
--- a/storage/innobase/fsp/fsp0file.cc
+++ b/storage/innobase/fsp/fsp0file.cc
@@ -756,7 +756,7 @@ the double write buffer.
bool
Datafile::restore_from_doublewrite()
{
- if (srv_operation != SRV_OPERATION_NORMAL) {
+ if (srv_operation > SRV_OPERATION_EXPORT_RESTORED) {
return true;
}
diff --git a/storage/innobase/fsp/fsp0space.cc b/storage/innobase/fsp/fsp0space.cc
index 6bdf9fcc4d8..c2152b08590 100644
--- a/storage/innobase/fsp/fsp0space.cc
+++ b/storage/innobase/fsp/fsp0space.cc
@@ -88,25 +88,25 @@ Tablespace::open_or_create(bool is_temp)
ut_ad(!m_files.empty());
for (iterator it = begin(); it != end(); ++it) {
-
if (it->m_exists) {
err = it->open_or_create(
m_ignore_read_only
? false : srv_read_only_mode);
+ if (err != DB_SUCCESS) {
+ return err;
+ }
} else {
err = it->open_or_create(
m_ignore_read_only
? false : srv_read_only_mode);
- /* Set the correct open flags now that we have
- successfully created the file. */
- if (err == DB_SUCCESS) {
- file_found(*it);
+ if (err != DB_SUCCESS) {
+ return err;
}
- }
- if (err != DB_SUCCESS) {
- break;
+ /* Set the correct open flags now that we have
+ successfully created the file. */
+ file_found(*it);
}
/* We can close the handle now and open the tablespace
@@ -130,20 +130,22 @@ Tablespace::open_or_create(bool is_temp)
fsp_flags = FSP_FLAGS_PAGE_SSIZE();
}
+ mysql_mutex_lock(&fil_system.mutex);
space = fil_space_t::create(
m_space_id, fsp_flags,
is_temp
? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE,
NULL);
if (!space) {
+ mysql_mutex_unlock(&fil_system.mutex);
return DB_ERROR;
}
+ } else {
+ mysql_mutex_lock(&fil_system.mutex);
}
-
- ut_a(fil_validate());
-
space->add(it->m_filepath, OS_FILE_CLOSED, it->m_size,
false, true);
+ mysql_mutex_unlock(&fil_system.mutex);
}
return(err);
diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc
index ef650436907..9aef6f389ef 100644
--- a/storage/innobase/fsp/fsp0sysspace.cc
+++ b/storage/innobase/fsp/fsp0sysspace.cc
@@ -576,7 +576,7 @@ inline dberr_t SysTablespace::read_lsn_and_check_flags()
ut_a(it->order() == 0);
- if (srv_operation == SRV_OPERATION_NORMAL) {
+ if (srv_operation <= SRV_OPERATION_EXPORT_RESTORED) {
buf_dblwr.init_or_load_pages(it->handle(), it->filepath());
}
@@ -920,6 +920,7 @@ SysTablespace::open_or_create(
/* Close the curent handles, add space and file info to the
fil_system cache and the Data Dictionary, and re-open them
in file_system cache so that they stay open until shutdown. */
+ mysql_mutex_lock(&fil_system.mutex);
ulint node_counter = 0;
for (files_t::iterator it = begin; it != end; ++it) {
it->close();
@@ -933,7 +934,8 @@ SysTablespace::open_or_create(
FIL_TYPE_TEMPORARY, NULL);
ut_ad(space == fil_system.temp_space);
if (!space) {
- return DB_ERROR;
+ err = DB_ERROR;
+ break;
}
ut_ad(!space->is_compressed());
ut_ad(space->full_crc32());
@@ -944,12 +946,11 @@ SysTablespace::open_or_create(
FIL_TYPE_TABLESPACE, NULL);
ut_ad(space == fil_system.sys_space);
if (!space) {
- return DB_ERROR;
+ err = DB_ERROR;
+ break;
}
}
- ut_a(fil_validate());
-
uint32_t max_size = (++node_counter == m_files.size()
? (m_last_file_size_max == 0
? UINT32_MAX
@@ -960,6 +961,7 @@ SysTablespace::open_or_create(
it->m_type != SRV_NOT_RAW, true, max_size);
}
+ mysql_mutex_unlock(&fil_system.mutex);
return(err);
}
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index 8623e653991..d2b90542ec6 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -38,22 +38,6 @@ Full Text Search interface
#include "dict0stats.h"
#include "btr0pcur.h"
-/** The SYNC state of the cache. There is one instance of this struct
-associated with each ADD thread. */
-struct fts_sync_t {
- /** Transaction used for SYNCing the cache to disk */
- trx_t *trx;
- /** Table with FTS index(es) */
- dict_table_t *table;
- /** Max size in bytes of the cache */
- ulint max_cache_size;
- /** The doc id at which the cache was noted as being
- full, we use this to set the upper_limit field */
- doc_id_t max_doc_id;
- /** SYNC start time; only used if fts_enable_diag_print */
- time_t start_time;
-};
-
static const ulint FTS_MAX_ID_LEN = 32;
/** Column name from the FTS config table */
@@ -201,8 +185,15 @@ struct fts_tokenize_param_t {
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] sync sync state
+@param[in] unlock_cache whether unlock cache lock when write node
+@param[in] wait whether wait when a sync is in progress
@return DB_SUCCESS if all OK */
-static dberr_t fts_sync(fts_sync_t *sync);
+static
+dberr_t
+fts_sync(
+ fts_sync_t* sync,
+ bool unlock_cache,
+ bool wait);
/****************************************************************//**
Release all resources help by the words rb tree e.g., the node ilist. */
@@ -275,6 +266,7 @@ fts_cache_destroy(fts_cache_t* cache)
mysql_mutex_destroy(&cache->init_lock);
mysql_mutex_destroy(&cache->deleted_lock);
mysql_mutex_destroy(&cache->doc_id_lock);
+ pthread_cond_destroy(&cache->sync->cond);
if (cache->stopword_info.cached_stopword) {
rbt_free(cache->stopword_info.cached_stopword);
@@ -574,6 +566,7 @@ fts_index_cache_init(
for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
ut_a(index_cache->ins_graph[i] == NULL);
+ ut_a(index_cache->sel_graph[i] == NULL);
}
}
@@ -643,6 +636,7 @@ fts_cache_create(
mem_heap_zalloc(heap, sizeof(fts_sync_t)));
cache->sync->table = table;
+ pthread_cond_init(&cache->sync->cond, nullptr);
/* Create the index cache vector that will hold the inverted indexes. */
cache->indexes = ib_vector_create(
@@ -968,6 +962,10 @@ fts_cache_index_cache_create(
mem_heap_zalloc(static_cast<mem_heap_t*>(
cache->self_heap->arg), n_bytes));
+ index_cache->sel_graph = static_cast<que_t**>(
+ mem_heap_zalloc(static_cast<mem_heap_t*>(
+ cache->self_heap->arg), n_bytes));
+
fts_index_cache_init(cache->sync_heap, index_cache);
if (cache->get_docs) {
@@ -1041,6 +1039,13 @@ fts_cache_clear(
index_cache->ins_graph[j] = NULL;
}
+
+ if (index_cache->sel_graph[j] != NULL) {
+
+ que_graph_free(index_cache->sel_graph[j]);
+
+ index_cache->sel_graph[j] = NULL;
+ }
}
index_cache->doc_stats = NULL;
@@ -1333,7 +1338,8 @@ fts_cache_add_doc(
ib_vector_last(word->nodes));
}
- if (!fts_node || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
+ if (fts_node == NULL || fts_node->synced
+ || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
|| doc_id < fts_node->last_doc_id) {
fts_node = static_cast<fts_node_t*>(
@@ -3320,7 +3326,7 @@ fts_add_doc_from_tuple(
if (cache->total_size > fts_max_cache_size / 5
|| fts_need_sync) {
- fts_sync(cache->sync);
+ fts_sync(cache->sync, true, false);
}
mtr_start(&mtr);
@@ -3356,7 +3362,7 @@ fts_add_doc_by_id(
dict_index_t* fts_id_index;
ibool is_id_cluster;
fts_cache_t* cache = ftt->table->fts->cache;
- bool need_sync= false;
+
ut_ad(cache->get_docs);
/* If Doc ID has been supplied by the user, then the table
@@ -3496,32 +3502,44 @@ fts_add_doc_by_id(
get_doc->index_cache,
doc_id, doc.tokens);
- /** FTS cache sync should happen
- frequently. Because user thread
- shouldn't hold the cache lock for
- longer time. So cache should sync
- whenever cache size exceeds 512 KB */
- need_sync =
- cache->total_size > 512*1024;
+ bool need_sync = !cache->sync->in_progress
+ && (fts_need_sync
+ || (cache->total_size
+ - cache->total_size_at_sync)
+ > fts_max_cache_size / 10);
+ if (need_sync) {
+ cache->total_size_at_sync =
+ cache->total_size;
+ }
mysql_mutex_unlock(&table->fts->cache->lock);
DBUG_EXECUTE_IF(
"fts_instrument_sync",
- fts_sync_table(table);
+ fts_optimize_request_sync_table(table);
+ mysql_mutex_lock(&cache->lock);
+ if (cache->sync->in_progress)
+ my_cond_wait(
+ &cache->sync->cond,
+ &cache->lock.m_mutex);
+ mysql_mutex_unlock(&cache->lock);
);
DBUG_EXECUTE_IF(
"fts_instrument_sync_debug",
- fts_sync(cache->sync);
+ fts_sync(cache->sync, true, true);
);
DEBUG_SYNC_C("fts_instrument_sync_request");
DBUG_EXECUTE_IF(
"fts_instrument_sync_request",
- need_sync= true;
+ fts_optimize_request_sync_table(table);
);
+ if (need_sync) {
+ fts_optimize_request_sync_table(table);
+ }
+
mtr_start(&mtr);
if (i < num_idx - 1) {
@@ -3547,10 +3565,6 @@ func_exit:
ut_free(pcur.old_rec_buf);
mem_heap_free(heap);
-
- if (need_sync) {
- fts_sync_table(table);
- }
}
@@ -3910,13 +3924,15 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_write_words(
trx_t* trx,
- fts_index_cache_t* index_cache)
+ fts_index_cache_t* index_cache,
+ bool unlock_cache)
{
fts_table_t fts_table;
ulint n_nodes = 0;
ulint n_words = 0;
const ib_rbt_node_t* rbt_node;
dberr_t error = DB_SUCCESS;
+ ibool print_error = FALSE;
dict_table_t* table = index_cache->index->table;
FTS_INIT_INDEX_TABLE(
@@ -3947,36 +3963,53 @@ fts_sync_write_words(
fts_table.suffix = fts_get_suffix(selected);
+ /* We iterate over all the nodes even if there was an error */
for (i = 0; i < ib_vector_size(word->nodes); ++i) {
fts_node_t* fts_node = static_cast<fts_node_t*>(
ib_vector_get(word->nodes, i));
- error = fts_write_node(
- trx, &index_cache->ins_graph[selected],
- &fts_table, &word->text, fts_node);
+ if (fts_node->synced) {
+ continue;
+ } else {
+ fts_node->synced = true;
+ }
+
+ /*FIXME: we need to handle the error properly. */
+ if (error == DB_SUCCESS) {
+ if (unlock_cache) {
+ mysql_mutex_unlock(
+ &table->fts->cache->lock);
+ }
+
+ error = fts_write_node(
+ trx,
+ &index_cache->ins_graph[selected],
+ &fts_table, &word->text, fts_node);
- DEBUG_SYNC_C("fts_write_node");
- DBUG_EXECUTE_IF("fts_write_node_crash",
+ DEBUG_SYNC_C("fts_write_node");
+ DBUG_EXECUTE_IF("fts_write_node_crash",
DBUG_SUICIDE(););
- DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
+ DBUG_EXECUTE_IF(
+ "fts_instrument_sync_sleep",
std::this_thread::sleep_for(
std::chrono::seconds(1)););
- if (error != DB_SUCCESS) {
- goto err_exit;
+ if (unlock_cache) {
+ mysql_mutex_lock(
+ &table->fts->cache->lock);
+ }
}
}
n_nodes += ib_vector_size(word->nodes);
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
-err_exit:
+ if (UNIV_UNLIKELY(error != DB_SUCCESS) && !print_error) {
ib::error() << "(" << error << ") writing"
" word node to FTS auxiliary index table "
<< table->name;
- break;
+ print_error = TRUE;
}
}
@@ -4035,44 +4068,58 @@ fts_sync_index(
ut_ad(rbt_validate(index_cache->words));
- return(fts_sync_write_words(trx, index_cache));
+ return(fts_sync_write_words(trx, index_cache, sync->unlock_cache));
}
-/** Rollback a sync operation
-@param[in,out] sync sync state */
+/** Check if index cache has been synced completely
+@param[in,out] index_cache index cache
+@return true if index is synced, otherwise false. */
static
-void
-fts_sync_rollback(
- fts_sync_t* sync)
+bool
+fts_sync_index_check(
+ fts_index_cache_t* index_cache)
{
- trx_t* trx = sync->trx;
- fts_cache_t* cache = sync->table->fts->cache;
-
- for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
- ulint j;
- fts_index_cache_t* index_cache;
-
- index_cache = static_cast<fts_index_cache_t*>(
- ib_vector_get(cache->indexes, i));
+ const ib_rbt_node_t* rbt_node;
- for (j = 0; fts_index_selector[j].value; ++j) {
+ for (rbt_node = rbt_first(index_cache->words);
+ rbt_node != NULL;
+ rbt_node = rbt_next(index_cache->words, rbt_node)) {
- if (index_cache->ins_graph[j] != NULL) {
+ fts_tokenizer_word_t* word;
+ word = rbt_value(fts_tokenizer_word_t, rbt_node);
- que_graph_free(index_cache->ins_graph[j]);
+ fts_node_t* fts_node;
+ fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
- index_cache->ins_graph[j] = NULL;
- }
+ if (!fts_node->synced) {
+ return(false);
}
}
- mysql_mutex_unlock(&cache->lock);
+ return(true);
+}
- fts_sql_rollback(trx);
+/** Reset synced flag in index cache when rollback
+@param[in,out] index_cache index cache */
+static
+void
+fts_sync_index_reset(
+ fts_index_cache_t* index_cache)
+{
+ const ib_rbt_node_t* rbt_node;
- /* Avoid assertion in trx_t::free(). */
- trx->dict_operation_lock_mode = false;
- trx->free();
+ for (rbt_node = rbt_first(index_cache->words);
+ rbt_node != NULL;
+ rbt_node = rbt_next(index_cache->words, rbt_node)) {
+
+ fts_tokenizer_word_t* word;
+ word = rbt_value(fts_tokenizer_word_t, rbt_node);
+
+ fts_node_t* fts_node;
+ fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
+
+ fts_node->synced = false;
+ }
}
/** Commit the SYNC, change state of processed doc ids etc.
@@ -4105,20 +4152,19 @@ fts_sync_commit(
sync, cache->deleted_doc_ids);
}
+ /* We need to do this within the deleted lock since fts_delete() can
+ attempt to add a deleted doc id to the cache deleted id array. */
+ fts_cache_clear(cache);
+ DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
+ fts_cache_init(cache);
+ mysql_mutex_unlock(&cache->lock);
+
if (UNIV_LIKELY(error == DB_SUCCESS)) {
- /* We need to do this within the deleted lock
- since fts_delete() can attempt to add a deleted
- doc id to the cache deleted id array. */
- fts_cache_clear(cache);
- DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
- fts_cache_init(cache);
- mysql_mutex_unlock(&cache->lock);
fts_sql_commit(trx);
} else {
+ fts_sql_rollback(trx);
ib::error() << "(" << error << ") during SYNC of "
"table " << sync->table->name;
- fts_sync_rollback(sync);
- return error;
}
if (UNIV_UNLIKELY(fts_enable_diag_print) && elapsed_time) {
@@ -4138,13 +4184,66 @@ fts_sync_commit(
return(error);
}
+/** Rollback a sync operation
+@param[in,out] sync sync state */
+static
+void
+fts_sync_rollback(
+ fts_sync_t* sync)
+{
+ trx_t* trx = sync->trx;
+ fts_cache_t* cache = sync->table->fts->cache;
+
+ for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
+ ulint j;
+ fts_index_cache_t* index_cache;
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(cache->indexes, i));
+
+ /* Reset synced flag so nodes will not be skipped
+ in the next sync, see fts_sync_write_words(). */
+ fts_sync_index_reset(index_cache);
+
+ for (j = 0; fts_index_selector[j].value; ++j) {
+
+ if (index_cache->ins_graph[j] != NULL) {
+
+ que_graph_free(index_cache->ins_graph[j]);
+
+ index_cache->ins_graph[j] = NULL;
+ }
+
+ if (index_cache->sel_graph[j] != NULL) {
+
+ que_graph_free(index_cache->sel_graph[j]);
+
+ index_cache->sel_graph[j] = NULL;
+ }
+ }
+ }
+
+ mysql_mutex_unlock(&cache->lock);
+
+ fts_sql_rollback(trx);
+
+ /* Avoid assertion in trx_t::free(). */
+ trx->dict_operation_lock_mode = false;
+ trx->free();
+}
+
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] sync sync state
@param[in] unlock_cache whether unlock cache lock when write node
@param[in] wait whether wait when a sync is in progress
@return DB_SUCCESS if all OK */
-static dberr_t fts_sync(fts_sync_t *sync)
+static
+dberr_t
+fts_sync(
+ fts_sync_t* sync,
+ bool unlock_cache,
+ bool wait)
{
if (srv_read_only_mode) {
return DB_READ_ONLY;
@@ -4155,13 +4254,33 @@ static dberr_t fts_sync(fts_sync_t *sync)
fts_cache_t* cache = sync->table->fts->cache;
mysql_mutex_lock(&cache->lock);
+
+ /* Check if cache is being synced.
+ Note: we release cache lock in fts_sync_write_words() to
+ avoid long wait for the lock by other threads. */
+ if (sync->in_progress) {
+ if (!wait) {
+ mysql_mutex_unlock(&cache->lock);
+ return(DB_SUCCESS);
+ }
+ do {
+ my_cond_wait(&sync->cond, &cache->lock.m_mutex);
+ } while (sync->in_progress);
+ }
+
+ sync->unlock_cache = unlock_cache;
+ sync->in_progress = true;
+
DEBUG_SYNC_C("fts_sync_begin");
fts_sync_begin(sync);
+begin_sync:
const size_t fts_cache_size= fts_max_cache_size;
if (cache->total_size > fts_cache_size) {
/* Avoid the case: sync never finish when
insert/update keeps comming. */
+ ut_ad(sync->unlock_cache);
+ sync->unlock_cache = false;
ib::warn() << "Total InnoDB FTS size "
<< cache->total_size << " for the table "
<< cache->sync->table->name
@@ -4185,23 +4304,52 @@ static dberr_t fts_sync(fts_sync_t *sync)
error = fts_sync_index(sync, index_cache);
if (error != DB_SUCCESS) {
- goto err_exit;
+ goto end_sync;
+ }
+
+ if (!sync->unlock_cache
+ && cache->total_size < fts_max_cache_size) {
+ /* Reset the unlock cache if the value
+ is less than innodb_ft_cache_size */
+ sync->unlock_cache = true;
}
}
DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
+ sync->interrupted = true;
error = DB_INTERRUPTED;
- goto err_exit;
+ goto end_sync;
);
- if (error == DB_SUCCESS) {
+ /* Make sure all the caches are synced. */
+ for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
+ fts_index_cache_t* index_cache;
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(cache->indexes, i));
+
+ if (index_cache->index->to_be_dropped
+ || fts_sync_index_check(index_cache)) {
+ continue;
+ }
+
+ goto begin_sync;
+ }
+
+end_sync:
+ if (error == DB_SUCCESS && !sync->interrupted) {
error = fts_sync_commit(sync);
} else {
-err_exit:
fts_sync_rollback(sync);
- return error;
}
+ mysql_mutex_lock(&cache->lock);
+ ut_ad(sync->in_progress);
+ sync->interrupted = false;
+ sync->in_progress = false;
+ pthread_cond_broadcast(&sync->cond);
+ mysql_mutex_unlock(&cache->lock);
+
/* We need to check whether an optimize is required, for that
we make copies of the two variables that control the trigger. These
variables can change behind our back and we don't want to hold the
@@ -4213,7 +4361,6 @@ err_exit:
mysql_mutex_unlock(&cache->deleted_lock);
- DEBUG_SYNC_C("fts_sync_end");
return(error);
}
@@ -4222,12 +4369,12 @@ FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] table fts table
@param[in] wait whether wait for existing sync to finish
@return DB_SUCCESS on success, error code on failure. */
-dberr_t fts_sync_table(dict_table_t* table)
+dberr_t fts_sync_table(dict_table_t* table, bool wait)
{
ut_ad(table->fts);
return table->space && !table->corrupted && table->fts->cache
- ? fts_sync(table->fts->cache->sync)
+ ? fts_sync(table->fts->cache->sync, !wait, wait)
: DB_SUCCESS;
}
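fts_sync() now marks itself in_progress under cache->lock; a concurrent caller either bails out immediately (wait=false) or sleeps on sync->cond until the running sync broadcasts completion. A minimal sketch of that wait-or-skip protocol with standard primitives in place of mysql_mutex/my_cond (names are illustrative):

    #include <mutex>
    #include <condition_variable>

    struct FtsSyncState {
      std::mutex lock;               // stands in for cache->lock
      std::condition_variable cond;  // stands in for sync->cond
      bool in_progress = false;
    };

    // Returns false if a sync was already running and wait == false.
    bool fts_sync_sketch(FtsSyncState &s, bool wait)
    {
      std::unique_lock<std::mutex> lk(s.lock);
      if (s.in_progress) {
        if (!wait)
          return false;                  // let the running sync do the work
        s.cond.wait(lk, [&]{ return !s.in_progress; });
      }
      s.in_progress = true;
      lk.unlock();

      /* ... write the cache out to the auxiliary index tables ... */

      lk.lock();
      s.in_progress = false;
      s.cond.notify_all();               // wake any waiting callers
      return true;
    }
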
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
index 7c40a25e6e7..fe31767d901 100644
--- a/storage/innobase/fts/fts0opt.cc
+++ b/storage/innobase/fts/fts0opt.cc
@@ -83,8 +83,9 @@ enum fts_msg_type_t {
FTS_MSG_ADD_TABLE, /*!< Add table to the optimize thread's
work queue */
- FTS_MSG_DEL_TABLE /*!< Remove a table from the optimize
+ FTS_MSG_DEL_TABLE, /*!< Remove a table from the optimize
threads work queue */
+ FTS_MSG_SYNC_TABLE /*!< Sync fts cache of a table */
};
/** Compressed list of words that have been read from FTS INDEX
@@ -2624,6 +2625,36 @@ fts_optimize_remove_table(
mysql_mutex_unlock(&fts_optimize_wq->mutex);
}
+/** Send sync fts cache for the table.
+@param[in] table table to sync */
+void
+fts_optimize_request_sync_table(
+ dict_table_t* table)
+{
+ /* if the optimize system not yet initialized, return */
+ if (!fts_optimize_wq) {
+ return;
+ }
+
+ mysql_mutex_lock(&fts_optimize_wq->mutex);
+
+ /* FTS optimizer thread is already exited */
+ if (fts_opt_start_shutdown) {
+ ib::info() << "Try to sync table " << table->name
+ << " after FTS optimize thread exiting.";
+ } else if (table->fts->sync_message) {
+ /* If the table already has SYNC message in
+ fts_optimize_wq queue then ignore it */
+ } else {
+ add_msg(fts_optimize_create_msg(FTS_MSG_SYNC_TABLE, table));
+ table->fts->sync_message = true;
+ DBUG_EXECUTE_IF("fts_optimize_wq_count_check",
+ DBUG_ASSERT(fts_optimize_wq->length <= 1000););
+ }
+
+ mysql_mutex_unlock(&fts_optimize_wq->mutex);
+}
+
/** Add a table to fts_slots if it doesn't already exist. */
static bool fts_optimize_new_table(dict_table_t* table)
{
@@ -2765,8 +2796,7 @@ static void fts_optimize_sync_table(dict_table_t *table,
if (sync_table->fts && sync_table->fts->cache && sync_table->is_accessible())
{
- fts_sync_table(sync_table);
-
+ fts_sync_table(sync_table, false);
if (process_message)
{
mysql_mutex_lock(&fts_optimize_wq->mutex);
@@ -2866,6 +2896,24 @@ retry_later:
--n_tables;
}
break;
+
+ case FTS_MSG_SYNC_TABLE:
+ if (UNIV_UNLIKELY(wsrep_sst_disable_writes)) {
+ add_msg(msg);
+ goto retry_later;
+ }
+
+ DBUG_EXECUTE_IF(
+ "fts_instrument_msg_sync_sleep",
+ std::this_thread::sleep_for(
+ std::chrono::milliseconds(
+ 300)););
+
+ fts_optimize_sync_table(
+ static_cast<dict_table_t*>(msg->ptr),
+ true);
+ break;
+
default:
ut_error;
}
@@ -2998,7 +3046,7 @@ void fts_sync_during_ddl(dict_table_t* table)
if (!sync_message)
return;
- fts_sync_table(table);
+ fts_sync_table(table, false);
mysql_mutex_lock(&fts_optimize_wq->mutex);
table->fts->sync_message = false;
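fts_optimize_request_sync_table() enqueues at most one FTS_MSG_SYNC_TABLE per table: sync_message is set under the queue mutex when the message is queued and cleared after the worker has synced the table, so repeated requests while one is pending are dropped. A generic sketch of that deduplicated work-queue pattern (standard containers, illustrative names):

    #include <mutex>
    #include <queue>

    struct Table {
      bool sync_requested = false;   // analogous to table->fts->sync_message
    };

    struct SyncQueue {
      std::mutex m;
      std::queue<Table*> q;

      // Enqueue a sync request unless one is already pending for this table.
      void request_sync(Table &t) {
        std::lock_guard<std::mutex> g(m);
        if (t.sync_requested)
          return;                    // already queued; avoid flooding the queue
        t.sync_requested = true;
        q.push(&t);
      }

      // Worker side: take one request, sync, then clear the flag so that
      // later requests can be queued again.
      template <class SyncFn>
      bool process_one(SyncFn sync) {
        Table *t;
        {
          std::lock_guard<std::mutex> g(m);
          if (q.empty())
            return false;
          t = q.front();
          q.pop();
        }
        sync(*t);
        std::lock_guard<std::mutex> g(m);
        t->sync_requested = false;
        return true;
      }
    };
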
diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc
index 60218a132c9..f75eab07cf3 100644
--- a/storage/innobase/gis/gis0rtree.cc
+++ b/storage/innobase/gis/gis0rtree.cc
@@ -1468,7 +1468,8 @@ rtr_ins_enlarge_mbr(
/* Check path info is not empty. */
ut_ad(!btr_cur->rtr_info->parent_path->empty());
- ut_ad(btr_cur->rtr_info->thr || !btr_cur->index()->is_committed());
+ ut_ad(btr_cur->rtr_info->thr || !btr_cur->index()->is_committed()
+ || btr_cur->index()->table->name.is_temporary());
/* Create a memory heap. */
heap = mem_heap_create(1024);
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 211126765f8..6345435af97 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -526,6 +526,7 @@ mysql_pfs_key_t trx_pool_manager_mutex_key;
mysql_pfs_key_t lock_wait_mutex_key;
mysql_pfs_key_t trx_sys_mutex_key;
mysql_pfs_key_t srv_threads_mutex_key;
+mysql_pfs_key_t tpool_cache_mutex_key;
/* all_innodb_mutexes array contains mutexes that are
performance schema instrumented if "UNIV_PFS_MUTEX"
@@ -557,6 +558,7 @@ static PSI_mutex_info all_innodb_mutexes[] = {
PSI_KEY(rtr_match_mutex),
PSI_KEY(rtr_path_mutex),
PSI_KEY(trx_sys_mutex),
+ PSI_KEY(tpool_cache_mutex),
};
# endif /* UNIV_PFS_MUTEX */
@@ -1527,6 +1529,7 @@ static void innodb_drop_database(handlerton*, char *path)
mtr.commit();
for (pfs_os_file_t detached : to_close)
os_file_close(detached);
+
/* Any changes must be persisted before we return. */
log_write_up_to(mtr.commit_lsn(), true);
}
@@ -2036,7 +2039,7 @@ static void innodb_ddl_recovery_done(handlerton*)
{
ut_ad(!ddl_recovery_done);
ut_d(ddl_recovery_done= true);
- if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL &&
+ if (!srv_read_only_mode && srv_operation <= SRV_OPERATION_EXPORT_RESTORED &&
srv_force_recovery < SRV_FORCE_NO_BACKGROUND)
{
if (srv_start_after_restore && !high_level_read_only)
@@ -8439,6 +8442,37 @@ wsrep_calc_row_hash(
return(0);
}
+
+/** Append table-level exclusive key.
+@param thd MySQL thread handle
+@param table table
+@retval false on success
+@retval true on failure */
+ATTRIBUTE_COLD bool wsrep_append_table_key(MYSQL_THD thd, const dict_table_t &table)
+{
+ char db_buf[NAME_LEN + 1];
+ char tbl_buf[NAME_LEN + 1];
+ ulint db_buf_len, tbl_buf_len;
+
+ if (!table.parse_name(db_buf, tbl_buf, &db_buf_len, &tbl_buf_len))
+ {
+ WSREP_ERROR("Parse_name for table key append failed: %s",
+ wsrep_thd_query(thd));
+ return true;
+ }
+
+ /* Append table-level exclusive key */
+ const int rcode = wsrep_thd_append_table_key(thd, db_buf,
+ tbl_buf, WSREP_SERVICE_KEY_EXCLUSIVE);
+ if (rcode)
+ {
+ WSREP_ERROR("Appending table key failed: %s, %d",
+ wsrep_thd_query(thd), rcode);
+ return true;
+ }
+
+ return false;
+}
#endif /* WITH_WSREP */
/**
@@ -8609,11 +8643,16 @@ func_exit:
&& !wsrep_thd_ignore_table(m_user_thd)) {
DBUG_PRINT("wsrep", ("update row key"));
- if (wsrep_append_keys(m_user_thd,
- wsrep_protocol_version >= 4
- ? WSREP_SERVICE_KEY_UPDATE
- : WSREP_SERVICE_KEY_EXCLUSIVE,
- old_row, new_row)){
+ /* We use table-level exclusive key for SEQUENCES
+ and normal key append for others. */
+ if (table->s->table_type == TABLE_TYPE_SEQUENCE) {
+ if (wsrep_append_table_key(m_user_thd, *m_prebuilt->table))
+ DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
+ } else if (wsrep_append_keys(m_user_thd,
+ wsrep_protocol_version >= 4
+ ? WSREP_SERVICE_KEY_UPDATE
+ : WSREP_SERVICE_KEY_EXCLUSIVE,
+ old_row, new_row)) {
WSREP_DEBUG("WSREP: UPDATE_ROW_KEY FAILED");
DBUG_PRINT("wsrep", ("row key failed"));
DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
@@ -17821,44 +17860,6 @@ exit:
return;
}
-/** Validate SET GLOBAL innodb_buffer_pool_filename.
-On Windows, file names with colon (:) are not allowed.
-@param thd connection
-@param save &srv_buf_dump_filename
-@param value new value to be validated
-@return 0 for valid name */
-static int innodb_srv_buf_dump_filename_validate(THD *thd, st_mysql_sys_var*,
- void *save,
- st_mysql_value *value)
-{
- char buff[OS_FILE_MAX_PATH];
- int len= sizeof buff;
-
- if (const char *buf_name= value->val_str(value, buff, &len))
- {
-#ifdef _WIN32
- if (!is_filename_allowed(buf_name, len, FALSE))
- {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "InnoDB: innodb_buffer_pool_filename "
- "cannot have colon (:) in the file name.");
- return 1;
- }
-#endif /* _WIN32 */
- if (buf_name == buff)
- {
- ut_ad(static_cast<size_t>(len) < sizeof buff);
- buf_name= thd_strmake(thd, buf_name, len);
- }
-
- *static_cast<const char**>(save)= buf_name;
- return 0;
- }
-
- return 1;
-}
-
#ifdef UNIV_DEBUG
static char* srv_buffer_pool_evict;
@@ -18975,9 +18976,9 @@ static MYSQL_SYSVAR_SIZE_T(buffer_pool_chunk_size, srv_buf_pool_chunk_unit,
0, 0, SIZE_T_MAX, 1024 * 1024);
static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Filename to/from which to dump/load the InnoDB buffer pool",
- innodb_srv_buf_dump_filename_validate, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);
+ NULL, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
PLUGIN_VAR_RQCMDARG,
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 5ce41a9857c..406baf3e662 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -11604,8 +11604,12 @@ foreign_fail:
ut_d(dict_table_check_for_dup_indexes(
ctx->new_table, CHECK_ABORTED_OK));
- ut_ad(!ctx->new_table->fts
- || fts_check_cached_index(ctx->new_table));
+#ifdef UNIV_DEBUG
+ if (!(ctx->new_table->fts != NULL
+ && ctx->new_table->fts->cache->sync->in_progress)) {
+ ut_a(fts_check_cached_index(ctx->new_table));
+ }
+#endif
}
unlock_and_close_files(deleted, trx);
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 589182b73ba..2786c26271d 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -4923,10 +4923,10 @@ static ST_FIELD_INFO innodb_sysindex_fields_info[]=
Column("N_FIELDS", SLong(), NOT_NULL),
#define SYS_INDEX_PAGE_NO 5
- Column("PAGE_NO", SLong(), NOT_NULL),
+ Column("PAGE_NO", SLong(), NULLABLE),
#define SYS_INDEX_SPACE 6
- Column("SPACE", SLong(), NOT_NULL),
+ Column("SPACE", SLong(), NULLABLE),
#define SYS_INDEX_MERGE_THRESHOLD 7
Column("MERGE_THRESHOLD", SLong(), NOT_NULL),
@@ -4978,12 +4978,14 @@ i_s_dict_fill_sys_indexes(
if (index->page == FIL_NULL) {
fields[SYS_INDEX_PAGE_NO]->set_null();
} else {
+ fields[SYS_INDEX_PAGE_NO]->set_notnull();
OK(fields[SYS_INDEX_PAGE_NO]->store(index->page, true));
}
- if (space_id == ULINT_UNDEFINED) {
+ if (space_id == FIL_NULL) {
fields[SYS_INDEX_SPACE]->set_null();
} else {
+ fields[SYS_INDEX_SPACE]->set_notnull();
OK(fields[SYS_INDEX_SPACE]->store(space_id, true));
}
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 420d4a388e8..9ff65ce7c9c 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1592,8 +1592,6 @@ public:
/** map of block->frame to buf_block_t blocks that belong
to buf_buddy_alloc(); protected by buf_pool.mutex */
hash_table_t zip_hash;
- /** number of pending read operations */
- Atomic_counter<ulint> n_pend_reads;
Atomic_counter<ulint>
n_pend_unzip; /*!< number of pending decompressions */
@@ -1620,7 +1618,7 @@ public:
/** flush_list size in bytes; protected by flush_list_mutex */
ulint flush_list_bytes;
/** possibly modified persistent pages (a subset of LRU);
- buf_dblwr.pending_writes() is approximately COUNT(is_write_fixed()) */
+ os_aio_pending_writes() is approximately COUNT(is_write_fixed()) */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
/** number of blocks ever added to flush_list;
sometimes protected by flush_list_mutex */
@@ -1776,18 +1774,6 @@ public:
/** Reserve a buffer. */
buf_tmp_buffer_t *io_buf_reserve() { return io_buf.reserve(); }
- /** @return whether any I/O is pending */
- bool any_io_pending()
- {
- if (n_pend_reads)
- return true;
- mysql_mutex_lock(&flush_list_mutex);
- const bool any_pending= page_cleaner_status > PAGE_CLEANER_IDLE ||
- buf_dblwr.pending_writes();
- mysql_mutex_unlock(&flush_list_mutex);
- return any_pending;
- }
-
/** Remove a block from flush_list.
@param bpage buffer pool page */
void delete_from_flush_list(buf_page_t *bpage) noexcept;
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index d9c9239c0b4..9932b0e521f 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -66,16 +66,10 @@ class buf_dblwr_t
bool batch_running;
/** number of expected flush_buffered_writes_completed() calls */
unsigned flushing_buffered_writes;
- /** pages submitted to flush_buffered_writes() */
- ulint pages_submitted;
/** number of flush_buffered_writes_completed() calls */
ulint writes_completed;
/** number of pages written by flush_buffered_writes_completed() */
ulint pages_written;
- /** condition variable for !writes_pending */
- pthread_cond_t write_cond;
- /** number of pending page writes */
- size_t writes_pending;
slot slots[2];
slot *active_slot;
@@ -98,9 +92,6 @@ public:
/** Acquire the mutex */
void lock() { mysql_mutex_lock(&mutex); }
- /** @return the number of submitted page writes */
- ulint submitted() const
- { mysql_mutex_assert_owner(&mutex); return pages_submitted; }
/** @return the number of completed batches */
ulint batches() const
{ mysql_mutex_assert_owner(&mutex); return writes_completed; }
@@ -124,7 +115,7 @@ public:
void recover();
/** Update the doublewrite buffer on data page write completion. */
- void write_completed(bool with_doublewrite);
+ void write_completed();
/** Flush possible buffered writes to persistent storage.
It is very important to call this function after a batch of writes has been
posted, and also when we may have to wait for a page latch!
@@ -167,40 +158,6 @@ public:
my_cond_wait(&cond, &mutex.m_mutex);
mysql_mutex_unlock(&mutex);
}
-
- /** Register an unbuffered page write */
- void add_unbuffered()
- {
- mysql_mutex_lock(&mutex);
- writes_pending++;
- mysql_mutex_unlock(&mutex);
- }
-
- size_t pending_writes()
- {
- mysql_mutex_lock(&mutex);
- const size_t pending{writes_pending};
- mysql_mutex_unlock(&mutex);
- return pending;
- }
-
- /** Wait for writes_pending to reach 0 */
- void wait_for_page_writes()
- {
- mysql_mutex_lock(&mutex);
- while (writes_pending)
- my_cond_wait(&write_cond, &mutex.m_mutex);
- mysql_mutex_unlock(&mutex);
- }
-
- /** Wait for writes_pending to reach 0 */
- void wait_for_page_writes(const timespec &abstime)
- {
- mysql_mutex_lock(&mutex);
- while (writes_pending)
- my_cond_timedwait(&write_cond, &mutex.m_mutex, &abstime);
- mysql_mutex_unlock(&mutex);
- }
};
/** The doublewrite buffer */
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index f53279ecb88..e4e0b2c9c74 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -597,8 +597,7 @@ private:
@return number of pending operations, possibly with NEEDS_FSYNC flag */
uint32_t set_closing()
{
- return n_pending.fetch_or(CLOSING, std::memory_order_acquire) &
- (PENDING | NEEDS_FSYNC);
+ return n_pending.fetch_or(CLOSING, std::memory_order_acquire);
}
public:
@@ -1470,6 +1469,7 @@ public:
if (space_list_last_opened == space)
{
+ ut_ad(s != space_list.begin());
space_list_t::iterator prev= s;
space_list_last_opened= &*--prev;
}
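The set_closing() hunk above makes the caller observe the complete previous value of n_pending instead of a masked subset. A minimal self-contained sketch of that pattern, with an invented bit layout (the real PENDING/CLOSING/NEEDS_FSYNC positions in fil_space_t differ):

#include <atomic>
#include <cstdint>

class file_state_sketch
{
  static constexpr uint32_t PENDING= 0x1fffffff;    // low bits: pending I/O count
  static constexpr uint32_t NEEDS_FSYNC= 1U << 29;  // illustrative positions only
  static constexpr uint32_t CLOSING= 1U << 30;
  std::atomic<uint32_t> n_pending{0};
public:
  /* Mark the file as being closed and return the full previous state, so the
  caller can look at both the pending count and the NEEDS_FSYNC flag. */
  uint32_t set_closing()
  { return n_pending.fetch_or(CLOSING, std::memory_order_acquire); }

  void acquire() { n_pending.fetch_add(1, std::memory_order_acquire); }
  void release() { n_pending.fetch_sub(1, std::memory_order_release); }

  static bool is_closing(uint32_t s) { return s & CLOSING; }
  static uint32_t pending(uint32_t s) { return s & PENDING; }
};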
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index 720fe7f25b9..c0151b44063 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -648,6 +648,12 @@ fts_optimize_remove_table(
void
fts_optimize_shutdown();
+/** Request a SYNC of the FTS cache for the table.
+@param[in] table table to sync */
+void
+fts_optimize_request_sync_table(
+ dict_table_t* table);
+
/**********************************************************************//**
Take a FTS savepoint. */
void
@@ -702,8 +708,9 @@ fts_savepoint_rollback_last_stmt(
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out] table fts table
+@param[in] wait whether to wait for existing sync to finish
@return DB_SUCCESS on success, error code on failure. */
-dberr_t fts_sync_table(dict_table_t* table);
+dberr_t fts_sync_table(dict_table_t* table, bool wait = true);
/****************************************************************//**
Create an FTS index cache. */
diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h
index 04e99d595c5..fb278d543c4 100644
--- a/storage/innobase/include/fts0types.h
+++ b/storage/innobase/include/fts0types.h
@@ -75,6 +75,7 @@ struct fts_index_cache_t {
que_t** ins_graph; /*!< Insert query graphs */
+ que_t** sel_graph; /*!< Select query graphs */
CHARSET_INFO* charset; /*!< charset */
};
@@ -86,7 +87,35 @@ struct fts_stopword_t {
CHARSET_INFO* charset; /*!< charset for stopword */
};
-struct fts_sync_t;
+/** The SYNC state of the cache. There is one instance of this struct
+associated with each ADD thread. */
+struct fts_sync_t {
+ trx_t* trx; /*!< The transaction used for SYNCing
+ the cache to disk */
+ dict_table_t* table; /*!< Table with FTS index(es) */
+ ulint max_cache_size; /*!< Max size in bytes of the cache */
+ ibool cache_full; /*!< flag, when true it indicates that
+ we need to sync the cache to disk */
+ ulint lower_index; /*!< the start index of the doc id
+ vector from where to start adding
+ documents to the FTS cache */
+ ulint upper_index; /*!< max index of the doc id vector to
+ add to the FTS cache */
+ ibool interrupted; /*!< TRUE if SYNC was interrupted */
+ doc_id_t min_doc_id; /*!< The smallest doc id added to the
+ cache. It should be equal to
+ doc_ids[lower_index] */
+ doc_id_t max_doc_id; /*!< The doc id at which the cache was
+ noted as being full, we use this to
+ set the upper_limit field */
+ time_t start_time; /*!< SYNC start time; only used if
+ fts_enable_diag_print */
+ bool in_progress; /*!< flag whether sync is in progress.*/
+ bool unlock_cache; /*!< whether to unlock the cache when
+ writing an FTS node */
+ /** condition variable for in_progress; used with table->fts->cache->lock */
+ pthread_cond_t cond;
+};
/** The cache for the FTS system. It is a memory-based inverted index
that new entries are added to, until it grows over the configured maximum
@@ -175,6 +204,7 @@ struct fts_node_t {
ulint ilist_size_alloc;
/*!< Allocated size of ilist in
bytes */
+ bool synced; /*!< flag whether the node is synced */
};
/** A tokenizer word. Contains information about one word. */
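For orientation, a minimal sketch of the in_progress protocol that fts_sync_t::cond supports, using standard C++ primitives as stand-ins for pthread_cond_t and table->fts->cache->lock (all names here are invented):

#include <condition_variable>
#include <mutex>

struct fts_sync_sketch
{
  std::mutex lock;               // stands in for the FTS cache lock
  std::condition_variable cond;  // stands in for fts_sync_t::cond
  bool in_progress= false;
};

/* Run one SYNC; if another SYNC is already running, either wait for it
(wait=true) or give up (wait=false), mirroring fts_sync_table(table, wait). */
bool run_sync(fts_sync_sketch &s, bool wait)
{
  std::unique_lock<std::mutex> lk(s.lock);
  while (s.in_progress)
  {
    if (!wait)
      return false;
    s.cond.wait(lk);
  }
  s.in_progress= true;
  lk.unlock();
  /* ... write the cache contents to the auxiliary index tables ... */
  lk.lock();
  s.in_progress= false;
  s.cond.notify_all();
  return true;
}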
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index da9dec05827..d5239ec3f9a 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -462,5 +462,15 @@ void destroy_background_thd(MYSQL_THD thd);
void
innobase_reset_background_thd(MYSQL_THD);
+#ifdef WITH_WSREP
+/** Append table-level exclusive key.
+@param thd MySQL thread handle
+@param table table
+@retval false on success
+@retval true on failure */
+struct dict_table_t;
+bool wsrep_append_table_key(MYSQL_THD thd, const dict_table_t &table);
+#endif /* WITH_WSREP */
+
#endif /* !UNIV_INNOCHECKSUM */
#endif /* HA_INNODB_PROTOTYPES_H */
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 8afa92abc93..cd596b4db46 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -382,7 +382,6 @@ public:
lsn_t get_lsn(std::memory_order order= std::memory_order_relaxed) const
{ return lsn.load(order); }
- void set_lsn(lsn_t lsn) { this->lsn.store(lsn, std::memory_order_release); }
lsn_t get_flushed_lsn(std::memory_order order= std::memory_order_acquire)
const noexcept
@@ -519,11 +518,10 @@ public:
/** Redo log system */
extern log_t log_sys;
-inline void log_free_check()
-{
- if (log_sys.check_flush_or_checkpoint())
- log_check_margins();
-}
+/** Wait for a log checkpoint if needed.
+NOTE that this function may only be called while not holding
+any synchronization objects except dict_sys.latch. */
+void log_free_check();
/** Release the latches that protect log resizing. */
void log_resize_release();
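log_free_check() remains a cheap predicate that only falls through to the expensive log_check_margins() path; a self-contained sketch of that fast-check/cold-slow-path shape (names invented, __attribute__((cold)) assumed available on GCC/clang):

#include <atomic>

std::atomic<bool> need_margin_check{false};

__attribute__((cold)) void check_margins_sketch()
{
  /* the expensive part: wait for page flushing or a checkpoint */
  need_margin_check.store(false, std::memory_order_relaxed);
}

void free_check_sketch()
{
  /* hot path: a single load; callers must not hold page latches */
  if (need_margin_check.load(std::memory_order_relaxed))
    check_margins_sketch();
}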
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index 5576560dca8..81f1f2b0dc9 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -93,10 +93,16 @@ struct mtr_t {
ATTRIBUTE_COLD void commit_shrink(fil_space_t &space);
/** Commit a mini-transaction that is deleting or renaming a file.
- @param space tablespace that is being renamed or deleted
- @param name new file name (nullptr=the file will be deleted)
+ @param space tablespace that is being renamed or deleted
+ @param name new file name (nullptr=the file will be deleted)
+ @param detached_handle if detached_handle != nullptr and the tablespace is
+ detached during the function execution, the file handle
+ of its node will be set to OS_FILE_CLOSED, and the
+ previous value of the handle will be stored at the
+ address pointed to by detached_handle.
@return whether the operation succeeded */
- ATTRIBUTE_COLD bool commit_file(fil_space_t &space, const char *name);
+ ATTRIBUTE_COLD bool commit_file(fil_space_t &space, const char *name,
+ pfs_os_file_t *detached_handle= nullptr);
/** Commit a mini-transaction that did not modify any pages,
but generated some redo log on a higher level, such as
@@ -326,7 +332,7 @@ public:
{
mtr_memo_slot_t &slot= m_memo[savepoint];
ut_ad(slot.type <= MTR_MEMO_BUF_FIX);
- ut_ad(type <= MTR_MEMO_BUF_FIX);
+ ut_ad(type < MTR_MEMO_S_LOCK);
slot.type= type;
}
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index c0e5f7683a9..13f9d3de3f8 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -933,13 +933,14 @@ os_file_flush_func(
/** Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned.
-@param[in] report true if we want an error message printed
- for all errors
-@return error number, or OS error number + 100 */
-ulint
-os_file_get_last_error(
- bool report);
+the OS error number + OS_FILE_ERROR_MAX is returned.
+@param[in] report_all_errors true if we want an error message
+ printed for all errors
+@param[in] on_error_silent if true, do not print any diagnostic
+ to the log
+@return error number, or OS error number + OS_FILE_ERROR_MAX */
+ulint os_file_get_last_error(bool report_all_errors,
+ bool on_error_silent= false);
/** NOTE! Use the corresponding macro os_file_read(), not directly this
function!
@@ -1048,11 +1049,20 @@ void os_aio_free();
@retval DB_IO_ERROR on I/O error */
dberr_t os_aio(const IORequest &type, void *buf, os_offset_t offset, size_t n);
-/** Wait until there are no pending asynchronous writes. */
-void os_aio_wait_until_no_pending_writes();
+/** @return number of pending reads */
+size_t os_aio_pending_reads();
+/** @return approximate number of pending reads */
+size_t os_aio_pending_reads_approx();
+/** @return number of pending writes */
+size_t os_aio_pending_writes();
-/** Wait until all pending asynchronous reads have completed. */
-void os_aio_wait_until_no_pending_reads();
+/** Wait until there are no pending asynchronous writes.
+@param declare whether the wait will be declared in tpool */
+void os_aio_wait_until_no_pending_writes(bool declare);
+
+/** Wait until all pending asynchronous reads have completed.
+@param declare whether the wait will be declared in tpool */
+void os_aio_wait_until_no_pending_reads(bool declare);
/** Prints info of the aio arrays.
@param[in/out] file file where to print */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 0c6aa8a94bb..6f0acbde975 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -317,6 +317,9 @@ extern my_bool srv_immediate_scrub_data_uncompressed;
enum srv_operation_mode {
/** Normal mode (MariaDB Server) */
SRV_OPERATION_NORMAL,
+ /** Mariabackup is executing server to export already restored
+ tablespaces */
+ SRV_OPERATION_EXPORT_RESTORED,
/** Mariabackup taking a backup */
SRV_OPERATION_BACKUP,
/** Mariabackup restoring a backup for subsequent --copy-back */
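The new enumerator sits directly after SRV_OPERATION_NORMAL on purpose: several hunks below test srv_operation <= SRV_OPERATION_EXPORT_RESTORED to mean "running as a real server, or exporting already restored tablespaces". A sketch of that ordering-based check (enumerator names shortened here):

enum srv_operation_sketch
{
  OP_NORMAL,           // normal MariaDB server
  OP_EXPORT_RESTORED,  // exporting already restored tablespaces
  OP_BACKUP,           // mariabackup --backup
  OP_RESTORE,          // mariabackup --prepare
  OP_RESTORE_EXPORT    // mariabackup --prepare --export
};

/* True for the modes that behave like a running server with respect to
redo logging, undo tablespaces and the temporary tablespace. */
inline bool is_server_like(srv_operation_sketch op)
{ return op <= OP_EXPORT_RESTORED; }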
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index ac39d3ec45b..bf6f6eb8eff 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -41,10 +41,11 @@ void
trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr);
/**
Run a purge batch.
-@param n_tasks number of purge tasks to submit to the queue
-@param truncate whether to truncate the history at the end of the batch
+@param n_tasks number of purge tasks to submit to the queue
+@param history_size trx_sys.history_size()
+@param truncate whether to truncate the history at the end of the batch
@return number of undo log pages handled in the batch */
-ulint trx_purge(ulint n_tasks, bool truncate);
+ulint trx_purge(ulint n_tasks, ulint history_size, bool truncate);
/** Rollback segements from a given transaction with trx-no
scheduled for purge. */
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 6e0eb547eb8..43e0c290d63 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -129,7 +129,8 @@ public:
#endif
}
/** @return whether the segment is marked for undo truncation */
- bool skip_allocation() const { return ref_load() & SKIP; }
+ bool skip_allocation() const
+ { return ref.load(std::memory_order_acquire) & SKIP; }
/** Increment the reference count */
void acquire()
{ ut_d(auto r=) ref.fetch_add(REF); ut_ad(!(r & SKIP)); }
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index 4728e7ef2bf..8adff09a3df 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -203,16 +203,18 @@ trx_undo_assign(trx_t* trx, dberr_t* err, mtr_t* mtr)
MY_ATTRIBUTE((nonnull));
/** Assign an undo log for a transaction.
A new undo log is created or a cached undo log reused.
+@tparam is_temp whether this is a temporary undo log
@param[in,out] trx transaction
@param[in] rseg rollback segment
@param[out] undo the undo log
-@param[out] err error code
@param[in,out] mtr mini-transaction
+@param[out] err error code
@return the undo log block
-@retval NULL on error */
+@retval nullptr on error */
+template<bool is_temp>
buf_block_t*
-trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
- dberr_t* err, mtr_t* mtr)
+trx_undo_assign_low(trx_t *trx, trx_rseg_t *rseg, trx_undo_t **undo,
+ mtr_t *mtr, dberr_t *err)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 209e1e60e2a..d763ef10734 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -1060,6 +1060,16 @@ ATTRIBUTE_COLD void log_check_margins()
while (log_sys.check_flush_or_checkpoint());
}
+/** Wait for a log checkpoint if needed.
+NOTE that this function may only be called while not holding
+any synchronization objects except dict_sys.latch. */
+void log_free_check()
+{
+ ut_ad(!lock_sys.is_writer());
+ if (log_sys.check_flush_or_checkpoint())
+ log_check_margins();
+}
+
extern void buf_resize_shutdown();
/** Make a checkpoint at the latest lsn on shutdown. */
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 3443369af6c..2d1c2a9419f 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -763,9 +763,10 @@ retry:
if (!os_file_status(name->c_str(), &exists, &ftype) || !exists)
goto processed;
}
- create(it, *name, static_cast<uint32_t>
- (1U << FSP_FLAGS_FCRC32_POS_MARKER |
- FSP_FLAGS_FCRC32_PAGE_SSIZE()), nullptr, 0);
+ if (create(it, *name, static_cast<uint32_t>
+ (1U << FSP_FLAGS_FCRC32_POS_MARKER |
+ FSP_FLAGS_FCRC32_PAGE_SSIZE()), nullptr, 0))
+ mysql_mutex_unlock(&fil_system.mutex);
}
}
else
@@ -794,7 +795,7 @@ processed:
@param flags FSP_SPACE_FLAGS
@param crypt_data encryption metadata
@param size tablespace size in pages
- @return tablespace
+ @return tablespace; the caller must release fil_system.mutex
@retval nullptr if crypt_data is invalid */
static fil_space_t *create(const recv_spaces_t::const_iterator &it,
const std::string &name, uint32_t flags,
@@ -806,6 +807,7 @@ processed:
ut_free(crypt_data);
return nullptr;
}
+ mysql_mutex_lock(&fil_system.mutex);
fil_space_t *space= fil_space_t::create(it->first, flags,
FIL_TYPE_TABLESPACE, crypt_data);
ut_ad(space);
@@ -866,12 +868,13 @@ processed:
space->free_limit= fsp_header_get_field(page, FSP_FREE_LIMIT);
space->free_len= flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + page);
fil_node_t *node= UT_LIST_GET_FIRST(space->chain);
+ mysql_mutex_unlock(&fil_system.mutex);
if (!space->acquire())
- {
+ {
free_space:
fil_space_free(it->first, false);
goto next_item;
- }
+ }
if (os_file_write(IORequestWrite, node->name, node->handle,
page, 0, fil_space_t::physical_size(flags)) !=
DB_SUCCESS)
@@ -941,6 +944,7 @@ bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p,
space->free_len= flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + page);
fil_node_t *node= UT_LIST_GET_FIRST(space->chain);
node->deferred= true;
+ mysql_mutex_unlock(&fil_system.mutex);
if (!space->acquire())
goto release_and_fail;
fil_names_dirty(space);
@@ -964,8 +968,10 @@ bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p,
uint32_t(file_size / fil_space_t::physical_size(flags));
if (n_pages > size)
{
+ mysql_mutex_lock(&fil_system.mutex);
space->size= node->size= n_pages;
space->set_committed_size();
+ mysql_mutex_unlock(&fil_system.mutex);
goto size_set;
}
}
@@ -1120,7 +1126,7 @@ static void fil_name_process(const char *name, ulint len, uint32_t space_id,
return;
}
- ut_ad(srv_operation == SRV_OPERATION_NORMAL
+ ut_ad(srv_operation <= SRV_OPERATION_EXPORT_RESTORED
|| srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT);
@@ -1239,8 +1245,9 @@ same_space:
ut_ad(space == NULL);
if (srv_force_recovery == 0) {
sql_print_error("InnoDB: Recovery cannot access"
- " file %s (tablespace "
- UINT32PF ")", name, space_id);
+ " file %.*s (tablespace "
+ UINT32PF ")", int(len), name,
+ space_id);
sql_print_information("InnoDB: You may set "
"innodb_force_recovery=1"
" to ignore this and"
@@ -1251,9 +1258,10 @@ same_space:
}
sql_print_warning("InnoDB: Ignoring changes to"
- " file %s (tablespace " UINT32PF ")"
+ " file %.*s (tablespace "
+ UINT32PF ")"
" due to innodb_force_recovery",
- name, space_id);
+ int(len), name, space_id);
}
}
}
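The %.*s conversions above matter because the file name is a length-counted byte range inside a redo log record and need not be NUL-terminated; a tiny standalone illustration of the idiom (file name and length invented):

#include <cstdio>

int main()
{
  /* pretend the buffer continues past the name without a terminating NUL */
  const char *rec= "./test/t1.ibd<trailing bytes>";
  const size_t len= 13;   /* number of valid bytes of the name */
  std::printf("InnoDB: Recovery cannot access file %.*s (tablespace %u)\n",
              int(len), rec, 5U);
  return 0;
}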
@@ -3324,7 +3332,7 @@ static void log_sort_flush_list()
@param last_batch whether it is possible to write more redo log */
void recv_sys_t::apply(bool last_batch)
{
- ut_ad(srv_operation == SRV_OPERATION_NORMAL ||
+ ut_ad(srv_operation <= SRV_OPERATION_EXPORT_RESTORED ||
srv_operation == SRV_OPERATION_RESTORE ||
srv_operation == SRV_OPERATION_RESTORE_EXPORT);
@@ -3475,7 +3483,7 @@ next_free_block:
for (;;)
{
const bool empty= pages.empty();
- if (empty && !buf_pool.n_pend_reads)
+ if (empty && !os_aio_pending_reads())
break;
if (!is_corrupt_fs() && !is_corrupt_log())
@@ -3487,8 +3495,7 @@ next_free_block:
else
{
mysql_mutex_unlock(&mutex);
- os_aio_wait_until_no_pending_reads();
- ut_ad(!buf_pool.n_pend_reads);
+ os_aio_wait_until_no_pending_reads(false);
mysql_mutex_lock(&mutex);
ut_ad(pages.empty());
}
@@ -4040,7 +4047,7 @@ dberr_t recv_recovery_from_checkpoint_start()
{
bool rescan = false;
- ut_ad(srv_operation == SRV_OPERATION_NORMAL
+ ut_ad(srv_operation <= SRV_OPERATION_EXPORT_RESTORED
|| srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT);
ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex));
@@ -4157,7 +4164,7 @@ read_only_recovery:
rescan = true;
}
- if (srv_operation == SRV_OPERATION_NORMAL) {
+ if (srv_operation <= SRV_OPERATION_EXPORT_RESTORED) {
deferred_spaces.deferred_dblwr();
buf_dblwr.recover();
}
@@ -4220,7 +4227,7 @@ err_exit:
}
log_sys.buf_free = recv_sys.offset;
if (recv_needed_recovery
- && srv_operation == SRV_OPERATION_NORMAL) {
+ && srv_operation <= SRV_OPERATION_EXPORT_RESTORED) {
/* Write a FILE_CHECKPOINT marker as the first thing,
before generating any other redo log. This ensures
that subsequent crash recovery will be possible even
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 6d31a55e8ed..fe90487ebec 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -594,8 +594,14 @@ void mtr_t::commit_shrink(fil_space_t &space)
/** Commit a mini-transaction that is deleting or renaming a file.
@param space tablespace that is being renamed or deleted
@param name new file name (nullptr=the file will be deleted)
+@param detached_handle if detached_handle != nullptr and the tablespace is
+ detached during the function execution, the file handle
+ of its node will be set to OS_FILE_CLOSED, and the
+ previous value of the handle will be stored at the
+ address pointed to by detached_handle.
@return whether the operation succeeded */
-bool mtr_t::commit_file(fil_space_t &space, const char *name)
+bool mtr_t::commit_file(fil_space_t &space, const char *name,
+ pfs_os_file_t *detached_handle)
{
ut_ad(is_active());
ut_ad(!high_level_read_only);
@@ -683,7 +689,9 @@ bool mtr_t::commit_file(fil_space_t &space, const char *name)
ut_ad(!space.referenced());
ut_ad(space.is_stopping());
- fil_system.detach(&space, true);
+ pfs_os_file_t handle = fil_system.detach(&space, true);
+ if (detached_handle)
+ *detached_handle = handle;
mysql_mutex_unlock(&fil_system.mutex);
success= true;
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 98bf4fdb8ca..08f525be314 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -106,9 +106,9 @@ public:
}
/* Wait for completions of all AIO operations */
- void wait(std::unique_lock<std::mutex> &lk)
+ void wait(mysql_mutex_t &m)
{
- m_cache.wait(lk);
+ m_cache.wait(m);
}
void wait()
@@ -131,14 +131,13 @@ public:
wait();
}
- std::mutex& mutex()
+ mysql_mutex_t& mutex()
{
return m_cache.mutex();
}
- void resize(int max_submitted_io, int max_callback_concurrency, std::unique_lock<std::mutex> &lk)
+ void resize(int max_submitted_io, int max_callback_concurrency)
{
- ut_a(lk.owns_lock());
m_cache.resize(max_submitted_io);
m_group.set_max_tasks(max_callback_concurrency);
m_max_aio = max_submitted_io;
@@ -751,22 +750,16 @@ os_file_punch_hole_posix(
return(DB_IO_NO_PUNCH_HOLE);
}
-
-
/** Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). If the number is not known to this program,
the OS error number + 100 is returned.
@param[in] report_all_errors true if we want an error message
- printed of all errors
+ printed of all errors
@param[in] on_error_silent true then don't print any diagnostic
to the log
@return error number, or OS error number + 100 */
-static
-ulint
-os_file_get_last_error_low(
- bool report_all_errors,
- bool on_error_silent)
+ulint os_file_get_last_error(bool report_all_errors, bool on_error_silent)
{
int err = errno;
@@ -1801,16 +1794,13 @@ bool os_file_flush_func(os_file_t file)
The number should be retrieved before any other OS calls (because they may
overwrite the error number). If the number is not known to this program,
then OS error number + OS_FILE_ERROR_MAX is returned.
-@param[in] report_all_errors true if we want an error message printed
- of all errors
+@param[in] report_all_errors true if we want an error message
+printed for all errors
@param[in] on_error_silent true then don't print any diagnostic
to the log
@return error number, or OS error number + OS_FILE_ERROR_MAX */
-static
-ulint
-os_file_get_last_error_low(
- bool report_all_errors,
- bool on_error_silent)
+ulint os_file_get_last_error(bool report_all_errors, bool on_error_silent)
+
{
ulint err = (ulint) GetLastError();
@@ -2928,20 +2918,6 @@ os_file_read_func(
return err ? err : DB_IO_ERROR;
}
-/** Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned.
-@param[in] report_all_errors true if we want an error printed
- for all errors
-@return error number, or OS error number + 100 */
-ulint
-os_file_get_last_error(
- bool report_all_errors)
-{
- return(os_file_get_last_error_low(report_all_errors, false));
-}
-
/** Handle errors for file operations.
@param[in] name name of a file or NULL
@param[in] operation operation
@@ -2958,7 +2934,7 @@ os_file_handle_error_cond_exit(
{
ulint err;
- err = os_file_get_last_error_low(false, on_error_silent);
+ err = os_file_get_last_error(false, on_error_silent);
switch (err) {
case OS_FILE_DISK_FULL:
@@ -3447,9 +3423,8 @@ static void io_callback(tpool::aiocb *cb)
else
{
ut_ad(write_slots->contains(cb));
- const IORequest req{request};
+ fil_aio_callback(request);
write_slots->release(cb);
- fil_aio_callback(req);
}
}
@@ -3647,8 +3622,9 @@ more concurrent threads via thread_group setting.
int os_aio_resize(ulint n_reader_threads, ulint n_writer_threads)
{
/* Lock the slots, and wait until all current IOs finish.*/
- std::unique_lock<std::mutex> lk_read(read_slots->mutex());
- std::unique_lock<std::mutex> lk_write(write_slots->mutex());
+ auto &lk_read= read_slots->mutex(), &lk_write= write_slots->mutex();
+ mysql_mutex_lock(&lk_read);
+ mysql_mutex_lock(&lk_write);
read_slots->wait(lk_read);
write_slots->wait(lk_write);
@@ -3661,20 +3637,25 @@ int os_aio_resize(ulint n_reader_threads, ulint n_writer_threads)
/** Do the Linux AIO dance (this will try to create a new
io context with changed max_events ,etc*/
- if (int ret= srv_thread_pool->reconfigure_aio(srv_use_native_aio, events))
+ int ret= srv_thread_pool->reconfigure_aio(srv_use_native_aio, events);
+
+ if (ret)
{
/** Do the best effort. We can't change the parallel io number,
but we still can adjust the number of concurrent completion handlers.*/
read_slots->task_group().set_max_tasks(static_cast<int>(n_reader_threads));
write_slots->task_group().set_max_tasks(static_cast<int>(n_writer_threads));
- return ret;
+ }
+ else
+ {
+ /* Allocation succeeded, resize the slots*/
+ read_slots->resize(max_read_events, static_cast<int>(n_reader_threads));
+ write_slots->resize(max_write_events, static_cast<int>(n_writer_threads));
}
- /* Allocation succeeded, resize the slots*/
- read_slots->resize(max_read_events, static_cast<int>(n_reader_threads), lk_read);
- write_slots->resize(max_write_events, static_cast<int>(n_writer_threads), lk_write);
-
- return 0;
+ mysql_mutex_unlock(&lk_read);
+ mysql_mutex_unlock(&lk_write);
+ return ret;
}
void os_aio_free()
@@ -3687,9 +3668,9 @@ void os_aio_free()
}
/** Wait until there are no pending asynchronous writes. */
-static void os_aio_wait_until_no_pending_writes_low()
+static void os_aio_wait_until_no_pending_writes_low(bool declare)
{
- bool notify_wait = write_slots->pending_io_count() > 0;
+ const bool notify_wait= declare && write_slots->pending_io_count();
if (notify_wait)
tpool::tpool_wait_begin();
@@ -3700,17 +3681,43 @@ static void os_aio_wait_until_no_pending_writes_low()
tpool::tpool_wait_end();
}
-/** Wait until there are no pending asynchronous writes. */
-void os_aio_wait_until_no_pending_writes()
+/** Wait until there are no pending asynchronous writes.
+@param declare whether the wait will be declared in tpool */
+void os_aio_wait_until_no_pending_writes(bool declare)
{
- os_aio_wait_until_no_pending_writes_low();
+ os_aio_wait_until_no_pending_writes_low(declare);
buf_dblwr.wait_flush_buffered_writes();
}
-/** Wait until all pending asynchronous reads have completed. */
-void os_aio_wait_until_no_pending_reads()
+/** @return number of pending reads */
+size_t os_aio_pending_reads()
+{
+ mysql_mutex_lock(&read_slots->mutex());
+ size_t pending= read_slots->pending_io_count();
+ mysql_mutex_unlock(&read_slots->mutex());
+ return pending;
+}
+
+/** @return approximate number of pending reads */
+size_t os_aio_pending_reads_approx()
+{
+ return read_slots->pending_io_count();
+}
+
+/** @return number of pending writes */
+size_t os_aio_pending_writes()
+{
+ mysql_mutex_lock(&write_slots->mutex());
+ size_t pending= write_slots->pending_io_count();
+ mysql_mutex_unlock(&write_slots->mutex());
+ return pending;
+}
+
+/** Wait until all pending asynchronous reads have completed.
+@param declare whether the wait will be declared in tpool */
+void os_aio_wait_until_no_pending_reads(bool declare)
{
- const auto notify_wait= read_slots->pending_io_count();
+ const bool notify_wait= declare && read_slots->pending_io_count();
if (notify_wait)
tpool::tpool_wait_begin();
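The new os_aio_pending_reads()/os_aio_pending_reads_approx() pair boils down to "take the slot mutex for an exact answer, skip it for a heuristic one"; a self-contained sketch of that pattern (standard C++ stand-ins, not the real tpool cache):

#include <atomic>
#include <mutex>
#include <cstddef>

struct io_slots_sketch
{
  std::mutex m;                      // serializes submission and completion
  std::atomic<size_t> n_pending{0};

  /* Exact count: holding the mutex orders this read against a completion
  that may still be running its callback under the same mutex. */
  size_t pending()
  {
    std::lock_guard<std::mutex> g(m);
    return n_pending.load(std::memory_order_relaxed);
  }

  /* Approximate count: lock-free; good enough for heuristics that only
  need a rough idea of the current I/O load. */
  size_t pending_approx() const
  { return n_pending.load(std::memory_order_relaxed); }
};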
diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc
index c7ad8399304..830a4bef9d9 100644
--- a/storage/innobase/page/page0zip.cc
+++ b/storage/innobase/page/page0zip.cc
@@ -3367,7 +3367,7 @@ page_zip_validate_low(
differed. Let us ignore it. */
page_zip_fail(("page_zip_validate:"
" min_rec_flag"
- " (%s" ULINTPF "," ULINTPF
+ " (%s" UINT32PF "," UINT32PF
",0x%02x)\n",
sloppy ? "ignored, " : "",
page_get_space_id(page),
@@ -3412,7 +3412,8 @@ page_zip_validate_low(
page + PAGE_NEW_INFIMUM, TRUE);
trec = page_rec_get_next_low(
temp_page + PAGE_NEW_INFIMUM, TRUE);
- const ulint n_core = page_is_leaf(page) ? index->n_fields : 0;
+ const ulint n_core = (index && page_is_leaf(page))
+ ? index->n_fields : 0;
do {
if (page_offset(rec) != page_offset(trec)) {
diff --git a/storage/innobase/rem/rem0cmp.cc b/storage/innobase/rem/rem0cmp.cc
index e48cad01530..3620363ff2e 100644
--- a/storage/innobase/rem/rem0cmp.cc
+++ b/storage/innobase/rem/rem0cmp.cc
@@ -300,8 +300,9 @@ int cmp_data(ulint mtype, ulint prtype, bool descending,
DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
if (CHARSET_INFO *cs= all_charsets[dtype_get_charset_coll(prtype)])
{
- cmp= cs->coll->strnncollsp_nchars(cs, data1, len1, data2, len2,
- std::max(len1, len2));
+ cmp= cs->coll->
+ strnncollsp_nchars(cs, data1, len1, data2, len2, std::max(len1, len2),
+ MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES);
goto func_exit;
}
goto no_collation;
diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc
index f489669b408..3f8e48763ea 100644
--- a/storage/innobase/rem/rem0rec.cc
+++ b/storage/innobase/rem/rem0rec.cc
@@ -242,9 +242,9 @@ enum rec_leaf_format {
REC_LEAF_INSTANT
};
-#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 11
+#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 12
# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wconversion" /* GCC 5 to 10 need this */
+# pragma GCC diagnostic ignored "-Wconversion" /* GCC 5 to 11 need this */
#endif
/** Determine the offset to each field in a leaf-page record
in ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED.
@@ -291,7 +291,9 @@ rec_init_offsets_comp_ordinary(
!= n_core)
? UT_BITS_IN_BYTES(unsigned(index->get_n_nullable(n_core)))
: (redundant_temp
- ? UT_BITS_IN_BYTES(index->n_nullable)
+ ? (index->is_instant()
+ ? UT_BITS_IN_BYTES(index->get_n_nullable(n_core))
+ : UT_BITS_IN_BYTES(index->n_nullable))
: index->n_core_null_bytes);
if (mblob) {
@@ -448,7 +450,7 @@ start:
continue;
}
- len = offs += len;
+ len = offs += static_cast<rec_offs>(len);
} else {
len = offs += field->fixed_len;
}
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index 5d7ea475d43..eaf987d115d 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -3853,6 +3853,7 @@ page_corrupted:
block->page.zip.data = src;
frame_changed = true;
} else if (!page_compressed
+ && type != FIL_PAGE_TYPE_XDES
&& !block->page.zip.data) {
block->page.frame = src;
frame_changed = true;
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 24fb6eb39ce..eca839fa7be 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -48,6 +48,7 @@ Created 4/20/1996 Heikki Tuuri
#ifdef WITH_WSREP
#include <wsrep.h>
#include <mysql/service_wsrep.h>
+#include "ha_prototypes.h"
#endif /* WITH_WSREP */
/*************************************************************************
@@ -2580,42 +2581,6 @@ but GCC 4.8.5 does not support pop_options. */
# pragma GCC optimize ("O0")
#endif
-#ifdef WITH_WSREP
-/** Start bulk insert operation for Galera by appending
-table-level exclusive key for bulk insert.
-@param trx transaction
-@param index index
-@retval false on success
-@retval true on failure */
-ATTRIBUTE_COLD static bool row_ins_wsrep_start_bulk(trx_t *trx, const dict_index_t &index)
-{
- char db_buf[NAME_LEN + 1];
- char tbl_buf[NAME_LEN + 1];
- ulint db_buf_len, tbl_buf_len;
-
- if (!index.table->parse_name(db_buf, tbl_buf, &db_buf_len, &tbl_buf_len))
- {
- WSREP_ERROR("Parse_name for bulk insert failed: %s",
- wsrep_thd_query(trx->mysql_thd));
- trx->error_state = DB_ROLLBACK;
- return true;
- }
-
- /* Append table-level exclusive key for bulk insert. */
- const int rcode = wsrep_thd_append_table_key(trx->mysql_thd, db_buf,
- tbl_buf, WSREP_SERVICE_KEY_EXCLUSIVE);
- if (rcode)
- {
- WSREP_ERROR("Appending table key for bulk insert failed: %s, %d",
- wsrep_thd_query(trx->mysql_thd), rcode);
- trx->error_state = DB_ROLLBACK;
- return true;
- }
-
- return false;
-}
-#endif
-
/***************************************************************//**
Tries to insert an entry into a clustered index, ignoring foreign key
constraints. If a record with the same unique key is found, the other
@@ -2780,10 +2745,13 @@ avoid_bulk:
#ifdef WITH_WSREP
if (trx->is_wsrep())
{
- if (!wsrep_thd_is_local_transaction(trx->mysql_thd))
- goto skip_bulk_insert;
- if (row_ins_wsrep_start_bulk(trx, *index))
- goto err_exit;
+ if (!wsrep_thd_is_local_transaction(trx->mysql_thd))
+ goto skip_bulk_insert;
+ if (wsrep_append_table_key(trx->mysql_thd, *index->table))
+ {
+ trx->error_state = DB_ROLLBACK;
+ goto err_exit;
+ }
}
#endif /* WITH_WSREP */
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index a82bbc7cde6..b35783f1a92 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -758,11 +758,6 @@ error:
row_field, field, col->len,
old_table->space->zip_size(),
conv_heap);
- } else {
- /* Field length mismatch should not
- happen when rebuilding redundant row
- format table. */
- ut_ad(index->table->not_redundant());
}
}
}
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 81879431096..27f48c1a4de 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -66,17 +66,23 @@ Created 9/17/2000 Heikki Tuuri
#include <thread>
-/*******************************************************************//**
-Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */
-static
-void
-row_mysql_delay_if_needed(void)
-/*===========================*/
+/** Delay an INSERT, DELETE or UPDATE operation if the purge is lagging. */
+static void row_mysql_delay_if_needed()
{
- if (srv_dml_needed_delay) {
- std::this_thread::sleep_for(
- std::chrono::microseconds(srv_dml_needed_delay));
- }
+ const auto delay= srv_dml_needed_delay;
+ if (UNIV_UNLIKELY(delay != 0))
+ {
+ /* Adjust for purge_coordinator_state::refresh() */
+ log_sys.latch.rd_lock(SRW_LOCK_CALL);
+ const lsn_t last= log_sys.last_checkpoint_lsn,
+ max_age= log_sys.max_checkpoint_age;
+ log_sys.latch.rd_unlock();
+ const lsn_t lsn= log_sys.get_lsn();
+ if ((lsn - last) / 4 >= max_age / 5)
+ buf_flush_ahead(last + max_age / 5, false);
+ srv_wake_purge_thread_if_not_active();
+ std::this_thread::sleep_for(std::chrono::microseconds(delay));
+ }
}
/*******************************************************************//**
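The arithmetic in the new row_mysql_delay_if_needed() requests flushing ahead once roughly 80% of the checkpoint age has been consumed ((lsn - last)/4 >= max_age/5) and targets last + max_age/5; a small worked example with invented numbers:

#include <cstdint>
#include <cstdio>

int main()
{
  const uint64_t last= 0, max_age= 1000;   // invented checkpoint parameters
  for (uint64_t lsn : {500u, 799u, 800u, 950u})
  {
    const bool flush_ahead= (lsn - last) / 4 >= max_age / 5;
    std::printf("lsn=%llu flush_ahead=%d target=%llu\n",
                static_cast<unsigned long long>(lsn), int(flush_ahead),
                static_cast<unsigned long long>(last + max_age / 5));
  }
  return 0;
}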
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index 0a2647e8d6d..f716625ea59 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -162,8 +162,9 @@ close_and_exit:
ut_ad("corrupted SYS_INDEXES record" == 0);
}
- if (const uint32_t space_id = dict_drop_index_tree(
- &node->pcur, nullptr, &mtr)) {
+ const uint32_t space_id = dict_drop_index_tree(
+ &node->pcur, nullptr, &mtr);
+ if (space_id) {
if (table) {
if (table->get_ref_count() == 0) {
dict_sys.remove(table);
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index 059aee6f140..5b0676a3e0d 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -540,7 +540,7 @@ row_quiesce_table_start(
if (!trx_is_interrupted(trx)) {
/* Ensure that all asynchronous IO is completed. */
- os_aio_wait_until_no_pending_writes();
+ os_aio_wait_until_no_pending_writes(true);
table->space->flush<false>();
if (row_quiesce_write_cfg(table, trx->mysql_thd)
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 6b4393d4113..f0f1ba74f26 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -146,8 +146,9 @@ restart:
pfs_os_file_t d = OS_FILE_CLOSED;
- if (const uint32_t space_id = dict_drop_index_tree(
- &node->pcur, node->trx, &mtr)) {
+ const uint32_t space_id = dict_drop_index_tree(
+ &node->pcur, node->trx, &mtr);
+ if (space_id) {
if (table) {
lock_release_on_rollback(node->trx,
table);
diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc
index a4b52fd2a2f..9b11b7fbc3f 100644
--- a/storage/innobase/row/row0vers.cc
+++ b/storage/innobase/row/row0vers.cc
@@ -859,6 +859,30 @@ static bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2)
return true;
}
+/** Find out whether the data tuple has a missing data type
+for an indexed virtual column.
+@param tuple data tuple
+@param index virtual index
+@return true if the tuple has a missing column type */
+static bool dtuple_vcol_data_missing(const dtuple_t &tuple,
+ dict_index_t *index)
+{
+ for (ulint i= 0; i < index->n_uniq; i++)
+ {
+ dict_col_t *col= index->fields[i].col;
+ if (!col->is_virtual())
+ continue;
+ dict_v_col_t *vcol= reinterpret_cast<dict_v_col_t*>(col);
+ for (ulint j= 0; j < index->table->n_v_cols; j++)
+ {
+ if (vcol == &index->table->v_cols[j]
+ && tuple.v_fields[j].type.mtype == DATA_MISSING)
+ return true;
+ }
+ }
+ return false;
+}
+
/** Finds out if a version of the record, where the version >= the current
purge_sys.view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
@@ -1068,6 +1092,9 @@ unsafe_to_purge:
if (dict_index_has_virtual(index)) {
if (vrow) {
+ if (dtuple_vcol_data_missing(*vrow, index)) {
+ goto nochange_index;
+ }
/* Keep the virtual row info for the next
version, unless it is changed */
mem_heap_empty(v_heap);
@@ -1078,6 +1105,7 @@ unsafe_to_purge:
if (!cur_vrow) {
/* Nothing for this index has changed,
continue */
+nochange_index:
version = prev_version;
continue;
}
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index 987d3d185d9..d4761a080b0 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -647,16 +647,18 @@ static monitor_info_t innodb_counter_info[] =
{"trx_rseg_history_len", "transaction",
"Length of the TRX_RSEG_HISTORY list",
static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
MONITOR_DEFAULT_START, MONITOR_RSEG_HISTORY_LEN},
{"trx_undo_slots_used", "transaction", "Number of undo slots used",
- MONITOR_NONE,
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_USED},
{"trx_undo_slots_cached", "transaction",
"Number of undo slots cached",
- MONITOR_NONE,
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_CACHED},
{"trx_rseg_current_size", "transaction",
@@ -693,7 +695,8 @@ static monitor_info_t innodb_counter_info[] =
{"purge_dml_delay_usec", "purge",
"Microseconds DML to be delayed due to purge lagging",
- MONITOR_DISPLAY_CURRENT,
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
MONITOR_DEFAULT_START, MONITOR_DML_PURGE_DELAY},
{"purge_stop_count", "purge",
@@ -1211,6 +1214,24 @@ TPOOL_SUPPRESS_TSAN static ulint srv_mon_get_rseg_size()
return size;
}
+/** @return number of used undo log slots */
+TPOOL_SUPPRESS_TSAN static ulint srv_mon_get_rseg_used()
+{
+ ulint size= 0;
+ for (const auto &rseg : trx_sys.rseg_array)
+ size+= UT_LIST_GET_LEN(rseg.undo_list);
+ return size;
+}
+
+/** @return number of cached undo log slots */
+TPOOL_SUPPRESS_TSAN static ulint srv_mon_get_rseg_cached()
+{
+ ulint size= 0;
+ for (const auto &rseg : trx_sys.rseg_array)
+ size+= UT_LIST_GET_LEN(rseg.undo_cached);
+ return size;
+}
+
/****************************************************************//**
This function consolidates some existing server counters used
by "system status variables". These existing system variables do not have
@@ -1448,7 +1469,15 @@ srv_mon_process_existing_counter(
case MONITOR_RSEG_CUR_SIZE:
value = srv_mon_get_rseg_size();
break;
-
+ case MONITOR_DML_PURGE_DELAY:
+ value = srv_max_purge_lag_delay;
+ break;
+ case MONITOR_NUM_UNDO_SLOT_USED:
+ value = srv_mon_get_rseg_used();
+ break;
+ case MONITOR_NUM_UNDO_SLOT_CACHED:
+ value = srv_mon_get_rseg_cached();
+ break;
case MONITOR_OVLD_N_FILE_OPENED:
value = fil_system.n_open;
break;
@@ -1527,7 +1556,6 @@ srv_mon_process_existing_counter(
case MONITOR_TIMEOUT:
value = lock_sys.timeouts;
break;
-
default:
ut_error;
}
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index edc19126558..a86a2337813 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -882,12 +882,13 @@ srv_export_innodb_status(void)
export_vars.innodb_data_writes = os_n_file_writes;
buf_dblwr.lock();
- ulint dblwr = buf_dblwr.submitted();
- export_vars.innodb_dblwr_pages_written = buf_dblwr.written();
+ ulint dblwr = buf_dblwr.written();
+ export_vars.innodb_dblwr_pages_written = dblwr;
export_vars.innodb_dblwr_writes = buf_dblwr.batches();
buf_dblwr.unlock();
- export_vars.innodb_data_written = srv_stats.data_written + dblwr;
+ export_vars.innodb_data_written = srv_stats.data_written
+ + (dblwr << srv_page_size_shift);
export_vars.innodb_buffer_pool_bytes_data =
buf_pool.stat.LRU_bytes
@@ -1468,27 +1469,32 @@ fewer_threads:
m_history_length= history_size;
- if (history_size &&
- trx_purge(n_use_threads,
- !(++count % srv_purge_rseg_truncate_frequency) ||
- purge_sys.truncate.current ||
- (srv_shutdown_state != SRV_SHUTDOWN_NONE &&
- srv_fast_shutdown == 0)))
+ if (!history_size)
+ srv_dml_needed_delay= 0;
+ else if (trx_purge(n_use_threads, history_size,
+ !(++count % srv_purge_rseg_truncate_frequency) ||
+ purge_sys.truncate.current ||
+ (srv_shutdown_state != SRV_SHUTDOWN_NONE &&
+ srv_fast_shutdown == 0)))
continue;
- if (m_running == sigcount)
+ if (srv_dml_needed_delay);
+ else if (m_running == sigcount)
{
/* Purge was not woken up by srv_wake_purge_thread_if_not_active() */
/* The magic number 5000 is an approximation for the case where we have
cached undo log records which prevent truncate of rollback segments. */
- wakeup= history_size &&
- (history_size >= 5000 ||
- history_size != trx_sys.history_size_approx());
+ wakeup= history_size >= 5000 ||
+ (history_size && history_size != trx_sys.history_size_approx());
break;
}
- else if (!trx_sys.history_exists())
+
+ if (!trx_sys.history_exists())
+ {
+ srv_dml_needed_delay= 0;
break;
+ }
if (!srv_purge_should_exit())
goto loop;
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 0a96a4fbaab..12643dd1768 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -188,7 +188,11 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn)
ib_logfile0 in log_t::rename_resized(). */
delete_log_files();
- DBUG_ASSERT(!buf_pool.any_io_pending());
+ ut_ad(!os_aio_pending_reads());
+ ut_ad(!os_aio_pending_writes());
+ ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex));
+ ut_ad(!buf_pool.get_oldest_modification(0));
+ ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex));
log_sys.latch.wr_lock(SRW_LOCK_CALL);
log_sys.set_capacity();
@@ -687,14 +691,12 @@ err_exit:
fil_set_max_space_id_if_bigger(space_id);
+ mysql_mutex_lock(&fil_system.mutex);
fil_space_t *space= fil_space_t::create(space_id, fsp_flags,
FIL_TYPE_TABLESPACE, nullptr,
FIL_ENCRYPTION_DEFAULT, true);
- ut_a(fil_validate());
- ut_a(space);
-
+ ut_ad(space);
fil_node_t *file= space->add(name, fh, 0, false, true);
- mysql_mutex_lock(&fil_system.mutex);
if (create)
{
@@ -838,7 +840,7 @@ dberr_t srv_undo_tablespaces_init(bool create_new_undo, mtr_t *mtr)
ut_ad(!create_new_undo || mtr);
ut_a(srv_undo_tablespaces <= TRX_SYS_N_RSEGS);
- ut_a(!create_new_undo || srv_operation == SRV_OPERATION_NORMAL);
+ ut_a(!create_new_undo || srv_operation <= SRV_OPERATION_EXPORT_RESTORED);
if (srv_undo_tablespaces == 1)
srv_undo_tablespaces= 0;
@@ -1092,7 +1094,11 @@ same_size:
log_write_up_to(flushed_lsn, false);
ut_ad(flushed_lsn == log_sys.get_lsn());
- ut_ad(!buf_pool.any_io_pending());
+ ut_ad(!os_aio_pending_reads());
+ ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex));
+ ut_ad(!buf_pool.get_oldest_modification(0));
+ ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex));
+ ut_d(os_aio_wait_until_no_pending_writes(false));
DBUG_RETURN(flushed_lsn);
}
@@ -1108,7 +1114,11 @@ ATTRIBUTE_COLD static dberr_t srv_log_rebuild()
/* Prohibit redo log writes from any other threads until creating a
log checkpoint at the end of create_log_file(). */
ut_d(recv_no_log_write= true);
- DBUG_ASSERT(!buf_pool.any_io_pending());
+ ut_ad(!os_aio_pending_reads());
+ ut_ad(!os_aio_pending_writes());
+ ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex));
+ ut_ad(!buf_pool.get_oldest_modification(0));
+ ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex));
/* Close the redo log file, so that we can replace it */
log_sys.close_file();
@@ -1171,7 +1181,7 @@ dberr_t srv_start(bool create_new_db)
dberr_t err = DB_SUCCESS;
mtr_t mtr;
- ut_ad(srv_operation == SRV_OPERATION_NORMAL
+ ut_ad(srv_operation <= SRV_OPERATION_RESTORE_EXPORT
|| srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT);
@@ -1507,7 +1517,8 @@ dberr_t srv_start(bool create_new_db)
bool must_upgrade_ibuf = false;
switch (srv_operation) {
- case SRV_OPERATION_NORMAL:
+ case SRV_OPERATION_NORMAL:
+ case SRV_OPERATION_EXPORT_RESTORED:
case SRV_OPERATION_RESTORE_EXPORT:
if (err != DB_SUCCESS) {
break;
@@ -1664,7 +1675,7 @@ dberr_t srv_start(bool create_new_db)
}
}
- if (srv_operation != SRV_OPERATION_NORMAL) {
+ if (srv_operation > SRV_OPERATION_EXPORT_RESTORED) {
ut_ad(srv_operation == SRV_OPERATION_RESTORE_EXPORT
|| srv_operation == SRV_OPERATION_RESTORE);
return(err);
@@ -1839,7 +1850,8 @@ skip_monitors:
return(srv_init_abort(err));
}
- if (!srv_read_only_mode && srv_operation == SRV_OPERATION_NORMAL) {
+ if (!srv_read_only_mode
+ && srv_operation <= SRV_OPERATION_EXPORT_RESTORED) {
/* Initialize the innodb_temporary tablespace and keep
it open until shutdown. */
err = srv_open_tmp_tablespace(create_new_db);
@@ -1919,7 +1931,7 @@ void innodb_preshutdown()
if (srv_read_only_mode)
return;
- if (!srv_fast_shutdown && srv_operation == SRV_OPERATION_NORMAL)
+ if (!srv_fast_shutdown && srv_operation <= SRV_OPERATION_EXPORT_RESTORED)
{
if (trx_sys.is_initialised())
while (trx_sys.any_active_transactions())
@@ -1955,6 +1967,7 @@ void innodb_shutdown()
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
break;
case SRV_OPERATION_NORMAL:
+ case SRV_OPERATION_EXPORT_RESTORED:
/* Shut down the persistent files. */
logs_empty_and_mark_files_at_shutdown();
}
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 841b014019b..f5795c9c128 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -274,13 +274,11 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
if (undo->state != TRX_UNDO_CACHED) {
/* The undo log segment will not be reused */
ut_a(undo->id < TRX_RSEG_N_SLOTS);
- compile_time_assert(FIL_NULL == 0xffffffff);
+ static_assert(FIL_NULL == 0xffffffff, "");
mtr->memset(rseg_header,
TRX_RSEG + TRX_RSEG_UNDO_SLOTS
+ undo->id * TRX_RSEG_SLOT_SIZE, 4, 0xff);
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
-
uint32_t hist_size = mach_read_from_4(
TRX_RSEG_HISTORY_SIZE + TRX_RSEG
+ rseg_header->page.frame);
@@ -362,7 +360,6 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
if (undo->state == TRX_UNDO_CACHED) {
UT_LIST_ADD_FIRST(rseg->undo_cached, undo);
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
} else {
ut_ad(undo->state == TRX_UNDO_TO_PURGE);
ut_free(undo);
@@ -384,161 +381,168 @@ static dberr_t trx_purge_remove_log_hdr(buf_block_t *rseg, buf_block_t* log,
uint16_t(offset + TRX_UNDO_HISTORY_NODE), mtr);
}
-MY_ATTRIBUTE((nonnull, warn_unused_result))
-/** Free an undo log segment, and remove the header from the history list.
-@param[in,out] mtr mini-transaction
-@param[in,out] rseg rollback segment
-@param[in] hdr_addr file address of log_hdr
-@return error code */
-static dberr_t
-trx_purge_free_segment(mtr_t &mtr, trx_rseg_t* rseg, fil_addr_t hdr_addr)
+/** Free an undo log segment.
+@param block rollback segment header page
+@param mtr mini-transaction */
+static void trx_purge_free_segment(buf_block_t *block, mtr_t &mtr)
{
- mtr.commit();
- log_free_check();
- mtr.start();
-
- const page_id_t hdr_page_id{rseg->space->id, hdr_addr.page};
- dberr_t err;
- buf_block_t *rseg_hdr= rseg->get(&mtr, &err);
- if (!rseg_hdr)
- return err;
- buf_block_t *block= buf_page_get_gen(hdr_page_id, 0, RW_X_LATCH,
- nullptr, BUF_GET_POSSIBLY_FREED,
- &mtr, &err);
- if (!block)
- return err;
-
- const uint32_t seg_size=
- flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->page.frame);
-
- err= trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, &mtr);
- if (UNIV_UNLIKELY(err != DB_SUCCESS))
- return err;
-
- ut_ad(rseg->curr_size >= seg_size);
- rseg->curr_size-= seg_size;
- rseg->history_size--;
-
- byte *hist= TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->page.frame;
- ut_ad(mach_read_from_4(hist) >= seg_size);
- mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size);
-
while (!fseg_free_step_not_header(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER +
block->page.frame, &mtr))
{
block->fix();
+ const page_id_t id{block->page.id()};
mtr.commit();
/* NOTE: If the server is killed after the log that was produced
up to this point was written, and before the log from the mtr.commit()
in our caller is written, then the pages belonging to the
undo log will become unaccessible garbage.
- This does not matters when using multiple innodb_undo_tablespaces;
+ This does not matter when using multiple innodb_undo_tablespaces;
innodb_undo_log_truncate=ON will be able to reclaim the space. */
log_free_check();
mtr.start();
block->page.lock.x_lock();
- mtr.memo_push(block, MTR_MEMO_PAGE_X_MODIFY);
+ if (UNIV_UNLIKELY(block->page.id() != id))
+ {
+ block->unfix();
+ block->page.lock.x_unlock();
+ block= buf_page_get_gen(id, 0, RW_X_LATCH, nullptr, BUF_GET, &mtr);
+ if (!block)
+ return;
+ }
+ else
+ mtr.memo_push(block, MTR_MEMO_PAGE_X_MODIFY);
}
while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER +
block->page.frame, &mtr));
- return DB_SUCCESS;
}
/** Remove unnecessary history data from a rollback segment.
@param[in,out] rseg rollback segment
@param[in] limit truncate anything before this
@return error code */
-static
-dberr_t
-trx_purge_truncate_rseg_history(
- trx_rseg_t& rseg,
- const purge_sys_t::iterator& limit)
+static dberr_t
+trx_purge_truncate_rseg_history(trx_rseg_t& rseg,
+ const purge_sys_t::iterator& limit)
{
- fil_addr_t hdr_addr;
- mtr_t mtr;
+ fil_addr_t hdr_addr;
+ mtr_t mtr;
- mtr.start();
+ log_free_check();
+ mtr.start();
- dberr_t err;
- buf_block_t* rseg_hdr = rseg.get(&mtr, &err);
- if (!rseg_hdr) {
- goto func_exit;
- }
+ dberr_t err;
+reget:
+ buf_block_t *rseg_hdr= rseg.get(&mtr, &err);
+ if (!rseg_hdr)
+ {
+func_exit:
+ mtr.commit();
+ return err;
+ }
- hdr_addr = flst_get_last(TRX_RSEG + TRX_RSEG_HISTORY
- + rseg_hdr->page.frame);
- hdr_addr.boffset = static_cast<uint16_t>(hdr_addr.boffset
- - TRX_UNDO_HISTORY_NODE);
+ hdr_addr= flst_get_last(TRX_RSEG + TRX_RSEG_HISTORY + rseg_hdr->page.frame);
+ hdr_addr.boffset= static_cast<uint16_t>(hdr_addr.boffset -
+ TRX_UNDO_HISTORY_NODE);
loop:
- if (hdr_addr.page == FIL_NULL) {
-func_exit:
- mtr.commit();
- return err;
- }
+ if (hdr_addr.page == FIL_NULL)
+ goto func_exit;
- buf_block_t* block = buf_page_get_gen(page_id_t(rseg.space->id,
- hdr_addr.page),
- 0, RW_X_LATCH, nullptr,
- BUF_GET_POSSIBLY_FREED,
- &mtr, &err);
- if (!block) {
- goto func_exit;
- }
+ buf_block_t *b=
+ buf_page_get_gen(page_id_t(rseg.space->id, hdr_addr.page),
+ 0, RW_X_LATCH, nullptr, BUF_GET_POSSIBLY_FREED,
+ &mtr, &err);
+ if (!b)
+ goto func_exit;
- const trx_id_t undo_trx_no = mach_read_from_8(
- block->page.frame + hdr_addr.boffset + TRX_UNDO_TRX_NO);
+ const trx_id_t undo_trx_no=
+ mach_read_from_8(b->page.frame + hdr_addr.boffset + TRX_UNDO_TRX_NO);
- if (undo_trx_no >= limit.trx_no) {
- if (undo_trx_no == limit.trx_no) {
- err = trx_undo_truncate_start(
- &rseg, hdr_addr.page,
- hdr_addr.boffset, limit.undo_no);
- }
+ if (undo_trx_no >= limit.trx_no)
+ {
+ if (undo_trx_no == limit.trx_no)
+ err = trx_undo_truncate_start(&rseg, hdr_addr.page,
+ hdr_addr.boffset, limit.undo_no);
+ goto func_exit;
+ }
- goto func_exit;
- }
+ fil_addr_t prev_hdr_addr=
+ flst_get_prev_addr(b->page.frame + hdr_addr.boffset +
+ TRX_UNDO_HISTORY_NODE);
+ prev_hdr_addr.boffset= static_cast<uint16_t>(prev_hdr_addr.boffset -
+ TRX_UNDO_HISTORY_NODE);
+ err= trx_purge_remove_log_hdr(rseg_hdr, b, hdr_addr.boffset, &mtr);
+ if (UNIV_UNLIKELY(err != DB_SUCCESS))
+ goto func_exit;
- fil_addr_t prev_hdr_addr = flst_get_prev_addr(
- block->page.frame + hdr_addr.boffset + TRX_UNDO_HISTORY_NODE);
- prev_hdr_addr.boffset = static_cast<uint16_t>(prev_hdr_addr.boffset
- - TRX_UNDO_HISTORY_NODE);
-
- if (!rseg.is_referenced()
- && rseg.needs_purge <= (purge_sys.head.trx_no
- ? purge_sys.head.trx_no
- : purge_sys.tail.trx_no)
- && mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE
- + block->page.frame)
- == TRX_UNDO_TO_PURGE
- && !mach_read_from_2(block->page.frame + hdr_addr.boffset
- + TRX_UNDO_NEXT_LOG)) {
- /* We can free the whole log segment.
- This will call trx_purge_remove_log_hdr(). */
- err = trx_purge_free_segment(mtr, &rseg, hdr_addr);
- } else {
- /* Remove the log hdr from the rseg history. */
- rseg.history_size--;
- err = trx_purge_remove_log_hdr(rseg_hdr, block,
- hdr_addr.boffset, &mtr);
- }
+ rseg_hdr->fix();
- mtr.commit();
- if (err != DB_SUCCESS) {
- return err;
- }
- mtr.start();
+ if (mach_read_from_2(b->page.frame + hdr_addr.boffset + TRX_UNDO_NEXT_LOG) ||
+ rseg.is_referenced() ||
+ rseg.needs_purge > (purge_sys.head.trx_no
+ ? purge_sys.head.trx_no
+ : purge_sys.tail.trx_no))
+ /* We cannot free the entire undo page. */;
+ else
+ {
+ const uint32_t seg_size=
+ flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + b->page.frame);
+ switch (mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE +
+ b->page.frame)) {
+ case TRX_UNDO_TO_PURGE:
+ {
+ byte *hist= TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->page.frame;
+ ut_ad(mach_read_from_4(hist) >= seg_size);
+ mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size);
+ }
+ free_segment:
+ ut_ad(rseg.curr_size >= seg_size);
+ rseg.curr_size-= seg_size;
+ trx_purge_free_segment(b, mtr);
+ break;
+ case TRX_UNDO_CACHED:
+ /* rseg.undo_cached must point to this page */
+ trx_undo_t *undo= UT_LIST_GET_FIRST(rseg.undo_cached);
+ for (; undo; undo= UT_LIST_GET_NEXT(undo_list, undo))
+ if (undo->hdr_page_no == hdr_addr.page)
+ goto found_cached;
+ ut_ad("inconsistent undo logs" == 0);
+ break;
+ found_cached:
+ UT_LIST_REMOVE(rseg.undo_cached, undo);
+ static_assert(FIL_NULL == 0xffffffff, "");
+ if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT +
+ rseg_hdr->page.frame)))
+ trx_rseg_format_upgrade(rseg_hdr, &mtr);
+ mtr.memset(rseg_hdr, TRX_RSEG + TRX_RSEG_UNDO_SLOTS +
+ undo->id * TRX_RSEG_SLOT_SIZE, 4, 0xff);
+ ut_free(undo);
+ mtr.write<8,mtr_t::MAYBE_NOP>(*rseg_hdr, TRX_RSEG + TRX_RSEG_MAX_TRX_ID +
+ rseg_hdr->page.frame,
+ trx_sys.get_max_trx_id() - 1);
+ goto free_segment;
+ }
+ }
- hdr_addr = prev_hdr_addr;
+ hdr_addr= prev_hdr_addr;
- rseg_hdr = rseg.get(&mtr, &err);
- if (!rseg_hdr) {
- goto func_exit;
- }
+ mtr.commit();
+ ut_ad(rseg.history_size > 0);
+ rseg.history_size--;
+ log_free_check();
+ mtr.start();
+ rseg_hdr->page.lock.x_lock();
+ if (UNIV_UNLIKELY(rseg_hdr->page.id() != rseg.page_id()))
+ {
+ rseg_hdr->unfix();
+ rseg_hdr->page.lock.x_unlock();
+ goto reget;
+ }
+ mtr.memo_push(rseg_hdr, MTR_MEMO_PAGE_X_MODIFY);
- goto loop;
+ goto loop;
}
/** Cleanse purge queue to remove the rseg that reside in undo-tablespace
@@ -1234,43 +1238,6 @@ trx_purge_attach_undo_recs(ulint n_purge_threads)
return(n_pages_handled);
}
-/*******************************************************************//**
-Calculate the DML delay required.
-@return delay in microseconds or ULINT_MAX */
-static
-ulint
-trx_purge_dml_delay(void)
-/*=====================*/
-{
- /* Determine how much data manipulation language (DML) statements
- need to be delayed in order to reduce the lagging of the purge
- thread. */
- ulint delay = 0; /* in microseconds; default: no delay */
-
- /* If purge lag is set then calculate the new DML delay. */
-
- if (srv_max_purge_lag > 0) {
- double ratio = static_cast<double>(trx_sys.history_size()) /
- static_cast<double>(srv_max_purge_lag);
-
- if (ratio > 1.0) {
- /* If the history list length exceeds the
- srv_max_purge_lag, the data manipulation
- statements are delayed by at least 5000
- microseconds. */
- delay = (ulint) ((ratio - .5) * 10000);
- }
-
- if (delay > srv_max_purge_lag_delay) {
- delay = srv_max_purge_lag_delay;
- }
-
- MONITOR_SET(MONITOR_DML_PURGE_DELAY, delay);
- }
-
- return(delay);
-}
-
extern tpool::waitable_task purge_worker_task;
/** Wait for pending purge jobs to complete. */
@@ -1314,18 +1281,18 @@ TRANSACTIONAL_INLINE void purge_sys_t::clone_end_view()
/**
Run a purge batch.
-@param n_tasks number of purge tasks to submit to the queue
-@param truncate whether to truncate the history at the end of the batch
+@param n_tasks number of purge tasks to submit to the queue
+@param history_size trx_sys.history_size()
+@param truncate whether to truncate the history at the end of the batch
@return number of undo log pages handled in the batch */
-TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, bool truncate)
+TRANSACTIONAL_TARGET
+ulint trx_purge(ulint n_tasks, ulint history_size, bool truncate)
{
que_thr_t* thr = NULL;
ulint n_pages_handled;
ut_ad(n_tasks > 0);
- srv_dml_needed_delay = trx_purge_dml_delay();
-
purge_sys.clone_oldest_view();
#ifdef UNIV_DEBUG
@@ -1337,6 +1304,24 @@ TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, bool truncate)
/* Fetch the UNDO recs that need to be purged. */
n_pages_handled = trx_purge_attach_undo_recs(n_tasks);
+ {
+ ulint delay = n_pages_handled ? srv_max_purge_lag : 0;
+ if (UNIV_UNLIKELY(delay)) {
+ if (delay >= history_size) {
+ no_throttle:
+ delay = 0;
+ } else if (const ulint max_delay =
+ srv_max_purge_lag_delay) {
+ delay = std::min(max_delay,
+ 10000 * history_size / delay
+ - 5000);
+ } else {
+ goto no_throttle;
+ }
+ }
+ srv_dml_needed_delay = delay;
+ }
+
/* Submit tasks to workers queue if using multi-threaded purge. */
for (ulint i = n_tasks; --i; ) {
thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
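For reference, the block added in the hunk above computes the same throttle as the removed trx_purge_dml_delay(), only in integer arithmetic and only when the batch actually handled pages. The following is a minimal standalone sketch of that arithmetic, not part of the patch; the parameter names merely mirror srv_max_purge_lag, srv_max_purge_lag_delay and trx_sys.history_size().

/* Standalone sketch (not InnoDB code) of the purge-lag throttle above. */
#include <algorithm>
#include <cstdio>

/** @return DML delay in microseconds for one purge batch */
static unsigned long purge_lag_delay(unsigned long history_size,
                                     unsigned long max_purge_lag,
                                     unsigned long max_purge_lag_delay,
                                     bool pages_handled)
{
  if (!pages_handled || !max_purge_lag || history_size <= max_purge_lag ||
      !max_purge_lag_delay)
    return 0;                     /* no throttling */
  /* Integer form of the removed (ratio - .5) * 10000 with
     ratio = history_size / max_purge_lag, capped at the configured maximum. */
  return std::min(max_purge_lag_delay,
                  10000 * history_size / max_purge_lag - 5000);
}

int main()
{
  /* 3,000,000 history entries against max_purge_lag=1,000,000 would give
     25,000us, capped here at max_purge_lag_delay=10,000us. */
  std::printf("%lu\n", purge_lag_delay(3000000, 1000000, 10000, true));
  return 0;
}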
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index e70516a2d2d..c1a7b08b717 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -1875,26 +1875,28 @@ trx_undo_report_row_operation(
}
mtr_t mtr;
+ dberr_t err;
mtr.start();
trx_undo_t** pundo;
trx_rseg_t* rseg;
const bool is_temp = index->table->is_temporary();
+ buf_block_t* undo_block;
if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
-
rseg = trx->get_temp_rseg();
pundo = &trx->rsegs.m_noredo.undo;
+ undo_block = trx_undo_assign_low<true>(trx, rseg, pundo,
+ &mtr, &err);
} else {
ut_ad(!trx->read_only);
ut_ad(trx->id);
pundo = &trx->rsegs.m_redo.undo;
rseg = trx->rsegs.m_redo.rseg;
+ undo_block = trx_undo_assign_low<false>(trx, rseg, pundo,
+ &mtr, &err);
}
- dberr_t err;
- buf_block_t* undo_block = trx_undo_assign_low(trx, rseg, pundo,
- &err, &mtr);
trx_undo_t* undo = *pundo;
ut_ad((err == DB_SUCCESS) == (undo_block != NULL));
if (UNIV_UNLIKELY(undo_block == NULL)) {
diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc
index 1dc3c18fc09..e730530adb2 100644
--- a/storage/innobase/trx/trx0rseg.cc
+++ b/storage/innobase/trx/trx0rseg.cc
@@ -395,7 +395,6 @@ void trx_rseg_t::reinit(uint32_t page)
{
next= UT_LIST_GET_NEXT(undo_list, undo);
UT_LIST_REMOVE(undo_cached, undo);
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
ut_free(undo);
}
@@ -404,6 +403,7 @@ void trx_rseg_t::reinit(uint32_t page)
last_commit_and_offset= 0;
last_page_no= FIL_NULL;
curr_size= 1;
+ ref.store(0, std::memory_order_release);
}
/** Read the undo log lists.
@@ -428,7 +428,6 @@ static dberr_t trx_undo_lists_init(trx_rseg_t *rseg,
if (is_undo_empty)
is_undo_empty= !undo->size || undo->state == TRX_UNDO_CACHED;
rseg->curr_size+= undo->size;
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
}
}
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index e88f7824ba6..0f85aec540f 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -816,28 +816,17 @@ static void trx_assign_rseg_low(trx_t *trx)
static Atomic_counter<unsigned> rseg_slot;
unsigned slot = rseg_slot++ % TRX_SYS_N_RSEGS;
ut_d(if (trx_rseg_n_slots_debug) slot = 0);
+ ut_d(const auto start_scan_slot = slot);
+ ut_d(bool look_for_rollover = false);
trx_rseg_t* rseg;
-#ifdef UNIV_DEBUG
- ulint start_scan_slot = slot;
- bool look_for_rollover = false;
-#endif /* UNIV_DEBUG */
-
bool allocated;
do {
for (;;) {
rseg = &trx_sys.rseg_array[slot];
-
-#ifdef UNIV_DEBUG
- /* Ensure that we are not revisiting the same
- slot that we have already inspected. */
- if (look_for_rollover) {
- ut_ad(start_scan_slot != slot);
- }
- look_for_rollover = true;
-#endif /* UNIV_DEBUG */
-
+ ut_ad(!look_for_rollover || start_scan_slot != slot);
+ ut_d(look_for_rollover = true);
ut_d(if (!trx_rseg_n_slots_debug))
slot = (slot + 1) % TRX_SYS_N_RSEGS;
@@ -1038,7 +1027,13 @@ trx_write_serialisation_history(
mtr_t temp_mtr;
temp_mtr.start();
temp_mtr.set_log_mode(MTR_LOG_NO_REDO);
- trx_undo_set_state_at_finish(undo, &temp_mtr);
+ buf_block_t* block= buf_page_get(page_id_t(SRV_TMP_SPACE_ID,
+ undo->hdr_page_no),
+						 0, RW_X_LATCH, &temp_mtr);
+ ut_a(block);
+ temp_mtr.write<2>(*block, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE
+ + block->page.frame, TRX_UNDO_TO_PURGE);
+ undo->state = TRX_UNDO_TO_PURGE;
temp_mtr.commit();
}
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index 3b9c598e745..cbbf316fe69 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -535,8 +535,6 @@ trx_undo_seg_create(fil_space_t *space, buf_block_t *rseg_hdr, ulint *id,
+ slot_no * TRX_RSEG_SLOT_SIZE + rseg_hdr->page.frame,
block->page.id().page_no());
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
-
*err = DB_SUCCESS;
return block;
}
@@ -991,7 +989,6 @@ static void trx_undo_seg_free(const trx_undo_t *undo)
static_assert(FIL_NULL == 0xffffffff, "compatibility");
mtr.memset(rseg_header, TRX_RSEG + TRX_RSEG_UNDO_SLOTS +
undo->id * TRX_RSEG_SLOT_SIZE, 4, 0xff);
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
}
}
@@ -1150,7 +1147,6 @@ corrupted_type:
UT_LIST_ADD_LAST(rseg->undo_list, undo);
} else {
UT_LIST_ADD_LAST(rseg->undo_cached, undo);
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
}
mtr.commit();
@@ -1289,27 +1285,25 @@ trx_undo_create(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
@param[in,out] rseg rollback segment
@param[out] pundo the undo log memory object
@param[in,out] mtr mini-transaction
+@param[out] err error code
@return the undo log block
@retval NULL if none cached */
static
buf_block_t*
trx_undo_reuse_cached(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** pundo,
- mtr_t* mtr)
+ mtr_t* mtr, dberr_t *err)
{
- if (rseg->is_persistent()) {
- ut_ad(rseg->is_referenced());
- if (rseg->needs_purge <= trx->id) {
- /* trx_purge_truncate_history() compares
- rseg->needs_purge <= head.trx_no
- so we need to compensate for that.
- The rseg->needs_purge after crash
- recovery would be at least trx->id + 1,
- because that is the minimum possible value
- assigned by trx_serialise() on commit. */
- rseg->needs_purge = trx->id + 1;
- }
- } else {
- ut_ad(!rseg->is_referenced());
+ ut_ad(rseg->is_persistent());
+ ut_ad(rseg->is_referenced());
+ if (rseg->needs_purge <= trx->id) {
+ /* trx_purge_truncate_history() compares
+ rseg->needs_purge <= head.trx_no
+ so we need to compensate for that.
+ The rseg->needs_purge after crash
+ recovery would be at least trx->id + 1,
+ because that is the minimum possible value
+ assigned by trx_serialise() on commit. */
+ rseg->needs_purge = trx->id + 1;
}
trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached);
@@ -1320,15 +1314,15 @@ trx_undo_reuse_cached(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** pundo,
ut_ad(undo->size == 1);
ut_ad(undo->id < TRX_RSEG_N_SLOTS);
- buf_block_t* block = buf_page_get(page_id_t(undo->rseg->space->id,
- undo->hdr_page_no),
- 0, RW_X_LATCH, mtr);
+ buf_block_t* block = buf_page_get_gen(page_id_t(undo->rseg->space->id,
+ undo->hdr_page_no),
+ 0, RW_X_LATCH, nullptr, BUF_GET,
+ mtr, err);
if (!block) {
return NULL;
}
UT_LIST_REMOVE(rseg->undo_cached, undo);
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
*pundo = undo;
@@ -1374,11 +1368,12 @@ trx_undo_assign(trx_t* trx, dberr_t* err, mtr_t* mtr)
BUF_GET, mtr, err);
}
+ *err = DB_SUCCESS;
trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
rseg->latch.wr_lock(SRW_LOCK_CALL);
buf_block_t* block = trx_undo_reuse_cached(
- trx, rseg, &trx->rsegs.m_redo.undo, mtr);
+ trx, rseg, &trx->rsegs.m_redo.undo, mtr, err);
if (!block) {
block = trx_undo_create(trx, rseg, &trx->rsegs.m_redo.undo,
@@ -1387,8 +1382,6 @@ trx_undo_assign(trx_t* trx, dberr_t* err, mtr_t* mtr)
if (!block) {
goto func_exit;
}
- } else {
- *err = DB_SUCCESS;
}
UT_LIST_ADD_FIRST(rseg->undo_list, trx->rsegs.m_redo.undo);
@@ -1400,18 +1393,20 @@ func_exit:
/** Assign an undo log for a transaction.
A new undo log is created or a cached undo log reused.
+@tparam is_temp whether this is temporary undo log
@param[in,out] trx transaction
@param[in] rseg rollback segment
@param[out] undo the undo log
-@param[out] err error code
@param[in,out] mtr mini-transaction
+@param[out] err error code
@return the undo log block
-@retval NULL on error */
+@retval nullptr on error */
+template<bool is_temp>
buf_block_t*
-trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
- dberr_t* err, mtr_t* mtr)
+trx_undo_assign_low(trx_t *trx, trx_rseg_t *rseg, trx_undo_t **undo,
+ mtr_t *mtr, dberr_t *err)
{
- ut_d(const bool is_temp = rseg == trx->rsegs.m_noredo.rseg);
+ ut_ad(is_temp == (rseg == trx->rsegs.m_noredo.rseg));
ut_ad(is_temp || rseg == trx->rsegs.m_redo.rseg);
ut_ad(undo == (is_temp
? &trx->rsegs.m_noredo.undo
@@ -1431,19 +1426,24 @@ trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
*err = DB_TOO_MANY_CONCURRENT_TRXS; return NULL;
);
+ *err = DB_SUCCESS;
rseg->latch.wr_lock(SRW_LOCK_CALL);
- buf_block_t* block = trx_undo_reuse_cached(trx, rseg, undo, mtr);
-
- if (!block) {
- block = trx_undo_create(trx, rseg, undo, err, mtr);
- ut_ad(!block == (*err != DB_SUCCESS));
- if (!block) {
- goto func_exit;
- }
+ buf_block_t* block;
+ if (is_temp) {
+ ut_ad(!UT_LIST_GET_LEN(rseg->undo_cached));
} else {
- *err = DB_SUCCESS;
+ block = trx_undo_reuse_cached(trx, rseg, undo, mtr, err);
+ if (block) {
+ goto got_block;
+ }
+ }
+ block = trx_undo_create(trx, rseg, undo, err, mtr);
+ ut_ad(!block == (*err != DB_SUCCESS));
+ if (!block) {
+ goto func_exit;
}
+got_block:
UT_LIST_ADD_FIRST(rseg->undo_list, *undo);
func_exit:
@@ -1451,6 +1451,13 @@ func_exit:
return block;
}
+template buf_block_t*
+trx_undo_assign_low<false>(trx_t *trx, trx_rseg_t *rseg, trx_undo_t **undo,
+ mtr_t *mtr, dberr_t *err);
+template buf_block_t*
+trx_undo_assign_low<true>(trx_t *trx, trx_rseg_t *rseg, trx_undo_t **undo,
+ mtr_t *mtr, dberr_t *err);
+
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
@return undo log segment header page, x-latched */
@@ -1461,6 +1468,7 @@ trx_undo_set_state_at_finish(
mtr_t* mtr) /*!< in: mtr */
{
ut_ad(undo->id < TRX_RSEG_N_SLOTS);
+ ut_ad(undo->rseg->is_persistent());
buf_block_t *block=
buf_page_get(page_id_t(undo->rseg->space->id, undo->hdr_page_no), 0,
@@ -1532,29 +1540,19 @@ the data can be discarded.
@param undo temporary undo log */
void trx_undo_commit_cleanup(trx_undo_t *undo)
{
- trx_rseg_t* rseg = undo->rseg;
- ut_ad(rseg->space == fil_system.temp_space);
-
- rseg->latch.wr_lock(SRW_LOCK_CALL);
-
- UT_LIST_REMOVE(rseg->undo_list, undo);
-
- if (undo->state == TRX_UNDO_CACHED) {
- UT_LIST_ADD_FIRST(rseg->undo_cached, undo);
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
- undo = nullptr;
- } else {
- ut_ad(undo->state == TRX_UNDO_TO_PURGE);
-
- /* Delete first the undo log segment in the file */
- trx_undo_seg_free(undo);
+ trx_rseg_t *rseg= undo->rseg;
+ ut_ad(rseg->space == fil_system.temp_space);
+ rseg->latch.wr_lock(SRW_LOCK_CALL);
- ut_ad(rseg->curr_size > undo->size);
- rseg->curr_size -= undo->size;
- }
+ UT_LIST_REMOVE(rseg->undo_list, undo);
+ ut_ad(undo->state == TRX_UNDO_TO_PURGE);
+ /* Delete first the undo log segment in the file */
+ trx_undo_seg_free(undo);
+ ut_ad(rseg->curr_size > undo->size);
+ rseg->curr_size-= undo->size;
- rseg->latch.wr_unlock();
- ut_free(undo);
+ rseg->latch.wr_unlock();
+ ut_free(undo);
}
/** At shutdown, frees the undo logs of a transaction. */