From 5aef0123a707415c56ffae48fc872e7d3ad292d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 19 Apr 2022 12:40:05 +0300 Subject: MDEV-28317 Assertion failures in row_undo_mod on recovery Starting with 10.3, an assertion would fail on the rollback of a recovered incomplete transaction if a table definition violates a FOREIGN KEY constraint. DICT_ERR_IGNORE_RECOVER_LOCK: Include also DICT_ERR_IGNORE_FK_NOKEY so that trx_resurrect_table_locks() will be able to load table definitions and resurrect IX locks. Previously, if the FOREIGN KEY constraints of a table were incomplete, the table would fail to load until rollback, and in 10.3 or later an assertion would fail that the rollback was not protected by a table IX lock. Thanks to commit 9de2e60d7491fcf3cd1f20a4be715ef0bedc316f there will be no problems to enforce subsequent FOREIGN KEY operations even though a table with invalid REFERENCES clause was loaded. --- storage/innobase/include/dict0types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'storage') diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h index bea08f398de..04c8b163b14 100644 --- a/storage/innobase/include/dict0types.h +++ b/storage/innobase/include/dict0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2019, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -64,7 +64,7 @@ enum dict_err_ignore_t { DICT_ERR_IGNORE_INDEX_ROOT = 2, /*!< ignore error if index root page is FIL_NULL or incorrect value */ DICT_ERR_IGNORE_CORRUPT = 4, /*!< skip corrupted indexes */ - DICT_ERR_IGNORE_RECOVER_LOCK = 8, + DICT_ERR_IGNORE_RECOVER_LOCK = 8 | DICT_ERR_IGNORE_FK_NOKEY, /*!< Used when recovering table locks for resurrected transactions. Silently load a missing -- cgit v1.2.1 From 188aae65e4fa43b73ee1af6ce26724c3dca61380 Mon Sep 17 00:00:00 2001 From: Vlad Lesin Date: Tue, 12 Apr 2022 13:39:04 +0300 Subject: MDEV-26224 InnoDB fails to remove AUTO_INCREMENT attribute Reset dict_table_t::persistent_autoinc when inplace alter table is committed successfully. --- storage/innobase/handler/handler0alter.cc | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'storage') diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index e79d9d67dbf..91a5fd3ca4a 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -10711,6 +10711,10 @@ commit_cache_norebuild( : NULL; DBUG_ASSERT((ctx->new_table->fts == NULL) == (ctx->new_table->fts_doc_id_index == NULL)); + if (table->found_next_number_field + && !altered_table->found_next_number_field) { + ctx->prebuilt->table->persistent_autoinc = 0; + } DBUG_RETURN(found); } @@ -11035,7 +11039,15 @@ ha_innobase::commit_inplace_alter_table( if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) { DBUG_ASSERT(!ctx0); MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); - ha_alter_info->group_commit_ctx = NULL; + if (table->found_next_number_field + && !altered_table->found_next_number_field) { + m_prebuilt->table->persistent_autoinc = 0; + /* Don't reset ha_alter_info->group_commit_ctx to make + partitions engine to call this 
function for all + partitions. */ + } + else + ha_alter_info->group_commit_ctx = NULL; DBUG_RETURN(false); } @@ -11543,6 +11555,8 @@ foreign_fail: row_mysql_unlock_data_dictionary(trx); trx->free(); MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); + /* There is no need to reset dict_table_t::persistent_autoinc + as the table is reloaded */ DBUG_RETURN(false); } -- cgit v1.2.1 From bc7ba7afee8ba6f7d8fe61078d4c46184dc6fa56 Mon Sep 17 00:00:00 2001 From: Dmitry Shulga Date: Fri, 22 Apr 2022 18:47:19 +0700 Subject: MDEV-27758: Errors when building Connect engine on os x 11.6.2 Added a check for whether the platform on which the build is being done supports vfork(). Set the HAVE_VFORK macro if the vfork() system call is supported. Use the vfork() system call if the HAVE_VFORK macro is set; otherwise use fork(). --- storage/connect/tabrest.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'storage') diff --git a/storage/connect/tabrest.cpp b/storage/connect/tabrest.cpp index c66d8d76f3d..7e8b51714fb 100644 --- a/storage/connect/tabrest.cpp +++ b/storage/connect/tabrest.cpp @@ -112,7 +112,11 @@ int Xcurl(PGLOBAL g, PCSZ Http, PCSZ Uri, PCSZ filename) } // endif f - pID = vfork(); +#ifdef HAVE_VFORK + pID = vfork(); +#else + pID = fork(); +#endif sprintf(fn, "-o%s", filename); if (pID == 0) { -- cgit v1.2.1 From b208030ef5b4274fd66cc9667a2dc96f6e63db81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 25 Apr 2022 14:14:02 +0300 Subject: MDEV-11415 merge fixup: Remove a redundant call In merge commit 921c5e931452301a09c84c53ffe35b81e6a1c71a the call log_free_check() was accidentally duplicated, causing a small performance regression on INSERT. 
--- storage/innobase/row/row0ins.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'storage') diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index d3ef1c89c4a..8cfad511d06 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -3194,9 +3194,6 @@ row_ins_clust_index_entry( ? BTR_NO_LOCKING_FLAG : 0; const ulint orig_n_fields = entry->n_fields; - /* Try first optimistic descent to the B-tree */ - log_free_check(); - /* For intermediate table during copy alter table, skip the undo log and record lock checking for insertion operation. -- cgit v1.2.1 From f21a87560091a8149752b13e097d382ba30786d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 27 Apr 2022 07:57:04 +0300 Subject: MDEV-28415 ALTER TABLE on a large table hangs InnoDB buf_flush_page(): Never wait for a page latch, even in checkpoint flushing (flush_type == BUF_FLUSH_LIST), to prevent a hang of the page cleaner threads when a large number of pages is latched. In mysql/mysql-server@9542f3015b00330ef537f6223565b28b82a5b325 it was claimed that such a hang only affects CREATE FULLTEXT INDEX. Their fix was to retain buffer-fix but release exclusive latch on non-leaf pages, and subsequently write to those pages while they are not associated with the mini-transaction, which would trip a debug assertion in the MariaDB version of mtr_t::memo_modify_page() and cause potential corruption when using the default MariaDB setting innodb_log_optimize_ddl=OFF. 
This change essentially backports a small part of commit 7cffb5f6e8a231a041152447be8980ce35d2c9b8 (MDEV-23399) from MariaDB Server 10.5.7. --- storage/innobase/buf/buf0flu.cc | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) (limited to 'storage') diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 2beddf4b243..e029948be55 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2020, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. Copyright (c) 2013, 2014, Fusion-io This program is free software; you can redistribute it and/or modify it under @@ -1160,15 +1160,10 @@ buf_flush_page( /* For table residing in temporary tablespace sync is done using IO_FIX and so before scheduling for flush ensure that page is not fixed. */ - flush = FALSE; + return FALSE; } else { rw_lock = &reinterpret_cast(bpage)->lock; - if (flush_type != BUF_FLUSH_LIST) { - flush = rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE); - } else { - /* Will SX lock later */ - flush = TRUE; - } + flush = rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE); } if (flush) { @@ -1190,22 +1185,6 @@ buf_flush_page( buf_pool_mutex_exit(buf_pool); - if (flush_type == BUF_FLUSH_LIST - && is_uncompressed - && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) { - - if (!fsp_is_system_temporary(bpage->id.space())) { - /* avoiding deadlock possibility involves - doublewrite buffer, should flush it, because - it might hold the another block->lock. */ - buf_dblwr_flush_buffered_writes(); - } else { - buf_dblwr_sync_datafiles(); - } - - rw_lock_sx_lock_gen(rw_lock, BUF_IO_WRITE); - } - /* If there is an observer that want to know if the asynchronous flushing was sent then notify it. 
Note: we set flush observer to a page with x-latch, so we can -- cgit v1.2.1 From 44a27a26e910c9fb27731bd2cb267262949ea2da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 27 Apr 2022 08:08:06 +0300 Subject: MDEV-28416 Incorrect AUTO_INCREMENT may be issued when close to UINT64_MAX ha_innobase::get_auto_increment(): In the overflow check, account for 64-bit unsigned integer wrap-around. Based on mysql/mysql-server@25ecfe7f49b5a649e96d462cb90602de9de3b919 --- storage/innobase/handler/ha_innodb.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'storage') diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 1884250ee48..53d5a07b79f 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16970,8 +16970,8 @@ ha_innobase::get_auto_increment( (3) It is restricted only for insert operations. */ - if (increment > 1 && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE - && autoinc < col_max_value) { + if (increment > 1 && increment <= ~autoinc && autoinc < col_max_value + && thd_sql_command(m_user_thd) != SQLCOM_ALTER_TABLE) { ulonglong prev_auto_inc = autoinc; -- cgit v1.2.1 From 0806592ac8fb7c6071defccb51e762a89f5bd8d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 27 Apr 2022 13:16:07 +0300 Subject: MDEV-28422 Page split breaks a gap lock btr_insert_into_right_sibling(): Inherit any gap lock from the left sibling to the right sibling before inserting the record to the right sibling and updating the node pointer(s). lock_update_node_pointer(): Update locks in case a node pointer will move. 
Based on mysql/mysql-server@c7d93c274fdc5c56e36458fa4000fa3a483ffffd --- storage/innobase/btr/btr0btr.cc | 6 +++--- storage/innobase/include/lock0lock.h | 38 ++++++++++++++++++++++++++++++++++-- storage/innobase/lock/lock0lock.cc | 15 ++++++++++++-- 3 files changed, 52 insertions(+), 7 deletions(-) (limited to 'storage') diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index bc85a955f80..b2c3b55cbaa 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2688,8 +2688,8 @@ btr_insert_into_right_sibling( max_size = page_get_max_insert_size_after_reorganize(next_page, 1); /* Extends gap lock for the next page */ - if (!dict_table_is_locking_disabled(cursor->index->table)) { - lock_update_split_left(next_block, block); + if (is_leaf && !dict_table_is_locking_disabled(cursor->index->table)) { + lock_update_node_pointer(block, next_block); } rec = page_cur_tuple_insert( diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 9c3f5d57f01..c7fb219a7d5 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 1996, 2022, Oracle and/or its affiliates. +Copyright (c) 2017, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -151,6 +151,40 @@ lock_update_copy_and_discard( which copied */ const buf_block_t* block); /*!< in: index page; NOT the root! */ +/** Update gap locks between the last record of the left_block and the +first record of the right_block when a record is about to be inserted +at the start of the right_block, even though it should "naturally" be +inserted as the last record of the left_block according to the +current node pointer in the parent page. + +That is, we assume that the lowest common ancestor of the left_block +and right_block routes the key of the new record to the left_block, +but a heuristic which tries to avoid overflowing left_block has chosen +to insert the record into right_block instead. Said ancestor performs +this routing by comparing the key of the record to a "split point" - +all records greater than or equal to the split point (node pointer) +are in right_block, and smaller ones in left_block. +The split point may be smaller than the smallest key in right_block. + +The gap between the last record on the left_block and the first record +on the right_block is represented as a gap lock attached to the supremum +pseudo-record of left_block, and a gap lock attached to the new first +record of right_block. + +Thus, inserting the new record, and subsequently adjusting the node +pointers in parent pages to values smaller than or equal to the new +record's key, will mean that gap will be sliced at a different place +("moved to the left"): fragment of the 1st gap will now become treated +as 2nd. Therefore, we must copy any GRANTED locks from 1st gap to the +2nd gap. 
Any WAITING locks must be of INSERT_INTENTION type (as no +other GAP locks ever wait for anything) and can stay at 1st gap, as +their only purpose is to notify the requester they can retry +insertion, and there's no correctness requirement to avoid waking them +up too soon. +@param left_block left page +@param right_block right page */ +void lock_update_node_pointer(const buf_block_t *left_block, + const buf_block_t *right_block); /*************************************************************//** Updates the lock table when a page is split to the left. */ void diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 37e23b56dfc..3398f09b772 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 1996, 2022, Oracle and/or its affiliates. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -3044,6 +3044,17 @@ lock_update_split_right( lock_mutex_exit(); } +void lock_update_node_pointer(const buf_block_t *left_block, + const buf_block_t *right_block) +{ + const ulint h= lock_get_min_heap_no(right_block); + + lock_mutex_enter(); + lock_rec_inherit_to_gap(right_block, left_block, + h, PAGE_HEAP_NO_SUPREMUM); + lock_mutex_exit(); +} + /*************************************************************//** Updates the lock table when a page is merged to the right. 
*/ void -- cgit v1.2.1 From 2c381d8cf65ad46936045fb7ee141de4e392cde7 Mon Sep 17 00:00:00 2001 From: Vlad Lesin Date: Thu, 14 Apr 2022 14:27:23 +0300 Subject: MDEV-17843 Assertion `page_rec_is_leaf(rec)' failed in lock_rec_queue_validate upon SHOW ENGINE INNODB STATUS lock_validate() accumulates page ids under locked lock_sys->mutex, then releases the latch, and invokes lock_rec_block_validate() for each page. Another thread is able to add/remove locks and change pages between releasing the latch in lock_validate() and acquiring it in lock_rec_validate_page(). lock_rec_validate_page() can invoke lock_rec_queue_validate() for a non-locked supremum, which can cause a ut_ad(page_rec_is_leaf(rec)) failure in lock_rec_queue_validate(). The fix is to invoke lock_rec_queue_validate() only for locked records in lock_rec_validate_page(). The error message in lock_rec_block_validate() is not necessary as BUF_GET_POSSIBLY_FREED mode is used to get the block from the buffer pool, and it is not an error if a block was evicted. The test case would require a new debug sync point. I think it's not necessary as the fixed code is debug-only. --- storage/innobase/lock/lock0lock.cc | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'storage') diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 3398f09b772..2fd2ef94365 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -5050,25 +5050,25 @@ loop: holding a space->latch. 
*/ if (!sync_check_find(SYNC_FSP)) for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) { - - if (i == PAGE_HEAP_NO_SUPREMUM - || lock_rec_get_nth_bit(lock, i)) { + bool locked = lock_rec_get_nth_bit(lock, i); + if (locked || i == PAGE_HEAP_NO_SUPREMUM) { rec = page_find_rec_with_heap_no(block->frame, i); ut_a(rec); - ut_ad(!lock_rec_get_nth_bit(lock, i) - || page_rec_is_leaf(rec)); - offsets = rec_get_offsets(rec, lock->index, offsets, - lock->index->n_core_fields, - ULINT_UNDEFINED, &heap); + ut_ad(!locked || page_rec_is_leaf(rec)); /* If this thread is holding the file space latch (fil_space_t::latch), the following check WILL break the latching order and may cause a deadlock of threads. */ - lock_rec_queue_validate( - TRUE, block, rec, lock->index, offsets); + if (locked) { + offsets = rec_get_offsets(rec, lock->index, + offsets, lock->index->n_core_fields, + ULINT_UNDEFINED, &heap); + lock_rec_queue_validate(TRUE, block, rec, + lock->index, offsets); + } nth_bit = i + 1; @@ -5161,13 +5161,6 @@ lock_rec_block_validate( BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, &mtr, &err); - if (err != DB_SUCCESS) { - ib::error() << "Lock rec block validate failed for tablespace " - << space->name - << " space_id " << space_id - << " page_no " << page_no << " err " << err; - } - if (block) { buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); -- cgit v1.2.1 From 20ae4816bba712a3faa0110c973e197d92f43b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 6 May 2022 09:30:17 +0300 Subject: MDEV-28478: INSERT into SPATIAL INDEX in TEMPORARY table writes log row_ins_sec_index_entry_low(): If a separate mini-transaction is needed to adjust the minimum bounding rectangle (MBR) in the parent page, we must disable redo logging if the table is a temporary table. For temporary tables, no log is supposed to be written, because the temporary tablespace will be reinitialized on server restart. rtr_update_mbr_field(): Plug a memory leak. 
--- storage/innobase/gis/gis0rtree.cc | 3 ++- storage/innobase/row/row0ins.cc | 10 +++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'storage') diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc index 50071bcfae4..b18642b0e3c 100644 --- a/storage/innobase/gis/gis0rtree.cc +++ b/storage/innobase/gis/gis0rtree.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2019, 2020, MariaDB Corporation. +Copyright (c) 2019, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -376,6 +376,7 @@ rtr_update_mbr_field( if (!rtr_update_mbr_field_in_place(index, rec, offsets, mbr, mtr)) { + mem_heap_free(heap); return(false); } diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index cbe6577c02a..0c0214b009e 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2900,8 +2900,12 @@ row_ins_sec_index_entry_low( rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); rtr_info_update_btr(&cursor, &rtr_info); - mtr_start(&mtr); - mtr.set_named_space(index->space); + mtr.start(); + if (index->table->is_temporary()) { + mtr.set_log_mode(MTR_LOG_NO_REDO); + } else { + mtr.set_named_space(index->space); + } search_mode &= ulint(~BTR_MODIFY_LEAF); search_mode |= BTR_MODIFY_TREE; err = btr_cur_search_to_nth_level( -- cgit v1.2.1