summary refs log tree commit diff
diff options
context:
space:
mode:
author Marko Mäkelä <marko.makela@mariadb.com> 2023-01-19 17:19:18 +0200
committer Marko Mäkelä <marko.makela@mariadb.com> 2023-01-19 17:19:18 +0200
commit f9cac8d2cbf82d4d616905fb3dfab34a9901179d (patch)
tree 8c9ade75b3ff51612862ae6d59476359329181cd
parent 67dc8af2a75a70df50f76119ed4bfafcf60548c4 (diff)
download mariadb-git-f9cac8d2cbf82d4d616905fb3dfab34a9901179d.tar.gz
MDEV-30400 Assertion height == btr_page_get_level(...) on INSERT
This also fixes part of MDEV-29835 Partial server freeze, which is caused by violations of the latching order that was defined in https://dev.mysql.com/worklog/task/?id=6326 (WL#6326: InnoDB: fix index->lock contention). Unless the current thread is holding an exclusive dict_index_t::lock, it must acquire page latches in a strict parent-to-child, left-to-right order. Not all cases are fixed yet. Failure to follow the correct latching order will cause deadlocks of threads due to lock order inversion.
As part of these changes, the BTR_MODIFY_TREE mode is modified so that an Update latch (U a.k.a. SX) will be acquired on the root page, and eXclusive latches (X) will be acquired on all pages leading to the leaf page, as well as any left and right siblings of the pages along the path.
The test innodb.innodb_wl6326 will be removed, because at the time the DEBUG_SYNC point is hit, the thread is actually holding several page latches that will be blocking a concurrent SELECT statement.
We also remove double bookkeeping that was caused by excessive information hiding in mtr_t::m_memo. We simply let mtr_t::m_memo store information of latched pages, and ensure that mtr_memo_slot_t::object is never a null pointer. The tree_blocks[] and tree_savepoints[] were redundant.
mtr_t::get_already_latched(): Look up a latched page in mtr_t::m_memo. This avoids many redundant entries in mtr_t::m_memo, as well as redundant calls to buf_page_get_gen() for blocks that had already been looked up in a mini-transaction.
btr_get_latched_root(): Return a pointer to an already latched root page. This replaces btr_root_block_get() in cases where the mini-transaction has already latched the root page.
btr_page_get_parent(): Fetch a parent page that was already latched in BTR_MODIFY_TREE, by invoking mtr_t::get_already_latched(). If needed, upgrade the root page U latch to X. This avoids bloating mtr_t::m_memo as well as redundant buf_pool.page_hash lookups.
For non-QUICK CHECK TABLE as well as for B-tree defragmentation, we will invoke btr_cur_search_to_nth_level().
btr_cur_search_to_nth_level(): This will only be used for non-leaf (level>0) B-tree searches that were formerly named BTR_CONT_SEARCH_TREE or BTR_CONT_MODIFY_TREE. In MDEV-29835, this function could be removed altogether, or retained for the case of CHECK TABLE without QUICK.
btr_cur_t::search_leaf(): Replaces btr_cur_search_to_nth_level() for searches to level=0 (the leaf level).
btr_cur_t::pessimistic_search_leaf(): Implement the new BTR_MODIFY_TREE latching logic in the case that page splits or merges will be needed. The parent pages (and their siblings) should already be latched on the first dive to the leaf and be present in mtr_t::m_memo; there should be no need for BTR_CONT_MODIFY_TREE. This pre-latching almost suffices; MDEV-29835 will have to revise it and remove work-arounds where mtr_t::get_already_latched() fails to find a block.
rtr_search_to_nth_level(): A SPATIAL INDEX version of btr_cur_search_to_nth_level() that can search to any level (including the leaf level).
rtr_search_leaf(), rtr_insert_leaf(): Wrappers for rtr_search_to_nth_level().
rtr_search(): Replaces rtr_pcur_open().
rtr_cur_restore_position(): Remove an unused constant parameter.
btr_pcur_open_on_user_rec(): Remove the constant parameter mode=PAGE_CUR_GE.
btr_cur_latch_leaves(): Update a pre-existing mtr_t::m_memo entry for the current leaf page.
row_ins_clust_index_entry_low(): Use a new mode=BTR_MODIFY_ROOT_AND_LEAF to gain access to the root page when mode!=BTR_MODIFY_TREE, to write the PAGE_ROOT_AUTO_INC.
btr_cur_t::open_leaf(): Some clean-up.
mtr_t::lock_register(): Register a page latch on a buffer-fixed block.
BTR_SEARCH_TREE, BTR_CONT_SEARCH_TREE: Remove.
BTR_CONT_MODIFY_TREE: Note that this is only used by rtr_search_to_nth_level().
btr_pcur_optimistic_latch_leaves(): Replaces btr_cur_optimistic_latch_leaves().
ibuf_delete_rec(): Acquire ibuf.index->lock.u_lock() in order to avoid a deadlock with ibuf_insert_low(BTR_MODIFY_PREV).
Tested by: Matthias Leich
-rw-r--r-- mysql-test/suite/innodb/r/innodb_wl6326.result 405
-rw-r--r-- mysql-test/suite/innodb/t/innodb_wl6326.opt 1
-rw-r--r-- mysql-test/suite/innodb/t/innodb_wl6326.test 519
-rw-r--r-- mysql-test/suite/innodb_gis/r/rtree_split.result 12
-rw-r--r-- mysql-test/suite/innodb_gis/t/rtree_split.test 15
-rw-r--r-- storage/innobase/btr/btr0btr.cc 498
-rw-r--r-- storage/innobase/btr/btr0cur.cc 2254
-rw-r--r-- storage/innobase/btr/btr0defragment.cc 68
-rw-r--r-- storage/innobase/btr/btr0pcur.cc 109
-rw-r--r-- storage/innobase/btr/btr0sea.cc 22
-rw-r--r-- storage/innobase/dict/dict0crea.cc 29
-rw-r--r-- storage/innobase/dict/dict0dict.cc 8
-rw-r--r-- storage/innobase/dict/dict0load.cc 23
-rw-r--r-- storage/innobase/dict/dict0stats.cc 4
-rw-r--r-- storage/innobase/fsp/fsp0fsp.cc 25
-rw-r--r-- storage/innobase/gis/gis0sea.cc 634
-rw-r--r-- storage/innobase/handler/ha_innodb.cc 3
-rw-r--r-- storage/innobase/handler/handler0alter.cc 9
-rw-r--r-- storage/innobase/ibuf/ibuf0ibuf.cc 24
-rw-r--r-- storage/innobase/include/btr0btr.h 11
-rw-r--r-- storage/innobase/include/btr0cur.h 89
-rw-r--r-- storage/innobase/include/btr0pcur.h 49
-rw-r--r-- storage/innobase/include/btr0pcur.inl 40
-rw-r--r-- storage/innobase/include/btr0types.h 32
-rw-r--r-- storage/innobase/include/gis0rtree.h 46
-rw-r--r-- storage/innobase/include/gis0type.h 8
-rw-r--r-- storage/innobase/include/ibuf0ibuf.inl 3
-rw-r--r-- storage/innobase/include/mtr0log.h 6
-rw-r--r-- storage/innobase/include/mtr0mtr.h 84
-rw-r--r-- storage/innobase/include/small_vector.h 3
-rw-r--r-- storage/innobase/mtr/mtr0mtr.cc 39
-rw-r--r-- storage/innobase/row/row0import.cc 5
-rw-r--r-- storage/innobase/row/row0ins.cc 104
-rw-r--r-- storage/innobase/row/row0log.cc 33
-rw-r--r-- storage/innobase/row/row0merge.cc 18
-rw-r--r-- storage/innobase/row/row0purge.cc 46
-rw-r--r-- storage/innobase/row/row0row.cc 16
-rw-r--r-- storage/innobase/row/row0sel.cc 16
-rw-r--r-- storage/innobase/row/row0uins.cc 44
-rw-r--r-- storage/innobase/row/row0umod.cc 53
-rw-r--r-- storage/innobase/row/row0upd.cc 53
41 files changed, 2309 insertions, 3151 deletions
diff --git a/mysql-test/suite/innodb/r/innodb_wl6326.result b/mysql-test/suite/innodb/r/innodb_wl6326.result
deleted file mode 100644
index fcd58aedafe..00000000000
--- a/mysql-test/suite/innodb/r/innodb_wl6326.result
+++ /dev/null
@@ -1,405 +0,0 @@
-SET GLOBAL innodb_adaptive_hash_index = false;
-SET GLOBAL innodb_stats_persistent = false;
-connect con1,localhost,root,,;
-connect con2,localhost,root,,;
-connect con3,localhost,root,,;
-CREATE TABLE t1 (
-a00 CHAR(255) NOT NULL DEFAULT 'a',
-a01 CHAR(255) NOT NULL DEFAULT 'a',
-a02 CHAR(255) NOT NULL DEFAULT 'a',
-a03 CHAR(255) NOT NULL DEFAULT 'a',
-a04 CHAR(255) NOT NULL DEFAULT 'a',
-a05 CHAR(255) NOT NULL DEFAULT 'a',
-a06 CHAR(255) NOT NULL DEFAULT 'a',
-b INT NOT NULL DEFAULT 0
-) ENGINE = InnoDB;
-ALTER TABLE t1 ADD PRIMARY KEY(
-a00,
-a01,
-a02,
-a03,
-a04,
-a05,
-a06
-);
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-1
-SET GLOBAL innodb_limit_optimistic_insert_debug = 7;
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('aa');
-INSERT INTO t1 (a00) VALUES ('ab');
-INSERT INTO t1 (a00) VALUES ('ac');
-INSERT INTO t1 (a00) VALUES ('ad');
-INSERT INTO t1 (a00) VALUES ('ae');
-INSERT INTO t1 (a00) VALUES ('af');
-INSERT INTO t1 (a00) VALUES ('ag');
-INSERT INTO t1 (a00) VALUES ('ah');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-3
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('ai');
-INSERT INTO t1 (a00) VALUES ('aj');
-INSERT INTO t1 (a00) VALUES ('ak');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-4
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('al');
-INSERT INTO t1 (a00) VALUES ('am');
-INSERT INTO t1 (a00) VALUES ('an');
-INSERT INTO t1 (a00) VALUES ('ao');
-INSERT INTO t1 (a00) VALUES ('ap');
-INSERT INTO t1 (a00) VALUES ('aq');
-INSERT INTO t1 (a00) VALUES ('ar');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-5
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('as');
-INSERT INTO t1 (a00) VALUES ('at');
-INSERT INTO t1 (a00) VALUES ('au');
-INSERT INTO t1 (a00) VALUES ('av');
-INSERT INTO t1 (a00) VALUES ('aw');
-INSERT INTO t1 (a00) VALUES ('ax');
-INSERT INTO t1 (a00) VALUES ('ay');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-6
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('az');
-INSERT INTO t1 (a00) VALUES ('ba');
-INSERT INTO t1 (a00) VALUES ('bb');
-INSERT INTO t1 (a00) VALUES ('bc');
-INSERT INTO t1 (a00) VALUES ('bd');
-INSERT INTO t1 (a00) VALUES ('be');
-INSERT INTO t1 (a00) VALUES ('bf');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-7
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bg');
-INSERT INTO t1 (a00) VALUES ('bh');
-INSERT INTO t1 (a00) VALUES ('bi');
-INSERT INTO t1 (a00) VALUES ('bj');
-INSERT INTO t1 (a00) VALUES ('bk');
-INSERT INTO t1 (a00) VALUES ('bl');
-INSERT INTO t1 (a00) VALUES ('bm');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-8
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bn');
-INSERT INTO t1 (a00) VALUES ('bo');
-INSERT INTO t1 (a00) VALUES ('bp');
-INSERT INTO t1 (a00) VALUES ('bq');
-INSERT INTO t1 (a00) VALUES ('br');
-INSERT INTO t1 (a00) VALUES ('bs');
-INSERT INTO t1 (a00) VALUES ('bt');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-11
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bu');
-INSERT INTO t1 (a00) VALUES ('bv');
-INSERT INTO t1 (a00) VALUES ('bw');
-INSERT INTO t1 (a00) VALUES ('bx');
-INSERT INTO t1 (a00) VALUES ('by');
-INSERT INTO t1 (a00) VALUES ('bz');
-INSERT INTO t1 (a00) VALUES ('ca');
-INSERT INTO t1 (a00) VALUES ('cb');
-INSERT INTO t1 (a00) VALUES ('cc');
-INSERT INTO t1 (a00) VALUES ('cd');
-INSERT INTO t1 (a00) VALUES ('ce');
-INSERT INTO t1 (a00) VALUES ('cf');
-INSERT INTO t1 (a00) VALUES ('cg');
-INSERT INTO t1 (a00) VALUES ('ch');
-INSERT INTO t1 (a00) VALUES ('ci');
-INSERT INTO t1 (a00) VALUES ('cj');
-INSERT INTO t1 (a00) VALUES ('ck');
-INSERT INTO t1 (a00) VALUES ('cl');
-INSERT INTO t1 (a00) VALUES ('cm');
-INSERT INTO t1 (a00) VALUES ('cn');
-INSERT INTO t1 (a00) VALUES ('co');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-15
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('cp');
-INSERT INTO t1 (a00) VALUES ('cq');
-INSERT INTO t1 (a00) VALUES ('cr');
-INSERT INTO t1 (a00) VALUES ('cs');
-INSERT INTO t1 (a00) VALUES ('ct');
-INSERT INTO t1 (a00) VALUES ('cu');
-INSERT INTO t1 (a00) VALUES ('cv');
-INSERT INTO t1 (a00) VALUES ('cw');
-INSERT INTO t1 (a00) VALUES ('cx');
-INSERT INTO t1 (a00) VALUES ('cy');
-INSERT INTO t1 (a00) VALUES ('cz');
-INSERT INTO t1 (a00) VALUES ('da');
-INSERT INTO t1 (a00) VALUES ('db');
-INSERT INTO t1 (a00) VALUES ('dc');
-INSERT INTO t1 (a00) VALUES ('dd');
-INSERT INTO t1 (a00) VALUES ('de');
-INSERT INTO t1 (a00) VALUES ('df');
-INSERT INTO t1 (a00) VALUES ('dg');
-INSERT INTO t1 (a00) VALUES ('dh');
-INSERT INTO t1 (a00) VALUES ('di');
-INSERT INTO t1 (a00) VALUES ('dj');
-INSERT INTO t1 (a00) VALUES ('dk');
-INSERT INTO t1 (a00) VALUES ('dl');
-INSERT INTO t1 (a00) VALUES ('dm');
-INSERT INTO t1 (a00) VALUES ('dn');
-INSERT INTO t1 (a00) VALUES ('do');
-INSERT INTO t1 (a00) VALUES ('dp');
-INSERT INTO t1 (a00) VALUES ('dq');
-INSERT INTO t1 (a00) VALUES ('dr');
-INSERT INTO t1 (a00) VALUES ('ds');
-INSERT INTO t1 (a00) VALUES ('dt');
-INSERT INTO t1 (a00) VALUES ('du');
-INSERT INTO t1 (a00) VALUES ('dv');
-INSERT INTO t1 (a00) VALUES ('dw');
-INSERT INTO t1 (a00) VALUES ('dx');
-INSERT INTO t1 (a00) VALUES ('dy');
-INSERT INTO t1 (a00) VALUES ('dz');
-INSERT INTO t1 (a00) VALUES ('ea');
-INSERT INTO t1 (a00) VALUES ('eb');
-INSERT INTO t1 (a00) VALUES ('ec');
-INSERT INTO t1 (a00) VALUES ('ed');
-INSERT INTO t1 (a00) VALUES ('ee');
-INSERT INTO t1 (a00) VALUES ('ef');
-INSERT INTO t1 (a00) VALUES ('eg');
-INSERT INTO t1 (a00) VALUES ('eh');
-INSERT INTO t1 (a00) VALUES ('ei');
-INSERT INTO t1 (a00) VALUES ('ej');
-INSERT INTO t1 (a00) VALUES ('ek');
-INSERT INTO t1 (a00) VALUES ('el');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-23
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('em');
-INSERT INTO t1 (a00) VALUES ('en');
-INSERT INTO t1 (a00) VALUES ('eo');
-INSERT INTO t1 (a00) VALUES ('ep');
-INSERT INTO t1 (a00) VALUES ('eq');
-INSERT INTO t1 (a00) VALUES ('er');
-INSERT INTO t1 (a00) VALUES ('es');
-INSERT INTO t1 (a00) VALUES ('et');
-INSERT INTO t1 (a00) VALUES ('eu');
-INSERT INTO t1 (a00) VALUES ('ev');
-INSERT INTO t1 (a00) VALUES ('ew');
-INSERT INTO t1 (a00) VALUES ('ex');
-INSERT INTO t1 (a00) VALUES ('ey');
-INSERT INTO t1 (a00) VALUES ('ez');
-INSERT INTO t1 (a00) VALUES ('fa');
-INSERT INTO t1 (a00) VALUES ('fb');
-INSERT INTO t1 (a00) VALUES ('fc');
-INSERT INTO t1 (a00) VALUES ('fd');
-INSERT INTO t1 (a00) VALUES ('fe');
-INSERT INTO t1 (a00) VALUES ('ff');
-INSERT INTO t1 (a00) VALUES ('fg');
-INSERT INTO t1 (a00) VALUES ('fh');
-INSERT INTO t1 (a00) VALUES ('fi');
-INSERT INTO t1 (a00) VALUES ('fj');
-INSERT INTO t1 (a00) VALUES ('fk');
-INSERT INTO t1 (a00) VALUES ('fl');
-INSERT INTO t1 (a00) VALUES ('fm');
-INSERT INTO t1 (a00) VALUES ('fn');
-INSERT INTO t1 (a00) VALUES ('fo');
-INSERT INTO t1 (a00) VALUES ('fp');
-INSERT INTO t1 (a00) VALUES ('fq');
-INSERT INTO t1 (a00) VALUES ('fr');
-INSERT INTO t1 (a00) VALUES ('fs');
-INSERT INTO t1 (a00) VALUES ('ft');
-INSERT INTO t1 (a00) VALUES ('fu');
-INSERT INTO t1 (a00) VALUES ('fv');
-INSERT INTO t1 (a00) VALUES ('fw');
-INSERT INTO t1 (a00) VALUES ('fx');
-INSERT INTO t1 (a00) VALUES ('fy');
-INSERT INTO t1 (a00) VALUES ('fz');
-INSERT INTO t1 (a00) VALUES ('ga');
-INSERT INTO t1 (a00) VALUES ('gb');
-INSERT INTO t1 (a00) VALUES ('gc');
-INSERT INTO t1 (a00) VALUES ('gd');
-INSERT INTO t1 (a00) VALUES ('ge');
-INSERT INTO t1 (a00) VALUES ('gf');
-INSERT INTO t1 (a00) VALUES ('gg');
-INSERT INTO t1 (a00) VALUES ('gh');
-COMMIT;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-29
-SET GLOBAL innodb_limit_optimistic_insert_debug = 0;
-# Test start
-SET DEBUG_SYNC = 'RESET';
-INSERT INTO t1 (a00) VALUES ('bfa');
-connection con1;
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-INSERT INTO t1 (a00) VALUES ('bfb');
-connection con2;
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-a00 a01
-aa a
-SELECT a00,a01 FROM t1 WHERE a00 = 'aq';
-a00 a01
-aq a
-SELECT a00,a01 FROM t1 WHERE a00 = 'cp';
-a00 a01
-cp a
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-a00 a01
-el a
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-SELECT a00,a01 FROM t1 WHERE a00 = 'ar';
-connection con3;
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-SELECT a00,a01 FROM t1 WHERE a00 = 'cn';
-connection default;
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
-SET DEBUG_SYNC = 'now SIGNAL continue';
-connection con1;
-connection con2;
-a00 a01
-ar a
-connection con3;
-a00 a01
-cn a
-connection default;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-30
-SET DEBUG_SYNC = 'RESET';
-INSERT INTO t1 (a00) VALUES ('coa');
-connection con1;
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-INSERT INTO t1 (a00) VALUES ('cob');
-connection con2;
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-connection con3;
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-connection default;
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
-SET DEBUG_SYNC = 'now SIGNAL continue';
-connection con1;
-connection con2;
-a00 a01
-aa a
-connection con3;
-a00 a01
-el a
-connection default;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-31
-SET DEBUG_SYNC = 'RESET';
-INSERT INTO t1 (a00) VALUES ('gba');
-connection con1;
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-INSERT INTO t1 (a00) VALUES ('gbb');
-connection con2;
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-a00 a01
-aa a
-SELECT a00,a01 FROM t1 WHERE a00 = 'ek';
-a00 a01
-ek a
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-connection con3;
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-SELECT a00,a01 FROM t1 WHERE a00 = 'gb';
-connection default;
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
-SET DEBUG_SYNC = 'now SIGNAL continue';
-connection con1;
-connection con2;
-a00 a01
-el a
-connection con3;
-a00 a01
-gb a
-connection default;
-ANALYZE TABLE t1;
-Table Op Msg_type Msg_text
-test.t1 analyze status Engine-independent statistics collected
-test.t1 analyze status OK
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-CLUST_INDEX_SIZE
-32
-SET DEBUG_SYNC = 'RESET';
-connection default;
-disconnect con1;
-disconnect con2;
-disconnect con3;
-DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.opt b/mysql-test/suite/innodb/t/innodb_wl6326.opt
deleted file mode 100644
index 99bf0e5a28b..00000000000
--- a/mysql-test/suite/innodb/t/innodb_wl6326.opt
+++ /dev/null
@@ -1 +0,0 @@
---innodb-sys-tablestats
diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.test b/mysql-test/suite/innodb/t/innodb_wl6326.test
deleted file mode 100644
index 1cf98cd1c7b..00000000000
--- a/mysql-test/suite/innodb/t/innodb_wl6326.test
+++ /dev/null
@@ -1,519 +0,0 @@
-#
-# WL#6326: InnoDB: fix index->lock contention
-#
-
---source include/have_innodb.inc
---source include/have_debug.inc
---source include/have_debug_sync.inc
---source include/have_innodb_16k.inc
-
---disable_query_log
-SET @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug;
-SET @old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index;
-SET @old_innodb_stats_persistent = @@innodb_stats_persistent;
---enable_query_log
-
-# Save the initial number of concurrent sessions
---source include/count_sessions.inc
-
-SET GLOBAL innodb_adaptive_hash_index = false;
-SET GLOBAL innodb_stats_persistent = false;
-
---connect (con1,localhost,root,,)
---connect (con2,localhost,root,,)
---connect (con3,localhost,root,,)
-
-CREATE TABLE t1 (
- a00 CHAR(255) NOT NULL DEFAULT 'a',
- a01 CHAR(255) NOT NULL DEFAULT 'a',
- a02 CHAR(255) NOT NULL DEFAULT 'a',
- a03 CHAR(255) NOT NULL DEFAULT 'a',
- a04 CHAR(255) NOT NULL DEFAULT 'a',
- a05 CHAR(255) NOT NULL DEFAULT 'a',
- a06 CHAR(255) NOT NULL DEFAULT 'a',
- b INT NOT NULL DEFAULT 0
-) ENGINE = InnoDB;
-
-ALTER TABLE t1 ADD PRIMARY KEY(
- a00,
- a01,
- a02,
- a03,
- a04,
- a05,
- a06
-);
-
-#
-# Prepare primary key index tree to be used for this test.
-#
-
-# Only root (1)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-# Make the first records sparse artificially,
-# not to cause modify_tree by single node_ptr insert operation.
-# * (7 - 2) records should be larger than a half of the page size
-# * (7 + 2) records should be fit to the page
-# (above t1 definition is already adjusted)
-SET GLOBAL innodb_limit_optimistic_insert_debug = 7;
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('aa');
-INSERT INTO t1 (a00) VALUES ('ab');
-INSERT INTO t1 (a00) VALUES ('ac');
-INSERT INTO t1 (a00) VALUES ('ad');
-INSERT INTO t1 (a00) VALUES ('ae');
-INSERT INTO t1 (a00) VALUES ('af');
-INSERT INTO t1 (a00) VALUES ('ag');
-INSERT INTO t1 (a00) VALUES ('ah');
-COMMIT;
-# Raise root (1-2)
-# (aa,ad)
-# (aa,ab,ac)(ad,ae,af,ag,ah)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('ai');
-INSERT INTO t1 (a00) VALUES ('aj');
-INSERT INTO t1 (a00) VALUES ('ak');
-COMMIT;
-# Split leaf (1-3)
-# (aa,ad,ak)
-# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('al');
-INSERT INTO t1 (a00) VALUES ('am');
-INSERT INTO t1 (a00) VALUES ('an');
-INSERT INTO t1 (a00) VALUES ('ao');
-INSERT INTO t1 (a00) VALUES ('ap');
-INSERT INTO t1 (a00) VALUES ('aq');
-INSERT INTO t1 (a00) VALUES ('ar');
-COMMIT;
-# Split leaf (1-4)
-# (aa,ad,ak,ar)
-# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('as');
-INSERT INTO t1 (a00) VALUES ('at');
-INSERT INTO t1 (a00) VALUES ('au');
-INSERT INTO t1 (a00) VALUES ('av');
-INSERT INTO t1 (a00) VALUES ('aw');
-INSERT INTO t1 (a00) VALUES ('ax');
-INSERT INTO t1 (a00) VALUES ('ay');
-COMMIT;
-# Split leaf (1-5)
-# (aa,ad,ak,ar,ay)
-# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar,as,at,au,av,aw,ax)(ay)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('az');
-INSERT INTO t1 (a00) VALUES ('ba');
-INSERT INTO t1 (a00) VALUES ('bb');
-INSERT INTO t1 (a00) VALUES ('bc');
-INSERT INTO t1 (a00) VALUES ('bd');
-INSERT INTO t1 (a00) VALUES ('be');
-INSERT INTO t1 (a00) VALUES ('bf');
-COMMIT;
-# Split leaf (1-6)
-# (aa,ad,ak,ar,ay,bf)
-# (aa,ab,ac)(ad..)(ak..)(ar,as,at,au,av,aw,ax)(ay,az,ba,bb,bc,bd,be)(bf)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bg');
-INSERT INTO t1 (a00) VALUES ('bh');
-INSERT INTO t1 (a00) VALUES ('bi');
-INSERT INTO t1 (a00) VALUES ('bj');
-INSERT INTO t1 (a00) VALUES ('bk');
-INSERT INTO t1 (a00) VALUES ('bl');
-INSERT INTO t1 (a00) VALUES ('bm');
-COMMIT;
-# Split leaf (1-7)
-# (aa,ad,ak,ar,ay,bf,bm)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay,az,ba,bb,bc,bd,be)(bf,bg,bh,bi,bj,bk,bl)(bm)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bn');
-INSERT INTO t1 (a00) VALUES ('bo');
-INSERT INTO t1 (a00) VALUES ('bp');
-INSERT INTO t1 (a00) VALUES ('bq');
-INSERT INTO t1 (a00) VALUES ('br');
-INSERT INTO t1 (a00) VALUES ('bs');
-INSERT INTO t1 (a00) VALUES ('bt');
-COMMIT;
-# Raise root (1-2-8)
-# (aa,ar)
-# (aa,ad,ak) (ar,ay,bf,bm,bt)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('bu');
-INSERT INTO t1 (a00) VALUES ('bv');
-INSERT INTO t1 (a00) VALUES ('bw');
-INSERT INTO t1 (a00) VALUES ('bx');
-INSERT INTO t1 (a00) VALUES ('by');
-INSERT INTO t1 (a00) VALUES ('bz');
-INSERT INTO t1 (a00) VALUES ('ca');
-
-INSERT INTO t1 (a00) VALUES ('cb');
-INSERT INTO t1 (a00) VALUES ('cc');
-INSERT INTO t1 (a00) VALUES ('cd');
-INSERT INTO t1 (a00) VALUES ('ce');
-INSERT INTO t1 (a00) VALUES ('cf');
-INSERT INTO t1 (a00) VALUES ('cg');
-INSERT INTO t1 (a00) VALUES ('ch');
-
-INSERT INTO t1 (a00) VALUES ('ci');
-INSERT INTO t1 (a00) VALUES ('cj');
-INSERT INTO t1 (a00) VALUES ('ck');
-INSERT INTO t1 (a00) VALUES ('cl');
-INSERT INTO t1 (a00) VALUES ('cm');
-INSERT INTO t1 (a00) VALUES ('cn');
-INSERT INTO t1 (a00) VALUES ('co');
-COMMIT;
-# Split also at level 1 (1-3-11)
-# (aa,ar,co)
-# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('cp');
-INSERT INTO t1 (a00) VALUES ('cq');
-INSERT INTO t1 (a00) VALUES ('cr');
-INSERT INTO t1 (a00) VALUES ('cs');
-INSERT INTO t1 (a00) VALUES ('ct');
-INSERT INTO t1 (a00) VALUES ('cu');
-INSERT INTO t1 (a00) VALUES ('cv');
-
-INSERT INTO t1 (a00) VALUES ('cw');
-INSERT INTO t1 (a00) VALUES ('cx');
-INSERT INTO t1 (a00) VALUES ('cy');
-INSERT INTO t1 (a00) VALUES ('cz');
-INSERT INTO t1 (a00) VALUES ('da');
-INSERT INTO t1 (a00) VALUES ('db');
-INSERT INTO t1 (a00) VALUES ('dc');
-
-INSERT INTO t1 (a00) VALUES ('dd');
-INSERT INTO t1 (a00) VALUES ('de');
-INSERT INTO t1 (a00) VALUES ('df');
-INSERT INTO t1 (a00) VALUES ('dg');
-INSERT INTO t1 (a00) VALUES ('dh');
-INSERT INTO t1 (a00) VALUES ('di');
-INSERT INTO t1 (a00) VALUES ('dj');
-
-INSERT INTO t1 (a00) VALUES ('dk');
-INSERT INTO t1 (a00) VALUES ('dl');
-INSERT INTO t1 (a00) VALUES ('dm');
-INSERT INTO t1 (a00) VALUES ('dn');
-INSERT INTO t1 (a00) VALUES ('do');
-INSERT INTO t1 (a00) VALUES ('dp');
-INSERT INTO t1 (a00) VALUES ('dq');
-
-INSERT INTO t1 (a00) VALUES ('dr');
-INSERT INTO t1 (a00) VALUES ('ds');
-INSERT INTO t1 (a00) VALUES ('dt');
-INSERT INTO t1 (a00) VALUES ('du');
-INSERT INTO t1 (a00) VALUES ('dv');
-INSERT INTO t1 (a00) VALUES ('dw');
-INSERT INTO t1 (a00) VALUES ('dx');
-
-INSERT INTO t1 (a00) VALUES ('dy');
-INSERT INTO t1 (a00) VALUES ('dz');
-INSERT INTO t1 (a00) VALUES ('ea');
-INSERT INTO t1 (a00) VALUES ('eb');
-INSERT INTO t1 (a00) VALUES ('ec');
-INSERT INTO t1 (a00) VALUES ('ed');
-INSERT INTO t1 (a00) VALUES ('ee');
-
-INSERT INTO t1 (a00) VALUES ('ef');
-INSERT INTO t1 (a00) VALUES ('eg');
-INSERT INTO t1 (a00) VALUES ('eh');
-INSERT INTO t1 (a00) VALUES ('ei');
-INSERT INTO t1 (a00) VALUES ('ej');
-INSERT INTO t1 (a00) VALUES ('ek');
-INSERT INTO t1 (a00) VALUES ('el');
-COMMIT;
-# Split also at level 1 (1-4-18)
-# (aa,ar,co,el)
-# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-BEGIN;
-INSERT INTO t1 (a00) VALUES ('em');
-INSERT INTO t1 (a00) VALUES ('en');
-INSERT INTO t1 (a00) VALUES ('eo');
-INSERT INTO t1 (a00) VALUES ('ep');
-INSERT INTO t1 (a00) VALUES ('eq');
-INSERT INTO t1 (a00) VALUES ('er');
-INSERT INTO t1 (a00) VALUES ('es');
-
-INSERT INTO t1 (a00) VALUES ('et');
-INSERT INTO t1 (a00) VALUES ('eu');
-INSERT INTO t1 (a00) VALUES ('ev');
-INSERT INTO t1 (a00) VALUES ('ew');
-INSERT INTO t1 (a00) VALUES ('ex');
-INSERT INTO t1 (a00) VALUES ('ey');
-INSERT INTO t1 (a00) VALUES ('ez');
-
-INSERT INTO t1 (a00) VALUES ('fa');
-INSERT INTO t1 (a00) VALUES ('fb');
-INSERT INTO t1 (a00) VALUES ('fc');
-INSERT INTO t1 (a00) VALUES ('fd');
-INSERT INTO t1 (a00) VALUES ('fe');
-INSERT INTO t1 (a00) VALUES ('ff');
-INSERT INTO t1 (a00) VALUES ('fg');
-
-INSERT INTO t1 (a00) VALUES ('fh');
-INSERT INTO t1 (a00) VALUES ('fi');
-INSERT INTO t1 (a00) VALUES ('fj');
-INSERT INTO t1 (a00) VALUES ('fk');
-INSERT INTO t1 (a00) VALUES ('fl');
-INSERT INTO t1 (a00) VALUES ('fm');
-INSERT INTO t1 (a00) VALUES ('fn');
-
-INSERT INTO t1 (a00) VALUES ('fo');
-INSERT INTO t1 (a00) VALUES ('fp');
-INSERT INTO t1 (a00) VALUES ('fq');
-INSERT INTO t1 (a00) VALUES ('fr');
-INSERT INTO t1 (a00) VALUES ('fs');
-INSERT INTO t1 (a00) VALUES ('ft');
-INSERT INTO t1 (a00) VALUES ('fu');
-
-INSERT INTO t1 (a00) VALUES ('fv');
-INSERT INTO t1 (a00) VALUES ('fw');
-INSERT INTO t1 (a00) VALUES ('fx');
-INSERT INTO t1 (a00) VALUES ('fy');
-INSERT INTO t1 (a00) VALUES ('fz');
-INSERT INTO t1 (a00) VALUES ('ga');
-INSERT INTO t1 (a00) VALUES ('gb');
-
-INSERT INTO t1 (a00) VALUES ('gc');
-INSERT INTO t1 (a00) VALUES ('gd');
-INSERT INTO t1 (a00) VALUES ('ge');
-INSERT INTO t1 (a00) VALUES ('gf');
-INSERT INTO t1 (a00) VALUES ('gg');
-INSERT INTO t1 (a00) VALUES ('gh');
-COMMIT;
-
-# Current tree form (1-4-24)
-# (aa,ar,co,el)
-# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el..,gb)
-# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el..)..(gb..)
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-# Insert the rest of records normally
-SET GLOBAL innodb_limit_optimistic_insert_debug = 0;
-
---echo # Test start
-
-# (1) Insert records to leaf page (bf..) and cause modify_page.
-# - root page is not X latched
-# - latched from level 1 page (ar,ay,bf,bm,bt,ca,ch)
-
-SET DEBUG_SYNC = 'RESET';
-
-# Filling leaf page (bf..)
-INSERT INTO t1 (a00) VALUES ('bfa');
-
---connection con1
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-# Cause modify_tree
---send
-INSERT INTO t1 (a00) VALUES ('bfb');
-
---connection con2
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-# Not blocked searches
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-SELECT a00,a01 FROM t1 WHERE a00 = 'aq';
-# "where a00 = 'co'" is blocked because searching from smaller ('co','a','a',..).
-SELECT a00,a01 FROM t1 WHERE a00 = 'cp';
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'ar';
-
---connection con3
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'cn';
-
---connection default
-# FIXME: These occasionally time out!
---disable_warnings
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
---enable_warnings
-SET DEBUG_SYNC = 'now SIGNAL continue';
-
---connection con1
---reap
-
---connection con2
---reap
-
---connection con3
---reap
-
---connection default
-
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-
-
-# (2) Insert records to leaf page (co..) and cause modify_page
-# - root page is X latched, because node_ptr for 'co'
-# is 1st record for (co,cv,dc,dj,dq,dx,ee)
-#
-# * ordinary pessimitic insert might be done by pessistic update
-# and we should consider possibility node_ptr to be deleted.
-
-SET DEBUG_SYNC = 'RESET';
-
-# Filling leaf page (co..)
-INSERT INTO t1 (a00) VALUES ('coa');
-
---connection con1
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-# Cause modify_tree
---send
-INSERT INTO t1 (a00) VALUES ('cob');
-
---connection con2
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-# All searches are blocked because root page is X latched
-
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-
---connection con3
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-
---connection default
-# FIXME: These occasionally time out!
---disable_warnings
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
---enable_warnings
-SET DEBUG_SYNC = 'now SIGNAL continue';
-
---connection con1
---reap
-
---connection con2
---reap
-
---connection con3
---reap
-
---connection default
-
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-
-
-# (3) Insert records to rightmost leaf page (gb..) and cause modify_page
-# - root page is not X latched, because node_ptr for 'gb' is the last record
-# of the level 1 though it is last record in the page.
-# - lathed from level 1 page (el..,gb)
-
-SET DEBUG_SYNC = 'RESET';
-
-# Filling leaf page (gb..)
-INSERT INTO t1 (a00) VALUES ('gba');
-
---connection con1
-SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue';
-# Cause modify_tree
---send
-INSERT INTO t1 (a00) VALUES ('gbb');
-
---connection con2
-SET DEBUG_SYNC = 'now WAIT_FOR reached';
-# Not blocked searches
-SELECT a00,a01 FROM t1 WHERE a00 = 'aa';
-SELECT a00,a01 FROM t1 WHERE a00 = 'ek';
-
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'el';
-
---connection con3
-SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2';
-# Blocked
---send
-SELECT a00,a01 FROM t1 WHERE a00 = 'gb';
-
---connection default
-# FIXME: These occasionally time out!
---disable_warnings
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1';
-SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1';
---enable_warnings
-SET DEBUG_SYNC = 'now SIGNAL continue';
-
---connection con1
---reap
-
---connection con2
---reap
-
---connection con3
---reap
-
---connection default
-ANALYZE TABLE t1;
-SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
-
-
-
-# Cleanup
-SET DEBUG_SYNC = 'RESET';
-
---connection default
---disconnect con1
---disconnect con2
---disconnect con3
-
-DROP TABLE t1;
-
---disable_query_log
-SET GLOBAL innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug;
-SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index;
-SET GLOBAL innodb_stats_persistent = @old_innodb_stats_persistent;
---enable_query_log
-
-# Wait till all disconnects are completed.
---source include/wait_until_count_sessions.inc
diff --git a/mysql-test/suite/innodb_gis/r/rtree_split.result b/mysql-test/suite/innodb_gis/r/rtree_split.result
index 8e475776ce0..10262f0220b 100644
--- a/mysql-test/suite/innodb_gis/r/rtree_split.result
+++ b/mysql-test/suite/innodb_gis/r/rtree_split.result
@@ -61,3 +61,15 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1);
count(*)
57344
drop table t1;
+#
+# MDEV-30400 Assertion height == btr_page_get_level ... on INSERT
+#
+CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB;
+SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug;
+SET GLOBAL innodb_limit_optimistic_insert_debug=2;
+BEGIN;
+INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_366;
+ROLLBACK;
+SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit;
+DROP TABLE t1;
+# End of 10.6 tests
diff --git a/mysql-test/suite/innodb_gis/t/rtree_split.test b/mysql-test/suite/innodb_gis/t/rtree_split.test
index 6f285187508..de7fc676e0e 100644
--- a/mysql-test/suite/innodb_gis/t/rtree_split.test
+++ b/mysql-test/suite/innodb_gis/t/rtree_split.test
@@ -73,3 +73,18 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1);
# Clean up.
drop table t1;
+
+--echo #
+--echo # MDEV-30400 Assertion height == btr_page_get_level ... on INSERT
+--echo #
+
+CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB;
+SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug;
+SET GLOBAL innodb_limit_optimistic_insert_debug=2;
+BEGIN;
+INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_366;
+ROLLBACK;
+SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit;
+DROP TABLE t1;
+
+--echo # End of 10.6 tests
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 0bb16dba374..ef44ed5d9d6 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -460,6 +460,53 @@ btr_page_create(
}
}
+buf_block_t *
+mtr_t::get_already_latched(const page_id_t id, mtr_memo_type_t type) const
+{
+ ut_ad(is_active());
+ ut_ad(type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX ||
+ type == MTR_MEMO_PAGE_S_FIX);
+ for (ulint i= 0; i < m_memo.size(); i++)
+ {
+ const mtr_memo_slot_t &slot= m_memo[i];
+ const auto slot_type= mtr_memo_type_t(slot.type & ~MTR_MEMO_MODIFY);
+ if (slot_type == MTR_MEMO_PAGE_X_FIX || slot_type == type)
+ {
+ buf_block_t *block= static_cast<buf_block_t*>(slot.object);
+ if (block->page.id() == id)
+ return block;
+ }
+ }
+ return nullptr;
+}
+
+/** Fetch an index root page that was already latched in the
+mini-transaction. */
+static buf_block_t *btr_get_latched_root(const dict_index_t &index, mtr_t *mtr)
+{
+ return mtr->get_already_latched(page_id_t{index.table->space_id, index.page},
+ MTR_MEMO_PAGE_SX_FIX);
+}
+
+/** Fetch an index page that should have been already latched in the
+mini-transaction. */
+static buf_block_t *
+btr_block_reget(mtr_t *mtr, const dict_index_t &index,
+ const page_id_t id, rw_lock_type_t rw_latch,
+ dberr_t *err)
+{
+ if (buf_block_t *block=
+ mtr->get_already_latched(id, mtr_memo_type_t(rw_latch)))
+ {
+ *err= DB_SUCCESS;
+ return block;
+ }
+
+ /* MDEV-29835 FIXME: Acquire the page latch upfront. */
+ ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK));
+ return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err);
+}
+
/**************************************************************//**
Allocates a new file page to be used in an ibuf tree. Takes the page from
the free list of the tree, which must contain pages!
@@ -472,18 +519,16 @@ btr_page_alloc_for_ibuf(
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
{
- buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err);
+ buf_block_t *root= btr_get_latched_root(*index, mtr);
if (UNIV_UNLIKELY(!root))
return root;
-
buf_block_t *new_block=
- buf_page_get_gen(page_id_t(index->table->space_id,
+ buf_page_get_gen(page_id_t(IBUF_SPACE_ID,
mach_read_from_4(PAGE_HEADER +
PAGE_BTR_IBUF_FREE_LIST +
FLST_FIRST + FIL_ADDR_PAGE +
root->page.frame)),
- index->table->space->zip_size(), RW_X_LATCH, nullptr,
- BUF_GET, mtr, err);
+ 0, RW_X_LATCH, nullptr, BUF_GET, mtr, err);
if (new_block)
*err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block,
PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
@@ -523,11 +568,11 @@ btr_page_alloc_low(
#ifdef BTR_CUR_HASH_ADAPT
ut_ad(!root->index || !root->index->freed());
#endif
- mtr->release_block_at_savepoint(savepoint, root);
+ mtr->rollback_to_savepoint(savepoint);
}
else
{
- mtr->u_lock_register(savepoint);
+ mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX);
root->page.lock.u_lock();
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(root, true);
@@ -579,15 +624,12 @@ btr_page_free_for_ibuf(
mtr_t* mtr) /*!< in: mtr */
{
ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX));
-
- dberr_t err;
- if (buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err))
- {
- err= flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ buf_block_t *root= btr_get_latched_root(*index, mtr);
+ dberr_t err=
+ flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
- ut_d(if (err == DB_SUCCESS)
- flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
- }
+ ut_d(if (err == DB_SUCCESS)
+ flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
return err;
}
@@ -637,11 +679,11 @@ dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
#ifdef BTR_CUR_HASH_ADAPT
ut_ad(!root->index || !root->index->freed());
#endif
- mtr->release_block_at_savepoint(savepoint, root);
+ mtr->rollback_to_savepoint(savepoint);
}
else
{
- mtr->u_lock_register(savepoint);
+ mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX);
root->page.lock.u_lock();
#ifdef BTR_CUR_HASH_ADAPT
btr_search_drop_page_hash_index(root, true);
@@ -712,35 +754,27 @@ btr_node_ptr_get_child(
mtr, err);
}
-MY_ATTRIBUTE((nonnull(2,3,5), warn_unused_result))
+MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result))
/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
an sx-latch on the tree.
@return rec_get_offsets() of the node pointer record */
static
rec_offs*
-btr_page_get_father_node_ptr_func(
-/*==============================*/
+btr_page_get_father_node_ptr_for_validate(
rec_offs* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
btr_cur_t* cursor, /*!< in: cursor pointing to user record,
out: cursor on node pointer record,
its page x-latched */
- btr_latch_mode latch_mode,/*!< in: BTR_CONT_MODIFY_TREE
- or BTR_CONT_SEARCH_TREE */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(latch_mode == BTR_CONT_MODIFY_TREE
- || latch_mode == BTR_CONT_SEARCH_TREE);
-
const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no();
dict_index_t* index = btr_cur_get_index(cursor);
ut_ad(!dict_index_is_spatial(index));
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
-
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
ut_ad(dict_index_get_page(index) != page_no);
const auto level = btr_page_get_level(btr_cur_get_page(cursor));
@@ -752,12 +786,16 @@ btr_page_get_father_node_ptr_func(
dict_index_build_node_ptr(index,
user_rec, 0,
heap, level),
- PAGE_CUR_LE, latch_mode,
+ RW_S_LATCH,
cursor, mtr) != DB_SUCCESS) {
return nullptr;
}
const rec_t* node_ptr = btr_cur_get_rec(cursor);
+#if 0 /* MDEV-29835 FIXME */
+ ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive()
+ || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
+#endif
offsets = rec_get_offsets(node_ptr, index, offsets, 0,
ULINT_UNDEFINED, &heap);
@@ -769,13 +807,64 @@ btr_page_get_father_node_ptr_func(
return(offsets);
}
-#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \
- btr_page_get_father_node_ptr_func( \
- of,heap,cur,BTR_CONT_MODIFY_TREE,mtr)
+MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result))
+/************************************************************//**
+Returns the upper level node pointer to a page. It is assumed that
+it has already been latched.
+@return rec_get_offsets() of the node pointer record */
+static
+rec_offs*
+btr_page_get_parent(
+ rec_offs* offsets,/*!< in: work area for the return value */
+ mem_heap_t* heap, /*!< in: memory heap to use */
+ btr_cur_t* cursor, /*!< in: cursor pointing to user record,
+ out: cursor on node pointer record,
+ its page x-latched */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ const uint32_t page_no= cursor->block()->page.id().page_no();
+ const dict_index_t *index= cursor->index();
+ ut_ad(!index->is_spatial());
+ ut_ad(index->page != page_no);
+
+ uint32_t p= index->page;
+ const dtuple_t *tuple=
+ dict_index_build_node_ptr(index, btr_cur_get_rec(cursor), 0, heap,
+ btr_page_get_level(btr_cur_get_page(cursor)));
+
+ ulint i;
+ for (i= 0; i < mtr->get_savepoint(); i++)
+ if (buf_block_t *block= mtr->block_at_savepoint(i))
+ if (block->page.id().page_no() == p)
+ {
+ ut_ad(block->page.lock.have_u_or_x() ||
+ (!block->page.lock.have_s() && index->lock.have_x()));
+ ulint up_match= 0, low_match= 0;
+ cursor->page_cur.block= block;
+ if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &up_match,
+ &low_match, &cursor->page_cur,
+ nullptr))
+ return nullptr;
+ offsets= rec_get_offsets(cursor->page_cur.rec, index, offsets, 0,
+ ULINT_UNDEFINED, &heap);
+ p= btr_node_ptr_get_child_page_no(cursor->page_cur.rec, offsets);
+ if (p != page_no)
+ {
+ i= 0; // MDEV-29835 FIXME: require all pages to be latched in order!
+ continue;
+ }
+ ut_ad(block->page.lock.have_u_or_x());
+ if (block->page.lock.have_u_not_x())
+ {
+ ut_ad(block->page.id().page_no() == index->page);
+ block->page.lock.u_x_upgrade();
+ mtr->page_lock_upgrade(*block);
+ }
+ return offsets;
+ }
-#define btr_page_get_father_node_ptr_for_validate(of,heap,cur,mtr) \
- btr_page_get_father_node_ptr_func( \
- of,heap,cur,BTR_CONT_SEARCH_TREE,mtr)
+ return nullptr;
+}
/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
@@ -796,7 +885,7 @@ btr_page_get_father_block(
if (UNIV_UNLIKELY(!rec))
return nullptr;
cursor->page_cur.rec= rec;
- return btr_page_get_father_node_ptr(offsets, heap, cursor, mtr);
+ return btr_page_get_parent(offsets, heap, cursor, mtr);
}
/** Seek to the parent page of a B-tree page.
@@ -811,7 +900,7 @@ bool btr_page_get_father(mtr_t* mtr, btr_cur_t* cursor)
return false;
cursor->page_cur.rec= rec;
mem_heap_t *heap= mem_heap_create(100);
- const bool got= btr_page_get_father_node_ptr(nullptr, heap, cursor, mtr);
+ const bool got= btr_page_get_parent(nullptr, heap, cursor, mtr);
mem_heap_free(heap);
return got;
}
@@ -1718,48 +1807,43 @@ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr)
/** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE.
@param[in] index clustered index with instant ALTER TABLE
@param[in] all whether to reset FIL_PAGE_TYPE as well
-@param[in,out] mtr mini-transaction
-@return error code */
+@param[in,out] mtr mini-transaction */
ATTRIBUTE_COLD
-dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr)
+void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr)
{
ut_ad(!index.table->is_temporary());
ut_ad(index.is_primary());
- dberr_t err;
- if (buf_block_t *root= btr_root_block_get(&index, RW_SX_LATCH, mtr, &err))
+ buf_block_t *root= btr_get_latched_root(index, mtr);
+ byte *page_type= root->page.frame + FIL_PAGE_TYPE;
+ if (all)
{
- byte *page_type= root->page.frame + FIL_PAGE_TYPE;
- if (all)
- {
- ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT ||
- mach_read_from_2(page_type) == FIL_PAGE_INDEX);
- mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX);
- byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame;
- mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant,
- page_ptr_get_direction(instant + 1));
- }
- else
- ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT);
- static const byte supremuminfimum[8 + 8] = "supremuminfimum";
- uint16_t infimum, supremum;
- if (page_is_comp(root->page.frame))
- {
- infimum= PAGE_NEW_INFIMUM;
- supremum= PAGE_NEW_SUPREMUM;
- }
- else
- {
- infimum= PAGE_OLD_INFIMUM;
- supremum= PAGE_OLD_SUPREMUM;
- }
- ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) ==
- !memcmp(&root->page.frame[supremum], supremuminfimum, 8));
- mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[infimum],
- supremuminfimum + 8, 8);
- mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[supremum],
- supremuminfimum, 8);
+ ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT ||
+ mach_read_from_2(page_type) == FIL_PAGE_INDEX);
+ mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX);
+ byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame;
+ mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant,
+ page_ptr_get_direction(instant + 1));
}
- return err;
+ else
+ ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT);
+ static const byte supremuminfimum[8 + 8] = "supremuminfimum";
+ uint16_t infimum, supremum;
+ if (page_is_comp(root->page.frame))
+ {
+ infimum= PAGE_NEW_INFIMUM;
+ supremum= PAGE_NEW_SUPREMUM;
+ }
+ else
+ {
+ infimum= PAGE_OLD_INFIMUM;
+ supremum= PAGE_OLD_SUPREMUM;
+ }
+ ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) ==
+ !memcmp(&root->page.frame[supremum], supremuminfimum, 8));
+ mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[infimum],
+ supremuminfimum + 8, 8);
+ mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[supremum],
+ supremuminfimum, 8);
}
/*************************************************************//**
@@ -1856,11 +1940,6 @@ btr_root_raise_and_insert(
}
/* Copy the records from root to the new page one by one. */
- dberr_t e;
- if (!err) {
- err = &e;
- }
-
if (0
#ifdef UNIV_ZIP_COPY
|| new_page_zip
@@ -2004,21 +2083,15 @@ btr_root_raise_and_insert(
page_cursor->block = new_block;
page_cursor->index = index;
- if (tuple) {
- ut_ad(dtuple_check_typed(tuple));
- /* Reposition the cursor to the child node */
- ulint low_match = 0, up_match = 0;
+ ut_ad(dtuple_check_typed(tuple));
+ /* Reposition the cursor to the child node */
+ ulint low_match = 0, up_match = 0;
- if (page_cur_search_with_match(tuple, PAGE_CUR_LE,
- &up_match, &low_match,
- page_cursor, nullptr)) {
- if (err) {
- *err = DB_CORRUPTION;
- }
- return nullptr;
- }
- } else {
- page_cursor->rec = page_get_infimum_rec(new_block->page.frame);
+ if (page_cur_search_with_match(tuple, PAGE_CUR_LE,
+ &up_match, &low_match,
+ page_cursor, nullptr)) {
+ *err = DB_CORRUPTION;
+ return nullptr;
}
/* Split the child and insert tuple */
@@ -2237,6 +2310,7 @@ func_exit:
return(rec);
}
+#ifdef UNIV_DEBUG
/*************************************************************//**
Returns TRUE if the insert fits on the appropriate half-page with the
chosen split_rec.
@@ -2334,6 +2408,7 @@ got_rec:
return(false);
}
+#endif
/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
@@ -2356,25 +2431,34 @@ btr_insert_on_non_leaf_level(
rtr_info_t rtr_info;
ut_ad(level > 0);
- auto mode = PAGE_CUR_LE;
+
+ flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG;
+ cursor.page_cur.index = index;
+
+ dberr_t err;
if (index->is_spatial()) {
- mode = PAGE_CUR_RTREE_INSERT;
/* For spatial index, initialize structures to track
its parents etc. */
rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
rtr_info_update_btr(&cursor, &rtr_info);
+ err = rtr_search_to_nth_level(level, tuple,
+ PAGE_CUR_RTREE_INSERT,
+ BTR_CONT_MODIFY_TREE,
+ &cursor, mtr);
+ } else {
+ err = btr_cur_search_to_nth_level(level, tuple, RW_X_LATCH,
+ &cursor, mtr);
}
- flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG;
- cursor.page_cur.index = index;
-
- dberr_t err = btr_cur_search_to_nth_level(level, tuple, mode,
- BTR_CONT_MODIFY_TREE,
- &cursor, mtr);
ut_ad(cursor.flag == BTR_CUR_BINARY);
+#if 0 /* MDEV-29835 FIXME */
+ ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive()
+ || index->is_spatial()
+ || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
+#endif
if (UNIV_LIKELY(err == DB_SUCCESS)) {
err = btr_cur_optimistic_insert(flags,
@@ -2470,6 +2554,7 @@ btr_attach_half_pages(
/* Get the level of the split pages */
const ulint level = btr_page_get_level(block->page.frame);
ut_ad(level == btr_page_get_level(new_block->page.frame));
+ page_id_t id{block->page.id()};
/* Get the previous and next pages of page */
const uint32_t prev_page_no = btr_page_get_prev(block->page.frame);
@@ -2477,12 +2562,32 @@ btr_attach_half_pages(
/* for consistency, both blocks should be locked, before change */
if (prev_page_no != FIL_NULL && direction == FSP_DOWN) {
- prev_block = btr_block_get(*index, prev_page_no, RW_X_LATCH,
- !level, mtr);
+ id.set_page_no(prev_page_no);
+ prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
+#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
+ if (!prev_block) {
+# if 0 /* MDEV-29835 FIXME */
+ ut_ad(mtr->memo_contains(index->lock,
+ MTR_MEMO_X_LOCK));
+# endif
+ prev_block = btr_block_get(*index, prev_page_no,
+ RW_X_LATCH, !level, mtr);
+ }
+#endif
}
if (next_page_no != FIL_NULL && direction != FSP_DOWN) {
- next_block = btr_block_get(*index, next_page_no, RW_X_LATCH,
- !level, mtr);
+ id.set_page_no(next_page_no);
+ next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
+#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
+ if (!next_block) {
+# if 0 /* MDEV-29835 FIXME */
+ ut_ad(mtr->memo_contains(index->lock,
+ MTR_MEMO_X_LOCK));
+# endif
+ next_block = btr_block_get(*index, next_page_no,
+ RW_X_LATCH, !level, mtr);
+ }
+#endif
}
/* Build the node pointer (= node key and page address) for the upper
@@ -3018,6 +3123,7 @@ insert_empty:
return nullptr;
}
+#ifdef UNIV_DEBUG
/* If the split is made on the leaf level and the insert will fit
on the appropriate half-page, we may release the tree x-latch.
We can then move the records after releasing the tree latch,
@@ -3025,21 +3131,21 @@ insert_empty:
const bool insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, split_rec, offsets, tuple,
n_ext, heap);
+#endif
if (!split_rec && !insert_left) {
UT_DELETE_ARRAY(buf);
buf = NULL;
}
- if (!srv_read_only_mode
- && insert_will_fit
+#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled
+ if (insert_will_fit
&& page_is_leaf(page)
&& !dict_index_is_online_ddl(cursor->index())) {
-#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled
mtr->release(cursor->index()->lock);
-#endif
/* NOTE: We cannot release root block latch here, because it
has segment header and already modified in most of cases.*/
}
+#endif
/* 5. Move then the records to the new page */
if (direction == FSP_DOWN) {
@@ -3271,52 +3377,58 @@ func_exit:
dberr_t btr_level_list_remove(const buf_block_t& block,
const dict_index_t& index, mtr_t* mtr)
{
- ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(block.zip_size() == index.table->space->zip_size());
- ut_ad(index.table->space->id == block.page.id().space());
- /* Get the previous and next page numbers of page */
-
- const page_t* page = block.page.frame;
- const uint32_t prev_page_no = btr_page_get_prev(page);
- const uint32_t next_page_no = btr_page_get_next(page);
-
- /* Update page links of the level */
- dberr_t err;
+ ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(block.zip_size() == index.table->space->zip_size());
+ ut_ad(index.table->space->id == block.page.id().space());
+ /* Get the previous and next page numbers of page */
+ const uint32_t prev_page_no= btr_page_get_prev(block.page.frame);
+ const uint32_t next_page_no= btr_page_get_next(block.page.frame);
+ page_id_t id{block.page.id()};
+ buf_block_t *prev= nullptr, *next;
+ dberr_t err;
- if (prev_page_no != FIL_NULL) {
- buf_block_t* prev_block = btr_block_get(
- index, prev_page_no, RW_X_LATCH, page_is_leaf(page),
- mtr, &err);
- if (UNIV_UNLIKELY(!prev_block)) {
- return err;
- }
- if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_block->page.frame
- + FIL_PAGE_NEXT,
- page + FIL_PAGE_OFFSET,
- 4))) {
- return DB_CORRUPTION;
- }
- btr_page_set_next(prev_block, next_page_no, mtr);
- }
+ /* Update page links of the level */
+ if (prev_page_no != FIL_NULL)
+ {
+ id.set_page_no(prev_page_no);
+ prev= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
+#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
+ if (!prev)
+ {
+# if 0 /* MDEV-29835 FIXME */
+ ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
+# endif
+ prev= btr_block_get(index, id.page_no(), RW_X_LATCH,
+ page_is_leaf(block.page.frame), mtr, &err);
+ if (UNIV_UNLIKELY(!prev))
+ return err;
+ }
+#endif
+ }
- if (next_page_no != FIL_NULL) {
- buf_block_t* next_block = btr_block_get(
- index, next_page_no, RW_X_LATCH, page_is_leaf(page),
- mtr, &err);
+ if (next_page_no != FIL_NULL)
+ {
+ id.set_page_no(next_page_no);
+ next= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
+#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
+ if (!next)
+ {
+# if 0 /* MDEV-29835 FIXME */
+ ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
+# endif
+ next= btr_block_get(index, id.page_no(), RW_X_LATCH,
+ page_is_leaf(block.page.frame), mtr, &err);
+ if (UNIV_UNLIKELY(!next))
+ return err;
+ }
+#endif
+ btr_page_set_prev(next, prev_page_no, mtr);
+ }
- if (UNIV_UNLIKELY(!next_block)) {
- return err;
- }
- if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame
- + FIL_PAGE_PREV,
- page + FIL_PAGE_OFFSET,
- 4))) {
- return DB_CORRUPTION;
- }
- btr_page_set_prev(next_block, prev_page_no, mtr);
- }
+ if (prev)
+ btr_page_set_next(prev, next_page_no, mtr);
- return DB_SUCCESS;
+ return DB_SUCCESS;
}
/*************************************************************//**
@@ -4166,23 +4278,30 @@ btr_discard_page(
const uint32_t left_page_no = btr_page_get_prev(block->page.frame);
const uint32_t right_page_no = btr_page_get_next(block->page.frame);
+ page_id_t merge_page_id{block->page.id()};
ut_d(bool parent_is_different = false);
+ dberr_t err;
if (left_page_no != FIL_NULL) {
- dberr_t err;
- merge_block = btr_block_get(*index, left_page_no, RW_X_LATCH,
- true, mtr, &err);
+ merge_page_id.set_page_no(left_page_no);
+ merge_block = btr_block_reget(mtr, *index, merge_page_id,
+ RW_X_LATCH, &err);
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
-
+#if 0 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
+ ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ + FIL_PAGE_NEXT,
+ block->page.frame + FIL_PAGE_OFFSET,
+ 4));
+#else
if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_NEXT,
block->page.frame
+ FIL_PAGE_OFFSET, 4))) {
return DB_CORRUPTION;
}
-
+#endif
ut_d(parent_is_different =
(page_rec_get_next(
page_get_infimum_rec(
@@ -4190,19 +4309,25 @@ btr_discard_page(
&parent_cursor)))
== btr_cur_get_rec(&parent_cursor)));
} else if (right_page_no != FIL_NULL) {
- dberr_t err;
- merge_block = btr_block_get(*index, right_page_no, RW_X_LATCH,
- true, mtr, &err);
+ merge_page_id.set_page_no(right_page_no);
+ merge_block = btr_block_reget(mtr, *index, merge_page_id,
+ RW_X_LATCH, &err);
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
+#if 0 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
+ ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ + FIL_PAGE_PREV,
+ block->page.frame + FIL_PAGE_OFFSET,
+ 4));
+#else
if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_PREV,
block->page.frame
+ FIL_PAGE_OFFSET, 4))) {
return DB_CORRUPTION;
}
-
+#endif
ut_d(parent_is_different = page_rec_is_supremum(
page_rec_get_next(btr_cur_get_rec(&parent_cursor))));
if (page_is_leaf(merge_block->page.frame)) {
@@ -4244,13 +4369,10 @@ btr_discard_page(
}
#ifdef UNIV_ZIP_DEBUG
- {
- page_zip_des_t* merge_page_zip
- = buf_block_get_page_zip(merge_block);
- ut_a(!merge_page_zip
- || page_zip_validate(merge_page_zip,
- merge_block->page.frame, index));
- }
+ if (page_zip_des_t* merge_page_zip
+ = buf_block_get_page_zip(merge_block)) /* validate only if a compressed page descriptor exists */
+ ut_a(page_zip_validate(merge_page_zip,
+ merge_block->page.frame, index));
#endif /* UNIV_ZIP_DEBUG */
if (index->has_locking()) {
@@ -4269,7 +4391,7 @@ btr_discard_page(
}
/* Free the file page */
- dberr_t err = btr_page_free(index, block, mtr);
+ err = btr_page_free(index, block, mtr);
if (err == DB_SUCCESS) {
/* btr_check_node_ptr() needs parent block latched.
@@ -4462,6 +4584,8 @@ btr_check_node_ptr(
offsets = btr_page_get_father_block(NULL, heap, mtr, &cursor);
}
+ ut_ad(offsets);
+
if (page_is_leaf(page)) {
goto func_exit;
@@ -4793,19 +4917,16 @@ btr_validate_level(
page_zip_des_t* page_zip;
#endif /* UNIV_ZIP_DEBUG */
ulint savepoint = 0;
- ulint savepoint2 = 0;
uint32_t parent_page_no = FIL_NULL;
uint32_t parent_right_page_no = FIL_NULL;
bool rightmost_child = false;
mtr.start();
- if (!srv_read_only_mode) {
- if (lockout) {
- mtr_x_lock_index(index, &mtr);
- } else {
- mtr_sx_lock_index(index, &mtr);
- }
+ if (lockout) {
+ mtr_x_lock_index(index, &mtr);
+ } else {
+ mtr_sx_lock_index(index, &mtr);
}
dberr_t err;
@@ -4853,7 +4974,6 @@ corrupted:
offsets = rec_get_offsets(node_ptr, index, offsets, 0,
ULINT_UNDEFINED, &heap);
- savepoint2 = mtr_set_savepoint(&mtr);
block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr,
&err);
if (!block) {
@@ -4874,10 +4994,8 @@ corrupted:
/* To obey latch order of tree blocks,
we should release the right_block once to
obtain lock of the uncle block. */
- mtr_release_block_at_savepoint(
- &mtr, savepoint2, block);
+ mtr.release_last_page();
- savepoint2 = mtr_set_savepoint(&mtr);
block = btr_block_get(*index, left_page_no,
RW_SX_LATCH, false,
&mtr, &err);
@@ -4905,12 +5023,10 @@ func_exit:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
- if (!srv_read_only_mode) {
- if (lockout) {
- mtr_x_lock_index(index, &mtr);
- } else {
- mtr_sx_lock_index(index, &mtr);
- }
+ if (lockout) {
+ mtr_x_lock_index(index, &mtr);
+ } else {
+ mtr_sx_lock_index(index, &mtr);
}
page = block->page.frame;
@@ -4955,7 +5071,7 @@ func_exit:
if (right_page_no != FIL_NULL) {
const rec_t* right_rec;
- savepoint = mtr_set_savepoint(&mtr);
+ savepoint = mtr.get_savepoint();
right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err);
@@ -5150,8 +5266,10 @@ broken_links:
/* To obey latch order of tree blocks,
we should release the right_block once to
obtain lock of the uncle block. */
- mtr_release_block_at_savepoint(
- &mtr, savepoint, right_block);
+ ut_ad(right_block
+ == mtr.at_savepoint(savepoint));
+ mtr.rollback_to_savepoint(savepoint,
+ savepoint + 1);
if (parent_right_page_no != FIL_NULL) {
btr_block_get(*index,
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index ac06d9b1568..b3bfb74bb8b 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -3,7 +3,7 @@
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -103,14 +103,14 @@ throughput clearly from about 100000. */
#define BTR_CUR_FINE_HISTORY_LENGTH 100000
#ifdef BTR_CUR_HASH_ADAPT
-/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
+/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */
ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_non_sea;
/** Old value of btr_cur_n_non_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
ulint btr_cur_n_non_sea_old;
/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
+btr_cur_t::search_leaf(). */
ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_sea;
/** Old value of btr_cur_n_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
@@ -188,164 +188,106 @@ btr_rec_free_externally_stored_fields(
/*==================== B-TREE SEARCH =========================*/
/** Latches the leaf page or pages requested.
-@param[in] block leaf page where the search converged
+@param[in] block_savepoint mtr savepoint of the leaf page where the search converged
@param[in] latch_mode BTR_SEARCH_LEAF, ...
@param[in] cursor cursor
-@param[in] mtr mini-transaction
-@param[out] latch_leaves latched blocks and savepoints */
+@param[in] mtr mini-transaction */
void
btr_cur_latch_leaves(
- buf_block_t* block,
+ ulint block_savepoint,
btr_latch_mode latch_mode,
btr_cur_t* cursor,
- mtr_t* mtr,
- btr_latch_leaves_t* latch_leaves)
+ mtr_t* mtr)
{
compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH));
compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH));
compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH));
+
+ buf_block_t* block = mtr->at_savepoint(block_savepoint);
+
ut_ad(block->page.id().space() == cursor->index()->table->space->id);
ut_ad(block->page.in_file());
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&cursor->index()->lock,
- MTR_MEMO_S_LOCK
- | MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
- auto rtr_info = cursor->rtr_info;
- if (UNIV_LIKELY_NULL(rtr_info) && !cursor->index()->is_spatial()) {
- rtr_info = nullptr;
- }
-
+ ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock,
+ MTR_MEMO_S_LOCK
+ | MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
const rw_lock_type_t mode = rw_lock_type_t(
latch_mode & (RW_X_LATCH | RW_S_LATCH));
static_assert(ulint{RW_S_LATCH} == ulint{BTR_SEARCH_LEAF}, "");
static_assert(ulint{RW_X_LATCH} == ulint{BTR_MODIFY_LEAF}, "");
- static_assert(BTR_SEARCH_LEAF & BTR_SEARCH_TREE, "");
switch (latch_mode) {
- default:
- break;
uint32_t left_page_no;
uint32_t right_page_no;
- ulint save;
+ default:
+ ut_ad(latch_mode == BTR_CONT_MODIFY_TREE);
+ ut_ad(cursor->index()->is_spatial());
+ break;
case BTR_SEARCH_LEAF:
- case BTR_MODIFY_LEAF:
- case BTR_SEARCH_TREE:
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_savepoints[RTR_MAX_LEVELS]
- = mtr->get_savepoint();
- }
-latch_block:
- if (latch_leaves) {
- latch_leaves->savepoints[1] = mtr->get_savepoint();
- latch_leaves->blocks[1] = block;
- }
- block->page.fix();
- mtr->page_lock(block, mode);
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_blocks[RTR_MAX_LEVELS] = block;
- }
- return;
+ s_latch_block:
+ block->page.lock.s_lock();
+#ifdef BTR_CUR_HASH_ADAPT
+ btr_search_drop_page_hash_index(block, true);
+#endif
+ mtr->lock_register(block_savepoint, MTR_MEMO_PAGE_S_FIX);
+ break;
case BTR_MODIFY_TREE:
/* It is exclusive for other operations which calls
btr_page_set_prev() */
ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock,
MTR_MEMO_X_LOCK
| MTR_MEMO_SX_LOCK));
- save = mtr->get_savepoint();
/* x-latch also siblings from left to right */
left_page_no = btr_page_get_prev(block->page.frame);
if (left_page_no != FIL_NULL) {
- buf_block_t *b = btr_block_get(
- *cursor->index(), left_page_no, RW_X_LATCH,
- true, mtr);
-
- if (latch_leaves) {
- latch_leaves->savepoints[0] = save;
- latch_leaves->blocks[0] = b;
- }
-
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_savepoints[RTR_MAX_LEVELS]
- = save;
- rtr_info->tree_blocks[RTR_MAX_LEVELS] = b;
- }
-
- save = mtr->get_savepoint();
+ btr_block_get(*cursor->index(), left_page_no, RW_X_LATCH,
+ true, mtr);
}
- if (latch_leaves) {
- latch_leaves->savepoints[1] = mtr->get_savepoint();
- latch_leaves->blocks[1] = block;
- }
-
- block->page.fix();
- block->page.lock.x_lock();
-
- mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
+ mtr->x_latch_at_savepoint(block_savepoint, block);
#ifdef BTR_CUR_HASH_ADAPT
- ut_ad(!btr_search_check_marked_free_index(block));
+ btr_search_drop_page_hash_index(block, true);
#endif
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1] = save;
- rtr_info->tree_blocks[RTR_MAX_LEVELS + 1] = block;
- }
-
right_page_no = btr_page_get_next(block->page.frame);
if (right_page_no != FIL_NULL) {
- save = mtr->get_savepoint();
-
- buf_block_t* b = btr_block_get(
- *cursor->index(), right_page_no, RW_X_LATCH,
- true, mtr);
- if (latch_leaves) {
- latch_leaves->savepoints[2] = save;
- latch_leaves->blocks[2] = b;
- }
-
- if (UNIV_LIKELY_NULL(rtr_info)) {
- rtr_info->tree_savepoints[RTR_MAX_LEVELS + 2]
- = save;
- rtr_info->tree_blocks[RTR_MAX_LEVELS + 2] = b;
- }
+ btr_block_get(*cursor->index(), right_page_no,
+ RW_X_LATCH, true, mtr);
}
-
- return;
+ break;
case BTR_SEARCH_PREV:
case BTR_MODIFY_PREV:
- ut_ad(!rtr_info);
- static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, "");
static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, "");
- static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV)
- == (RW_S_LATCH ^ RW_X_LATCH), "");
-
+ static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, "");
+ ut_ad(cursor->index()->is_ibuf()
+ || mtr->memo_contains_flagged(&cursor->index()->lock,
+ MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
/* Because we are holding index->lock, no page splits
or merges may run concurrently, and we may read
FIL_PAGE_PREV from a buffer-fixed, unlatched page. */
left_page_no = btr_page_get_prev(block->page.frame);
if (left_page_no != FIL_NULL) {
- save = mtr->get_savepoint();
cursor->left_block = btr_block_get(
*cursor->index(), left_page_no,
mode, true, mtr);
- if (latch_leaves) {
- latch_leaves->savepoints[0] = save;
- latch_leaves->blocks[0] = cursor->left_block;
- }
}
- goto latch_block;
- case BTR_CONT_MODIFY_TREE:
- ut_ad(cursor->index()->is_spatial());
- return;
- }
+ if (latch_mode == BTR_SEARCH_PREV) {
+ goto s_latch_block;
+ }
- MY_ASSERT_UNREACHABLE();
+ /* fall through */
+ case BTR_MODIFY_LEAF:
+ mtr->x_latch_at_savepoint(block_savepoint, block);
+#ifdef BTR_CUR_HASH_ADAPT
+ btr_search_drop_page_hash_index(block, true);
+#endif
+ }
}
/** Load the instant ALTER TABLE metadata from the clustered index
@@ -729,98 +671,6 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
return index->n_core_null_bytes > 128;
}
-/** Optimistically latches the leaf page or pages requested.
-@param[in] block guessed buffer block
-@param[in] modify_clock modify clock value
-@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
-@param[in,out] cursor cursor
-@param[in] mtr mini-transaction
-@return true if success */
-TRANSACTIONAL_TARGET
-bool
-btr_cur_optimistic_latch_leaves(
- buf_block_t* block,
- ib_uint64_t modify_clock,
- btr_latch_mode* latch_mode,
- btr_cur_t* cursor,
- mtr_t* mtr)
-{
- ut_ad(block->page.buf_fix_count());
- ut_ad(block->page.in_file());
- ut_ad(block->page.frame);
-
- switch (*latch_mode) {
- default:
- MY_ASSERT_UNREACHABLE();
- return(false);
- case BTR_SEARCH_LEAF:
- case BTR_MODIFY_LEAF:
- return(buf_page_optimistic_get(*latch_mode, block,
- modify_clock, mtr));
- case BTR_SEARCH_PREV: /* btr_pcur_move_backward_from_page() */
- case BTR_MODIFY_PREV: /* Ditto, or ibuf_insert() */
- uint32_t curr_page_no, left_page_no;
- {
- transactional_shared_lock_guard<block_lock> g{
- block->page.lock};
- if (block->modify_clock != modify_clock) {
- return false;
- }
- curr_page_no = block->page.id().page_no();
- left_page_no = btr_page_get_prev(block->page.frame);
- }
-
- static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, "");
- static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, "");
- static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV)
- == (RW_S_LATCH ^ RW_X_LATCH), "");
-
- const rw_lock_type_t mode = rw_lock_type_t(
- *latch_mode & (RW_X_LATCH | RW_S_LATCH));
-
- if (left_page_no != FIL_NULL) {
- cursor->left_block = buf_page_get_gen(
- page_id_t(cursor->index()->table->space_id,
- left_page_no),
- cursor->index()->table->space->zip_size(),
- mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr);
-
- if (cursor->left_block
- && btr_page_get_next(
- cursor->left_block->page.frame)
- != curr_page_no) {
-release_left_block:
- mtr->release_last_page();
- return false;
- }
- } else {
- cursor->left_block = nullptr;
- }
-
- if (buf_page_optimistic_get(mode, block, modify_clock, mtr)) {
- if (btr_page_get_prev(block->page.frame)
- == left_page_no) {
- /* block was already buffer-fixed while
- entering the function and
- buf_page_optimistic_get() buffer-fixes
- it again. */
- ut_ad(2 <= block->page.buf_fix_count());
- *latch_mode = btr_latch_mode(mode);
- return(true);
- }
-
- mtr->release_last_page();
- }
-
- ut_ad(block->page.buf_fix_count());
- if (cursor->left_block) {
- goto release_left_block;
- }
- }
-
- return false;
-}
-
/**
Gets intention in btr_intention_t from latch_mode, and clears the intention
at the latch_mode.
@@ -848,38 +698,6 @@ btr_intention_t btr_cur_get_and_clear_intention(btr_latch_mode *latch_mode)
return(intention);
}
-/**
-Gets the desired latch type for the root leaf (root page is root leaf)
-at the latch mode.
-@param latch_mode in: BTR_SEARCH_LEAF, ...
-@return latch type */
-static
-rw_lock_type_t
-btr_cur_latch_for_root_leaf(
- ulint latch_mode)
-{
- switch (latch_mode) {
- case BTR_SEARCH_LEAF:
- case BTR_SEARCH_TREE:
- case BTR_SEARCH_PREV:
- return(RW_S_LATCH);
- case BTR_MODIFY_LEAF:
- case BTR_MODIFY_TREE:
- case BTR_MODIFY_PREV:
- return(RW_X_LATCH);
- case BTR_CONT_MODIFY_TREE:
- case BTR_CONT_SEARCH_TREE:
- /* A root page should be latched already,
- and don't need to be latched here.
- fall through (RW_NO_LATCH) */
- case BTR_NO_LATCHES:
- return(RW_NO_LATCH);
- }
-
- MY_ASSERT_UNREACHABLE();
- return(RW_NO_LATCH); /* avoid compiler warnings */
-}
-
/** @return whether the distance between two records is at most the
specified value */
static bool
@@ -1197,1223 +1015,841 @@ static ulint btr_node_ptr_max_size(const dict_index_t* index)
return rec_max_size;
}
-/********************************************************************//**
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
-
-If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
-search tuple should be performed in the B-tree. InnoDB does an insert
-immediately after the cursor. Thus, the cursor may end up on a user record,
-or on a page infimum record.
-@param level the tree level of search
-@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that
- it cannot get compared to the node ptr page number field!
-@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a
- unique prefix of a record, mode should be PAGE_CUR_LE, not
- PAGE_CUR_GE, as the latter may end up on the previous page of
- the record! Inserts should always be made using PAGE_CUR_LE
- to search the position!
-@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT,
- BTR_DELETE_MARK, or BTR_DELETE;
- cursor->left_block is used to store a pointer to the left
- neighbor page
-@param cursor tree cursor; the cursor page is s- or x-latched, but see also
- above!
-@param mtr mini-transaction
-@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none)
-@return DB_SUCCESS on success or error code otherwise */
-TRANSACTIONAL_TARGET
-dberr_t btr_cur_search_to_nth_level(ulint level,
- const dtuple_t *tuple,
- page_cur_mode_t mode,
- btr_latch_mode latch_mode,
- btr_cur_t *cursor, mtr_t *mtr,
- ib_uint64_t autoinc)
+/** @return a B-tree search mode suitable for non-leaf pages
+@param mode leaf page search mode */
+static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode)
{
- page_t* page = NULL; /* remove warning */
- buf_block_t* block;
- buf_block_t* guess;
- ulint height;
- ulint up_match;
- ulint up_bytes;
- ulint low_match;
- ulint low_bytes;
- ulint rw_latch;
- page_cur_mode_t page_mode;
- page_cur_mode_t search_mode = PAGE_CUR_UNSUPP;
- ulint buf_mode;
- ulint node_ptr_max_size = srv_page_size / 2;
- page_cur_t* page_cursor;
- btr_op_t btr_op;
- ulint root_height = 0; /* remove warning */
-
- btr_intention_t lock_intention;
- buf_block_t* tree_blocks[BTR_MAX_LEVELS];
- ulint tree_savepoints[BTR_MAX_LEVELS];
- ulint n_blocks = 0;
- ulint n_releases = 0;
- bool detected_same_key_root = false;
-
- ulint leftmost_from_level = 0;
- buf_block_t** prev_tree_blocks = NULL;
- ulint* prev_tree_savepoints = NULL;
- ulint prev_n_blocks = 0;
- ulint prev_n_releases = 0;
- bool need_path = true;
- bool rtree_parent_modified = false;
- bool mbr_adj = false;
- bool found = false;
- dict_index_t * const index = cursor->index();
-
- DBUG_ENTER("btr_cur_search_to_nth_level");
-
-#ifdef BTR_CUR_ADAPT
- btr_search_t* info;
-#endif /* BTR_CUR_ADAPT */
- mem_heap_t* heap = NULL;
- rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs* offsets = offsets_;
- rec_offs offsets2_[REC_OFFS_NORMAL_SIZE];
- rec_offs* offsets2 = offsets2_;
- rec_offs_init(offsets_);
- rec_offs_init(offsets2_);
- /* Currently, PAGE_CUR_LE is the only search mode used for searches
- ending to upper levels */
-
- ut_ad(level == 0 || mode == PAGE_CUR_LE
- || RTREE_SEARCH_MODE(mode));
- ut_ad(dict_index_check_search_tuple(index, tuple));
- ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr));
- ut_ad(dtuple_check_typed(tuple));
- ut_ad(!(index->type & DICT_FTS));
- ut_ad(index->page != FIL_NULL);
-
- MEM_UNDEFINED(&cursor->up_match, sizeof cursor->up_match);
- MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes);
- MEM_UNDEFINED(&cursor->low_match, sizeof cursor->low_match);
- MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes);
-#ifdef UNIV_DEBUG
- cursor->up_match = ULINT_UNDEFINED;
- cursor->low_match = ULINT_UNDEFINED;
-#endif /* UNIV_DEBUG */
-
- const bool latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
-
- ut_ad(!latch_by_caller
- || srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK
- | MTR_MEMO_SX_LOCK));
-
- /* These flags are mutually exclusive, they are lumped together
- with the latch mode for historical reasons. It's possible for
- none of the flags to be set. */
- switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) {
- default:
- btr_op = BTR_NO_OP;
- break;
- case BTR_INSERT:
- btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE)
- ? BTR_INSERT_IGNORE_UNIQUE_OP
- : BTR_INSERT_OP;
- break;
- case BTR_DELETE:
- btr_op = BTR_DELETE_OP;
- ut_a(cursor->purge_node);
- break;
- case BTR_DELETE_MARK:
- btr_op = BTR_DELMARK_OP;
- break;
- }
+ if (mode > PAGE_CUR_GE)
+ {
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
+ return mode;
+ }
+ if (mode == PAGE_CUR_GE)
+ return PAGE_CUR_L;
+ ut_ad(mode == PAGE_CUR_G);
+ return PAGE_CUR_LE;
+}
- /* Operations on the insert buffer tree cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
- /* Operations on the clustered index cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
- /* Operations on the temporary table(indexes) cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !index->table->is_temporary());
- /* Operation on the spatial index cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index));
+dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
+ btr_latch_mode latch_mode, mtr_t *mtr)
+{
+ ut_ad(index()->is_btree() || index()->is_ibuf());
+ ut_ad(!index()->is_ibuf() || ibuf_inside(mtr));
- lock_intention = btr_cur_get_and_clear_intention(&latch_mode);
+ buf_block_t *guess;
+ btr_op_t btr_op;
+ btr_intention_t lock_intention;
+ bool detected_same_key_root= false;
- /* Turn the flags unrelated to the latch mode off. */
- latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+ mem_heap_t* heap = NULL;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_;
+ rec_offs offsets2_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets2 = offsets2_;
+ rec_offs_init(offsets_);
+ rec_offs_init(offsets2_);
+
+ ut_ad(dict_index_check_search_tuple(index(), tuple));
+ ut_ad(dtuple_check_typed(tuple));
+ ut_ad(index()->page != FIL_NULL);
+
+ MEM_UNDEFINED(&up_match, sizeof up_match);
+ MEM_UNDEFINED(&up_bytes, sizeof up_bytes);
+ MEM_UNDEFINED(&low_match, sizeof low_match);
+ MEM_UNDEFINED(&low_bytes, sizeof low_bytes);
+ ut_d(up_match= ULINT_UNDEFINED);
+ ut_d(low_match= ULINT_UNDEFINED);
+
+ ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED) ||
+ mtr->memo_contains_flagged(&index()->lock,
+ MTR_MEMO_S_LOCK | MTR_MEMO_SX_LOCK |
+ MTR_MEMO_X_LOCK));
+
+ /* These flags are mutually exclusive, they are lumped together
+ with the latch mode for historical reasons. It's possible for
+ none of the flags to be set. */
+ switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) {
+ default:
+ btr_op= BTR_NO_OP;
+ break;
+ case BTR_INSERT:
+ btr_op= (latch_mode & BTR_IGNORE_SEC_UNIQUE)
+ ? BTR_INSERT_IGNORE_UNIQUE_OP
+ : BTR_INSERT_OP;
+ break;
+ case BTR_DELETE:
+ btr_op= BTR_DELETE_OP;
+ ut_a(purge_node);
+ break;
+ case BTR_DELETE_MARK:
+ btr_op= BTR_DELMARK_OP;
+ break;
+ }
- ut_ad(!latch_by_caller
- || latch_mode == BTR_SEARCH_LEAF
- || latch_mode == BTR_SEARCH_TREE
- || latch_mode == BTR_MODIFY_LEAF);
+ /* Operations on the insert buffer tree cannot be buffered. */
+ ut_ad(btr_op == BTR_NO_OP || !index()->is_ibuf());
+ /* Operations on the clustered index cannot be buffered. */
+ ut_ad(btr_op == BTR_NO_OP || !index()->is_clust());
+ /* Operations on indexes of temporary tables cannot be buffered. */
+ ut_ad(btr_op == BTR_NO_OP || !index()->table->is_temporary());
- ut_ad(autoinc == 0 || dict_index_is_clust(index));
- ut_ad(autoinc == 0
- || latch_mode == BTR_MODIFY_TREE
- || latch_mode == BTR_MODIFY_LEAF);
- ut_ad(autoinc == 0 || level == 0);
+ const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED;
+ lock_intention= btr_cur_get_and_clear_intention(&latch_mode);
+ latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- cursor->flag = BTR_CUR_BINARY;
+ ut_ad(!latch_by_caller
+ || latch_mode == BTR_SEARCH_LEAF
+ || latch_mode == BTR_MODIFY_LEAF
+ || latch_mode == BTR_MODIFY_TREE
+ || latch_mode == BTR_MODIFY_ROOT_AND_LEAF);
+ flag= BTR_CUR_BINARY;
#ifndef BTR_CUR_ADAPT
- guess = NULL;
+ guess= nullptr;
#else
- info = btr_search_get_info(index);
- guess = info->root_guess;
-
-#ifdef BTR_CUR_HASH_ADAPT
+ btr_search_t *info= btr_search_get_info(index());
+ guess= info->root_guess;
+
+# ifdef BTR_CUR_HASH_ADAPT
+# ifdef UNIV_SEARCH_PERF_STAT
+ info->n_searches++;
+# endif
+ /* We do a dirty read of btr_search_enabled below,
+ and btr_search_guess_on_hash() will have to check it again. */
+ if (!btr_search_enabled);
+ else if (btr_search_guess_on_hash(index(), info, tuple, mode,
+ latch_mode, this, mtr))
+ {
+ /* Search using the hash index succeeded */
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ++btr_cur_n_sea;
-# ifdef UNIV_SEARCH_PERF_STAT
- info->n_searches++;
+ return DB_SUCCESS;
+ }
+ else
+ ++btr_cur_n_non_sea;
# endif
- /* We do a dirty read of btr_search_enabled below,
- and btr_search_guess_on_hash() will have to check it again. */
- if (!btr_search_enabled) {
- } else if (autoinc == 0
- && latch_mode <= BTR_MODIFY_LEAF
-# ifdef PAGE_CUR_LE_OR_EXTENDS
- && mode != PAGE_CUR_LE_OR_EXTENDS
-# endif /* PAGE_CUR_LE_OR_EXTENDS */
- && info->last_hash_succ
- && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)
- && !index->is_spatial() && !index->table->is_temporary()
- && btr_search_guess_on_hash(index, info, tuple, mode,
- latch_mode, cursor, mtr)) {
-
- /* Search using the hash index succeeded */
-
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ++btr_cur_n_sea;
-
- DBUG_RETURN(DB_SUCCESS);
- } else {
- ++btr_cur_n_non_sea;
- }
-# endif /* BTR_CUR_HASH_ADAPT */
-#endif /* BTR_CUR_ADAPT */
-
- /* If the hash search did not succeed, do binary search down the
- tree */
-
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched leaf node(s) */
-
- ulint savepoint = mtr_set_savepoint(mtr);
-
- rw_lock_type_t upper_rw_latch;
-
- switch (latch_mode) {
- case BTR_MODIFY_TREE:
- /* Most of delete-intended operations are purging.
- Free blocks and read IO bandwidth should be prior
- for them, when the history list is glowing huge. */
- if (lock_intention == BTR_INTENTION_DELETE
- && buf_pool.n_pend_reads
- && trx_sys.history_size_approx()
- > BTR_CUR_FINE_HISTORY_LENGTH) {
-x_latch_index:
- mtr_x_lock_index(index, mtr);
- } else if (index->is_spatial()
- && lock_intention <= BTR_INTENTION_BOTH) {
- /* X lock the if there is possibility of
- pessimistic delete on spatial index. As we could
- lock upward for the tree */
- goto x_latch_index;
- } else {
- mtr_sx_lock_index(index, mtr);
- }
- upper_rw_latch = RW_X_LATCH;
- break;
- case BTR_CONT_MODIFY_TREE:
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock,
- MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
- if (index->is_spatial()) {
- /* If we are about to locate parent page for split
- and/or merge operation for R-Tree index, X latch
- the parent */
- upper_rw_latch = RW_X_LATCH;
- break;
- }
- /* fall through */
- case BTR_CONT_SEARCH_TREE:
- /* Do nothing */
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock,
- MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
- upper_rw_latch = RW_NO_LATCH;
- break;
- default:
- if (!srv_read_only_mode) {
- if (!latch_by_caller) {
- ut_ad(latch_mode != BTR_SEARCH_TREE);
- mtr_s_lock_index(index, mtr);
- }
- upper_rw_latch = RW_S_LATCH;
- } else {
- upper_rw_latch = RW_NO_LATCH;
- }
- }
- const rw_lock_type_t root_leaf_rw_latch = btr_cur_latch_for_root_leaf(
- latch_mode);
-
- page_cursor = btr_cur_get_page_cur(cursor);
- page_cursor->index = index;
-
- const ulint zip_size = index->table->space->zip_size();
-
- /* Start with the root page. */
- page_id_t page_id(index->table->space_id, index->page);
-
- if (root_leaf_rw_latch == RW_X_LATCH) {
- node_ptr_max_size = btr_node_ptr_max_size(index);
- }
-
- up_match = 0;
- up_bytes = 0;
- low_match = 0;
- low_bytes = 0;
-
- height = ULINT_UNDEFINED;
-
- /* We use these modified search modes on non-leaf levels of the
- B-tree. These let us end up in the right B-tree leaf. In that leaf
- we use the original search mode. */
-
- switch (mode) {
- case PAGE_CUR_GE:
- page_mode = PAGE_CUR_L;
- break;
- case PAGE_CUR_G:
- page_mode = PAGE_CUR_LE;
- break;
- default:
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || RTREE_SEARCH_MODE(mode)
- || mode == PAGE_CUR_LE_OR_EXTENDS);
-#else /* PAGE_CUR_LE_OR_EXTENDS */
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || RTREE_SEARCH_MODE(mode));
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- page_mode = mode;
- break;
- }
-
- /* Loop and search until we arrive at the desired level */
- btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}};
-
-search_loop:
- buf_mode = BUF_GET;
- rw_latch = RW_NO_LATCH;
- rtree_parent_modified = false;
-
- if (height != 0) {
- /* We are about to fetch the root or a non-leaf page. */
- if ((latch_mode != BTR_MODIFY_TREE || height == level)
- && !prev_tree_blocks) {
- /* If doesn't have SX or X latch of index,
- each pages should be latched before reading. */
- if (height == ULINT_UNDEFINED
- && upper_rw_latch == RW_S_LATCH
- && autoinc) {
- /* needs sx-latch of root page
- for writing PAGE_ROOT_AUTO_INC */
- rw_latch = RW_SX_LATCH;
- } else {
- rw_latch = upper_rw_latch;
- }
- }
- } else if (latch_mode <= BTR_MODIFY_LEAF) {
- rw_latch = latch_mode;
-
- if (btr_op != BTR_NO_OP
- && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) {
-
- /* Try to buffer the operation if the leaf
- page is not in the buffer pool. */
-
- buf_mode = btr_op == BTR_DELETE_OP
- ? BUF_GET_IF_IN_POOL_OR_WATCH
- : BUF_GET_IF_IN_POOL;
- }
- }
-
-retry_page_get:
- ut_ad(n_blocks < BTR_MAX_LEVELS);
- tree_savepoints[n_blocks] = mtr_set_savepoint(mtr);
- dberr_t err;
- block = buf_page_get_gen(page_id, zip_size, rw_latch, guess,
- buf_mode, mtr, &err,
- height == 0 && !index->is_clust());
- if (!block) {
- switch (err) {
- case DB_SUCCESS:
- /* change buffering */
- break;
- case DB_DECRYPTION_FAILED:
- btr_decryption_failed(*index);
- /* fall through */
- default:
- goto func_exit;
- }
-
- /* This must be a search to perform an insert/delete
- mark/ delete; try using the insert/delete buffer */
-
- ut_ad(height == 0);
- ut_ad(cursor->thr);
-
- switch (btr_op) {
- default:
- MY_ASSERT_UNREACHABLE();
- break;
- case BTR_INSERT_OP:
- case BTR_INSERT_IGNORE_UNIQUE_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
- ut_ad(!dict_index_is_spatial(index));
-
- if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
- page_id, zip_size, cursor->thr)) {
-
- cursor->flag = BTR_CUR_INSERT_TO_IBUF;
-
- goto func_exit;
- }
- break;
-
- case BTR_DELMARK_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
- ut_ad(!dict_index_is_spatial(index));
-
- if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
- index, page_id, zip_size,
- cursor->thr)) {
-
- cursor->flag = BTR_CUR_DEL_MARK_IBUF;
-
- goto func_exit;
- }
-
- break;
-
- case BTR_DELETE_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
- ut_ad(!dict_index_is_spatial(index));
- auto& chain = buf_pool.page_hash.cell_get(
- page_id.fold());
-
- if (!row_purge_poss_sec(cursor->purge_node,
- index, tuple)) {
+#endif
- /* The record cannot be purged yet. */
- cursor->flag = BTR_CUR_DELETE_REF;
- } else if (ibuf_insert(IBUF_OP_DELETE, tuple,
- index, page_id, zip_size,
- cursor->thr)) {
+ /* If the hash search did not succeed, do binary search down the
+ tree */
- /* The purge was buffered. */
- cursor->flag = BTR_CUR_DELETE_IBUF;
- } else {
- /* The purge could not be buffered. */
- buf_pool.watch_unset(page_id, chain);
- break;
- }
+ /* Store the position of the tree latch we push to mtr so that we
+ know how to release it when we have latched leaf node(s) */
- buf_pool.watch_unset(page_id, chain);
- goto func_exit;
- }
+ const ulint savepoint = mtr->get_savepoint();
- /* Insert to the insert/delete buffer did not succeed, we
- must read the page from disk. */
+ ulint node_ptr_max_size= 0;
+ rw_lock_type_t rw_latch= RW_S_LATCH;
- buf_mode = BUF_GET;
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ rw_latch= RW_X_LATCH;
+ node_ptr_max_size= btr_node_ptr_max_size(index());
+ if (latch_by_caller)
+ {
+ ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK));
+ break;
+ }
+ if (lock_intention == BTR_INTENTION_DELETE && buf_pool.n_pend_reads &&
+ trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
+ /* Most delete-intended operations are due to the purge of history.
+ Prioritize them when the history list is growing huge. */
+ mtr_x_lock_index(index(), mtr);
+ else
+ mtr_sx_lock_index(index(), mtr);
+ break;
+#ifdef UNIV_DEBUG
+ case BTR_CONT_MODIFY_TREE:
+ ut_ad("invalid mode" == 0);
+ break;
+#endif
+ case BTR_MODIFY_ROOT_AND_LEAF:
+ rw_latch= RW_SX_LATCH;
+ /* fall through */
+ default:
+ if (!latch_by_caller)
+ mtr_s_lock_index(index(), mtr);
+ }
- goto retry_page_get;
- }
+ const ulint zip_size= index()->table->space->zip_size();
- tree_blocks[n_blocks] = block;
+ /* Start with the root page. */
+ page_id_t page_id(index()->table->space_id, index()->page);
- if (height && prev_tree_blocks) {
- /* also latch left sibling */
- ut_ad(rw_latch == RW_NO_LATCH);
+ const page_cur_mode_t page_mode= btr_cur_nonleaf_mode(mode);
+ ulint height= ULINT_UNDEFINED;
+ up_match= 0;
+ up_bytes= 0;
+ low_match= 0;
+ low_bytes= 0;
+ ulint buf_mode= BUF_GET;
+ search_loop:
+ dberr_t err;
+ auto block_savepoint= mtr->get_savepoint();
+ buf_block_t *block=
+ buf_page_get_gen(page_id, zip_size, rw_latch, guess, buf_mode, mtr,
+ &err, height == 0 && !index()->is_clust());
+ if (!block)
+ {
+ switch (err) {
+ case DB_DECRYPTION_FAILED:
+ btr_decryption_failed(*index());
+ /* fall through */
+ default:
+ func_exit:
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+ return err;
+ case DB_SUCCESS:
+ /* This must be a search to perform an insert, delete mark, or delete;
+ try using the change buffer */
+ ut_ad(height == 0);
+ ut_ad(thr);
+ break;
+ }
- rw_latch = upper_rw_latch;
+ switch (btr_op) {
+ default:
+ MY_ASSERT_UNREACHABLE();
+ break;
+ case BTR_INSERT_OP:
+ case BTR_INSERT_IGNORE_UNIQUE_OP:
+ ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
- /* Because we are holding index->lock, no page splits
- or merges may run concurrently, and we may read
- FIL_PAGE_PREV from a buffer-fixed, unlatched page. */
- uint32_t left_page_no = btr_page_get_prev(block->page.frame);
+ if (ibuf_insert(IBUF_OP_INSERT, tuple, index(), page_id, zip_size, thr))
+ {
+ flag= BTR_CUR_INSERT_TO_IBUF;
+ goto func_exit;
+ }
+ break;
- if (left_page_no != FIL_NULL) {
- ut_ad(prev_n_blocks < leftmost_from_level);
-
- prev_tree_savepoints[prev_n_blocks]
- = mtr_set_savepoint(mtr);
- buf_block_t* get_block = buf_page_get_gen(
- page_id_t(page_id.space(), left_page_no),
- zip_size, rw_latch, NULL, buf_mode,
- mtr, &err);
- if (!get_block) {
- if (err == DB_DECRYPTION_FAILED) {
- btr_decryption_failed(*index);
- }
- goto func_exit;
- }
+ case BTR_DELMARK_OP:
+ ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
- prev_tree_blocks[prev_n_blocks++] = get_block;
- /* BTR_MODIFY_TREE doesn't update prev/next_page_no,
- without their parent page's lock. So, not needed to
- retry here, because we have the parent page's lock. */
- }
+ if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
+ index(), page_id, zip_size, thr))
+ {
+ flag = BTR_CUR_DEL_MARK_IBUF;
+ goto func_exit;
+ }
- mtr->s_lock_register(tree_savepoints[n_blocks]);
- block->page.lock.s_lock();
- }
+ break;
- page = buf_block_get_frame(block);
+ case BTR_DELETE_OP:
+ ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
+ auto& chain = buf_pool.page_hash.cell_get(page_id.fold());
+
+ if (!row_purge_poss_sec(purge_node, index(), tuple))
+ /* The record cannot be purged yet. */
+ flag= BTR_CUR_DELETE_REF;
+ else if (ibuf_insert(IBUF_OP_DELETE, tuple, index(),
+ page_id, zip_size, thr))
+ /* The purge was buffered. */
+ flag= BTR_CUR_DELETE_IBUF;
+ else
+ {
+ /* The purge could not be buffered. */
+ buf_pool.watch_unset(page_id, chain);
+ break;
+ }
- if (height == ULINT_UNDEFINED
- && page_is_leaf(page)
- && rw_latch != RW_NO_LATCH
- && rw_latch != root_leaf_rw_latch) {
- /* The root page is also a leaf page (root_leaf).
- We should reacquire the page, because the root page
- is latched differently from leaf pages. */
- ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
- ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH);
- ut_ad(rw_latch == RW_S_LATCH || autoinc);
- ut_ad(!autoinc || root_leaf_rw_latch == RW_X_LATCH);
+ buf_pool.watch_unset(page_id, chain);
+ goto func_exit;
+ }
- ut_ad(n_blocks == 0);
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[n_blocks],
- tree_blocks[n_blocks]);
+ /* Change buffering did not succeed, we must read the page. */
+ buf_mode= BUF_GET;
+ goto search_loop;
+ }
- upper_rw_latch = root_leaf_rw_latch;
- goto search_loop;
- }
+ if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() ||
+ btr_page_get_index_id(block->page.frame) != index()->id ||
+ fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE ||
+ !fil_page_index_page_check(block->page.frame))
+ {
+ corrupted:
+ ut_ad("corrupted" == 0); // FIXME: remove this
+ err= DB_CORRUPTION;
+ goto func_exit;
+ }
+ page_cur.block= block;
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
+ const page_t *page= buf_block_get_frame(block);
#ifdef UNIV_ZIP_DEBUG
- if (rw_latch != RW_NO_LATCH) {
- const page_zip_des_t* page_zip
- = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
- }
+ if (rw_latch != RW_NO_LATCH)
+ {
+ const page_zip_des_t *page_zip= buf_block_get_page_zip(block);
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index()));
+ }
#endif /* UNIV_ZIP_DEBUG */
+ const uint32_t page_level= btr_page_get_level(page);
- ut_ad(fil_page_index_page_check(page));
- ut_ad(index->id == btr_page_get_index_id(page));
-
- if (height == ULINT_UNDEFINED) {
- /* We are in the root node */
-
- height = btr_page_get_level(page);
- root_height = height;
- cursor->tree_height = root_height + 1;
-
- if (dict_index_is_spatial(index)) {
- ut_ad(cursor->rtr_info);
-
- /* If SSN in memory is not initialized, fetch
- it from root page */
- if (!rtr_get_current_ssn_id(index)) {
- /* FIXME: do this in dict_load_table_one() */
- index->set_ssn(page_get_ssn_id(page) + 1);
- }
-
- /* Save the MBR */
- cursor->rtr_info->thr = cursor->thr;
- rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr);
- }
-
+ if (height == ULINT_UNDEFINED)
+ {
+ /* We are in the B-tree index root page. */
#ifdef BTR_CUR_ADAPT
- info->root_guess = block;
+ info->root_guess= block;
#endif
- }
-
- if (height == 0) {
- if (rw_latch == RW_NO_LATCH) {
- btr_cur_latch_leaves(block, latch_mode, cursor, mtr,
- &latch_leaves);
- }
-
- switch (latch_mode) {
- case BTR_MODIFY_TREE:
- case BTR_CONT_MODIFY_TREE:
- case BTR_CONT_SEARCH_TREE:
- break;
- default:
- if (!latch_by_caller
- && !srv_read_only_mode) {
- /* Release the tree s-latch */
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- &index->lock);
- }
-
- /* release upper blocks */
- if (prev_tree_blocks) {
- ut_ad(!autoinc);
- for (;
- prev_n_releases < prev_n_blocks;
- prev_n_releases++) {
- mtr_release_block_at_savepoint(
- mtr,
- prev_tree_savepoints[
- prev_n_releases],
- prev_tree_blocks[
- prev_n_releases]);
- }
- }
-
- for (; n_releases < n_blocks; n_releases++) {
- if (n_releases == 0
- && (autoinc)) {
- /* keep the root page latch */
- ut_ad(mtr->memo_contains_flagged(
- tree_blocks[n_releases],
- MTR_MEMO_PAGE_SX_FIX
- | MTR_MEMO_PAGE_X_FIX));
- continue;
- }
+ height= page_level;
+ tree_height= height + 1;
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[n_releases],
- tree_blocks[n_releases]);
- }
- }
-
- page_mode = mode;
- }
-
- if (dict_index_is_spatial(index)) {
- /* Remember the page search mode */
- search_mode = page_mode;
-
- /* Some adjustment on search mode, when the
- page search mode is PAGE_CUR_RTREE_LOCATE
- or PAGE_CUR_RTREE_INSERT, as we are searching
- with MBRs. When it is not the target level, we
- should search all sub-trees that "CONTAIN" the
- search range/MBR. When it is at the target
- level, the search becomes PAGE_CUR_LE */
- if (page_mode == PAGE_CUR_RTREE_LOCATE
- && level == height) {
- if (level == 0) {
- page_mode = PAGE_CUR_LE;
- } else {
- page_mode = PAGE_CUR_RTREE_GET_FATHER;
- }
- }
-
- if (page_mode == PAGE_CUR_RTREE_INSERT) {
- page_mode = (level == height)
- ? PAGE_CUR_LE
- : PAGE_CUR_RTREE_INSERT;
-
- ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE);
- }
-
- /* "need_path" indicates if we need to tracking the parent
- pages, if it is not spatial comparison, then no need to
- track it */
- if (page_mode < PAGE_CUR_CONTAIN) {
- need_path = false;
- }
-
- up_match = 0;
- low_match = 0;
-
- if (latch_mode == BTR_MODIFY_TREE
- || latch_mode == BTR_CONT_MODIFY_TREE
- || latch_mode == BTR_CONT_SEARCH_TREE) {
- /* Tree are locked, no need for Page Lock to protect
- the "path" */
- cursor->rtr_info->need_page_lock = false;
- }
+ if (!height)
+ {
+ /* The root page is also a leaf page.
+ We may have to reacquire the page latch in a different mode. */
+ switch (rw_latch) {
+ case RW_S_LATCH:
+ if ((latch_mode & ~12) != RW_S_LATCH)
+ {
+ rw_latch= rw_lock_type_t(latch_mode & ~12);
+ ut_ad(rw_latch == RW_X_LATCH || rw_latch == RW_SX_LATCH);
+ goto relatch;
}
+ if (latch_mode != BTR_MODIFY_PREV)
+ {
+ if (!latch_by_caller)
+ /* Release the tree s-latch */
+ mtr->rollback_to_savepoint(savepoint, savepoint + 1);
+ goto reached_latched_leaf;
+ }
+ /* fall through */
+ case RW_SX_LATCH:
+ ut_ad(rw_latch == RW_S_LATCH ||
+ latch_mode == BTR_MODIFY_ROOT_AND_LEAF);
+ rw_latch= RW_X_LATCH;
+ relatch:
+ mtr->rollback_to_savepoint(block_savepoint);
+ height= ULINT_UNDEFINED;
+ goto search_loop;
+ case RW_X_LATCH:
+ if (latch_mode == BTR_MODIFY_TREE)
+ goto reached_index_root_and_leaf;
+ goto reached_root_and_leaf;
+ case RW_NO_LATCH:
+ ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK));
+ }
+ goto reached_leaf;
+ }
+ }
+ else if (UNIV_UNLIKELY(height != page_level))
+ goto corrupted;
+ else
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ break;
+ case BTR_MODIFY_ROOT_AND_LEAF:
+ ut_ad((mtr->at_savepoint(block_savepoint - 1)->page.id().page_no() ==
+ index()->page) == (tree_height <= height + 2));
+ if (tree_height <= height + 2)
+ /* Retain the root page latch. */
+ break;
+ /* fall through */
+ default:
+ /* Release the parent page latch. */
+ ut_ad(block_savepoint > savepoint);
+ mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint);
+ block_savepoint--;
+ }
- page_cursor->block = block;
-
- if (dict_index_is_spatial(index) && page_mode >= PAGE_CUR_CONTAIN) {
- ut_ad(need_path);
- found = rtr_cur_search_with_match(
- block, index, tuple, page_mode, page_cursor,
- cursor->rtr_info);
+ if (!height)
+ {
+ reached_leaf:
+ /* We reached the leaf level. */
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
- /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */
- if (search_mode == PAGE_CUR_RTREE_INSERT
- && cursor->rtr_info->mbr_adj) {
- static_assert(BTR_MODIFY_TREE
- == (8 | BTR_MODIFY_LEAF), "");
+ if (latch_mode == BTR_MODIFY_ROOT_AND_LEAF)
+ {
+ reached_root_and_leaf:
+ if (!latch_by_caller)
+ mtr->rollback_to_savepoint(savepoint, savepoint + 1);
+ reached_index_root_and_leaf:
+ ut_ad(rw_latch == RW_X_LATCH);
+#ifdef BTR_CUR_HASH_ADAPT
+ btr_search_drop_page_hash_index(block, true);
+#endif
+ if (page_cur_search_with_match(tuple, mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ goto corrupted;
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ goto func_exit;
+ }
- if (!(latch_mode & 8)) {
- /* Parent MBR needs updated, should retry
- with BTR_MODIFY_TREE */
- goto func_exit;
- }
+ if (rw_latch == RW_NO_LATCH)
+ btr_cur_latch_leaves(block_savepoint, latch_mode, this, mtr);
- rtree_parent_modified = true;
- cursor->rtr_info->mbr_adj = false;
- mbr_adj = true;
- }
+ if (latch_mode != BTR_MODIFY_TREE)
+ {
+ if (!latch_by_caller)
+ {
+ /* Release the tree s-latch */
+ block_savepoint--;
+ mtr->rollback_to_savepoint(savepoint, savepoint + 1);
+ }
+ /* release upper blocks */
+ if (savepoint < block_savepoint)
+ mtr->rollback_to_savepoint(savepoint, block_savepoint);
+ }
+ else
+ ut_ad(rw_latch == RW_NO_LATCH);
- if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) {
- cursor->low_match =
- DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
- }
+ reached_latched_leaf:
#ifdef BTR_CUR_HASH_ADAPT
- } else if (height == 0 && btr_search_enabled
- && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)
- && index->is_btree()) {
- /* The adaptive hash index is only used when searching
- for leaf pages (height==0), but not in r-trees.
- We only need the byte prefix comparison for the purpose
- of updating the adaptive hash index. */
- if (page_cur_search_with_match_bytes(
- tuple, page_mode, &up_match, &up_bytes,
- &low_match, &low_bytes, page_cursor)) {
- err = DB_CORRUPTION;
- goto func_exit;
- }
+ if (btr_search_enabled && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG))
+ {
+ if (page_cur_search_with_match_bytes(tuple, mode,
+ &up_match, &up_bytes,
+ &low_match, &low_bytes, &page_cur))
+ goto corrupted;
+ }
+ else
#endif /* BTR_CUR_HASH_ADAPT */
- } else {
- /* Search for complete index fields. */
- up_bytes = low_bytes = 0;
- if (page_cur_search_with_match(
- tuple, page_mode, &up_match,
- &low_match, page_cursor,
- need_path ? cursor->rtr_info : nullptr)) {
- err = DB_CORRUPTION;
- goto func_exit;
- }
- }
-
- /* If this is the desired level, leave the loop */
-
- ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor)));
-
- /* Add Predicate lock if it is serializable isolation
- and only if it is in the search case */
- if (dict_index_is_spatial(index)
- && cursor->rtr_info->need_prdt_lock
- && mode != PAGE_CUR_RTREE_INSERT
- && mode != PAGE_CUR_RTREE_LOCATE
- && mode >= PAGE_CUR_CONTAIN) {
- lock_prdt_t prdt;
-
- {
- trx_t* trx = thr_get_trx(cursor->thr);
- TMLockTrxGuard g{TMLockTrxArgs(*trx)};
- lock_init_prdt_from_mbr(
- &prdt, &cursor->rtr_info->mbr, mode,
- trx->lock.lock_heap);
- }
-
- if (rw_latch == RW_NO_LATCH && height != 0) {
- block->page.lock.s_lock();
- }
-
- lock_prdt_lock(block, &prdt, index, LOCK_S,
- LOCK_PREDICATE, cursor->thr);
-
- if (rw_latch == RW_NO_LATCH && height != 0) {
- block->page.lock.s_unlock();
- }
- }
-
- if (level != height) {
-
- const rec_t* node_ptr;
- ut_ad(height > 0);
-
- height--;
- guess = NULL;
+ if (page_cur_search_with_match(tuple, mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ goto corrupted;
- node_ptr = page_cur_get_rec(page_cursor);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
- offsets = rec_get_offsets(node_ptr, index, offsets, 0,
- ULINT_UNDEFINED, &heap);
+#ifdef BTR_CUR_HASH_ADAPT
+ /* We do a dirty read of btr_search_enabled here. We will
+ properly check btr_search_enabled again in
+ btr_search_build_page_hash_index() before building a page hash
+ index, while holding search latch. */
+ if (!btr_search_enabled);
+ else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG)
+ /* This may be a search tuple for btr_pcur_t::restore_position(). */
+ ut_ad(tuple->is_metadata() ||
+ (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT)));
+ else if (index()->table->is_temporary());
+ else if (!rec_is_metadata(page_cur.rec, *index()))
+ btr_search_info_update(index(), this);
+#endif /* BTR_CUR_HASH_ADAPT */
- /* If the rec is the first or last in the page for
- pessimistic delete intention, it might cause node_ptr insert
- for the upper level. We should change the intention and retry.
- */
- if (latch_mode == BTR_MODIFY_TREE
- && btr_cur_need_opposite_intention(
- page, lock_intention, node_ptr)) {
+ goto func_exit;
+ }
-need_opposite_intention:
- ut_ad(upper_rw_latch == RW_X_LATCH);
+ guess= nullptr;
+ if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ goto corrupted;
+ offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED,
+ &heap);
- if (n_releases > 0) {
- /* release root block */
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[0],
- tree_blocks[0]);
- }
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
- /* release all blocks */
- for (; n_releases <= n_blocks; n_releases++) {
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[n_releases],
- tree_blocks[n_releases]);
- }
+ switch (latch_mode) {
+ default:
+ break;
+ case BTR_MODIFY_TREE:
+ if (btr_cur_need_opposite_intention(page, lock_intention, page_cur.rec))
+ {
+ /* If the rec is the first or last in the page for pessimistic
+ delete intention, it might cause node_ptr insert for the upper
+ level. We should change the intention and retry. */
+ need_opposite_intention:
+ return pessimistic_search_leaf(tuple, mode, mtr);
+ }
- lock_intention = BTR_INTENTION_BOTH;
+ if (detected_same_key_root || lock_intention != BTR_INTENTION_BOTH ||
+ index()->is_unique() ||
+ (up_match <= rec_offs_n_fields(offsets) &&
+ low_match <= rec_offs_n_fields(offsets)))
+ break;
- page_id.set_page_no(index->page);
- up_match = 0;
- low_match = 0;
- height = ULINT_UNDEFINED;
+ /* If the first or the last record of the page or the same key
+ value to the first record or last record, then another page might
+ be chosen when BTR_CONT_MODIFY_TREE. So, the parent page should
+ not released to avoiding deadlock with blocking the another search
+ with the same key value. */
+ const rec_t *first= page_rec_get_next_const(page_get_infimum_rec(page));
+ ulint matched_fields;
- n_blocks = 0;
- n_releases = 0;
+ if (UNIV_UNLIKELY(!first))
+ goto corrupted;
+ if (page_cur.rec == first || page_rec_is_last(page_cur.rec, page))
+ {
+ same_key_root:
+ detected_same_key_root= true;
+ break;
+ }
- goto search_loop;
- }
+ matched_fields= 0;
+ offsets2= rec_get_offsets(first, index(), offsets2, 0, ULINT_UNDEFINED,
+ &heap);
+ cmp_rec_rec(page_cur.rec, first, offsets, offsets2, index(), false,
+ &matched_fields);
+ if (matched_fields >= rec_offs_n_fields(offsets) - 1)
+ goto same_key_root;
+ if (const rec_t* last=
+ page_rec_get_prev_const(page_get_supremum_rec(page)))
+ {
+ matched_fields= 0;
+ offsets2= rec_get_offsets(last, index(), offsets2, 0, ULINT_UNDEFINED,
+ &heap);
+ cmp_rec_rec(page_cur.rec, last, offsets, offsets2, index(), false,
+ &matched_fields);
+ if (matched_fields >= rec_offs_n_fields(offsets) - 1)
+ goto same_key_root;
+ }
+ else
+ goto corrupted;
- if (dict_index_is_spatial(index)) {
- if (page_rec_is_supremum(node_ptr)) {
- cursor->low_match = 0;
- cursor->up_match = 0;
- goto func_exit;
- }
+ /* Release the non-root parent page unless it may need to be modified. */
+ if (tree_height > height + 1 &&
+ !btr_cur_will_modify_tree(index(), page, lock_intention,
+ page_cur.rec, node_ptr_max_size,
+ zip_size, mtr))
+ {
+ mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint);
+ block_savepoint--;
+ }
+ }
- /* If we are doing insertion or record locating,
- remember the tree nodes we visited */
- if (page_mode == PAGE_CUR_RTREE_INSERT
- || (search_mode == PAGE_CUR_RTREE_LOCATE
- && (latch_mode != BTR_MODIFY_LEAF))) {
- bool add_latch = false;
-
- if (latch_mode == BTR_MODIFY_TREE
- && rw_latch == RW_NO_LATCH) {
- ut_ad(mtr->memo_contains_flagged(
- &index->lock, MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
- block->page.lock.s_lock();
- add_latch = true;
- }
+ /* Go to the child node */
+ page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets));
- /* Store the parent cursor location */
-#ifdef UNIV_DEBUG
- ulint num_stored = rtr_store_parent_path(
- block, cursor, latch_mode,
- height + 1, mtr);
-#else
- rtr_store_parent_path(
- block, cursor, latch_mode,
- height + 1, mtr);
-#endif
+ if (!--height)
+ {
+ /* We are about to access the leaf level. */
+ rw_latch= RW_NO_LATCH;
- if (page_mode == PAGE_CUR_RTREE_INSERT) {
- btr_pcur_t* r_cursor =
- rtr_get_parent_cursor(
- cursor, height + 1,
- true);
- /* If it is insertion, there should
- be only one parent for each level
- traverse */
-#ifdef UNIV_DEBUG
- ut_ad(num_stored == 1);
+ switch (latch_mode) {
+ case BTR_MODIFY_ROOT_AND_LEAF:
+ rw_latch= RW_X_LATCH;
+ break;
+ default:
+ break;
+ case BTR_MODIFY_PREV:
+ /* This is almost exclusively for ibuf_insert(), but also for
+ btr_pcur_move_to_prev(); the latter is not exercised by mtr */
+ case BTR_SEARCH_PREV:
+ if (page_has_prev(page) && page_rec_is_first(page_cur.rec, page))
+ {
+ ut_ad(block_savepoint + 1 == mtr->get_savepoint());
+ /* Latch the previous page if the node pointer is the leftmost
+ of the current page. */
+ buf_block_t *left= btr_block_get(*index(), btr_page_get_prev(page),
+ RW_NO_LATCH, false, mtr, &err);
+ if (!left)
+ goto func_exit;
+ static_assert(mtr_memo_type_t(BTR_MODIFY_PREV & ~4) ==
+ MTR_MEMO_PAGE_X_FIX, "");
+ static_assert(mtr_memo_type_t(BTR_SEARCH_PREV & ~4) ==
+ MTR_MEMO_PAGE_S_FIX, "");
+ mtr->lock_register(block_savepoint + 1,
+ mtr_memo_type_t(latch_mode & ~4));
+ /* Because we are violating the latching order here, we will
+ have to temporarily release the right page latch if the left
+ page latch cannot be acquired without waiting. Concurrent page
+ splits or merges are impossible because we are holding a latch
+ on the parent of these sibling pages. */
+ if (latch_mode == BTR_MODIFY_PREV)
+ {
+ if (!left->page.lock.x_lock_try())
+ {
+ block->page.lock.x_unlock();
+ left->page.lock.x_lock();
+ }
+ }
+ else if (!left->page.lock.s_lock_try())
+ {
+ block->page.lock.s_unlock();
+ left->page.lock.s_lock();
+ }
+#ifdef BTR_CUR_HASH_ADAPT
+ btr_search_drop_page_hash_index(left, true);
#endif
+ }
+ break;
+ case BTR_MODIFY_LEAF:
+ case BTR_SEARCH_LEAF:
+ if (index()->is_ibuf())
+ break;
+ rw_latch= rw_lock_type_t(latch_mode);
+ if (btr_op != BTR_NO_OP &&
+ ibuf_should_try(index(), btr_op != BTR_INSERT_OP))
+ /* Try to buffer the operation if the leaf page
+ is not in the buffer pool. */
+ buf_mode= btr_op == BTR_DELETE_OP
+ ? BUF_GET_IF_IN_POOL_OR_WATCH
+ : BUF_GET_IF_IN_POOL;
+ break;
+ case BTR_MODIFY_TREE:
+ if (index()->is_ibuf())
+ break;
+ if (lock_intention == BTR_INTENTION_INSERT && page_has_next(page) &&
+ page_rec_is_last(page_cur.rec, page))
+ {
+ /* btr_insert_into_right_sibling() might cause deleting node_ptr
+ at upper level */
+ mtr->rollback_to_savepoint(block_savepoint);
+ goto need_opposite_intention;
+ }
+ }
+ }
- node_ptr = btr_pcur_get_rec(r_cursor);
-
- }
-
- if (add_latch) {
- block->page.lock.s_unlock();
- }
-
- ut_ad(!page_rec_is_supremum(node_ptr));
- }
-
- ut_ad(page_mode == search_mode
- || (page_mode == PAGE_CUR_WITHIN
- && search_mode == PAGE_CUR_RTREE_LOCATE));
-
- page_mode = search_mode;
- }
-
- /* If the first or the last record of the page
- or the same key value to the first record or last record,
- the another page might be chosen when BTR_CONT_MODIFY_TREE.
- So, the parent page should not released to avoiding deadlock
- with blocking the another search with the same key value. */
- if (!detected_same_key_root
- && lock_intention == BTR_INTENTION_BOTH
- && !dict_index_is_unique(index)
- && latch_mode == BTR_MODIFY_TREE
- && (up_match >= rec_offs_n_fields(offsets) - 1
- || low_match >= rec_offs_n_fields(offsets) - 1)) {
- const rec_t* first_rec = page_rec_get_next_const(
- page_get_infimum_rec(page));
- ulint matched_fields;
-
- ut_ad(upper_rw_latch == RW_X_LATCH);
-
- if (UNIV_UNLIKELY(!first_rec)) {
- corrupted:
- err = DB_CORRUPTION;
- goto func_exit;
- }
- if (node_ptr == first_rec
- || page_rec_is_last(node_ptr, page)) {
- detected_same_key_root = true;
- } else {
- matched_fields = 0;
-
- offsets2 = rec_get_offsets(
- first_rec, index, offsets2,
- 0, ULINT_UNDEFINED, &heap);
- cmp_rec_rec(node_ptr, first_rec,
- offsets, offsets2, index, false,
- &matched_fields);
-
- if (matched_fields
- >= rec_offs_n_fields(offsets) - 1) {
- detected_same_key_root = true;
- } else if (const rec_t* last_rec
- = page_rec_get_prev_const(
- page_get_supremum_rec(
- page))) {
- matched_fields = 0;
-
- offsets2 = rec_get_offsets(
- last_rec, index, offsets2,
- 0, ULINT_UNDEFINED, &heap);
- cmp_rec_rec(
- node_ptr, last_rec,
- offsets, offsets2, index,
- false, &matched_fields);
- if (matched_fields
- >= rec_offs_n_fields(offsets) - 1) {
- detected_same_key_root = true;
- }
- } else {
- goto corrupted;
- }
- }
- }
-
- /* If the page might cause modify_tree,
- we should not release the parent page's lock. */
- if (!detected_same_key_root
- && latch_mode == BTR_MODIFY_TREE
- && !btr_cur_will_modify_tree(
- index, page, lock_intention, node_ptr,
- node_ptr_max_size, zip_size, mtr)
- && !rtree_parent_modified) {
- ut_ad(upper_rw_latch == RW_X_LATCH);
- ut_ad(n_releases <= n_blocks);
-
- /* we can release upper blocks */
- for (; n_releases < n_blocks; n_releases++) {
- if (n_releases == 0) {
- /* we should not release root page
- to pin to same block. */
- continue;
- }
+ goto search_loop;
+}
- /* release unused blocks to unpin */
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[n_releases],
- tree_blocks[n_releases]);
- }
- }
+/** Descend from the root to the leaf level while X-latching every page on
+the search path together with the left and right siblings of each
+non-root page, and position page_cur on the leaf page.
+Debug assertions require that index()->lock is held in SX or X mode and
+that the root page is the block registered at mini-transaction savepoint 1.
+@param tuple search tuple
+@param mode search mode to apply on the leaf page; non-leaf pages are
+ searched with btr_cur_nonleaf_mode(mode)
+@param mtr mini-transaction
+@return DB_SUCCESS when the cursor was positioned on a leaf page
+@retval DB_CORRUPTION if a page fails the consistency checks
+@return otherwise, the error reported while fetching a page */
+ATTRIBUTE_COLD
+dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
+ page_cur_mode_t mode, mtr_t *mtr)
+{
+ ut_ad(index()->is_btree() || index()->is_ibuf());
+ ut_ad(!index()->is_ibuf() || ibuf_inside(mtr));
- if (height == level
- && latch_mode == BTR_MODIFY_TREE) {
- ut_ad(upper_rw_latch == RW_X_LATCH);
- /* we should sx-latch root page, if released already.
- It contains seg_header. */
- if (n_releases > 0) {
- mtr->sx_latch_at_savepoint(
- tree_savepoints[0],
- tree_blocks[0]);
- }
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_;
+ rec_offs_init(offsets_);
- /* x-latch the branch blocks not released yet. */
- for (ulint i = n_releases; i <= n_blocks; i++) {
- mtr->x_latch_at_savepoint(
- tree_savepoints[i],
- tree_blocks[i]);
- }
- }
+ ut_ad(flag == BTR_CUR_BINARY);
+ ut_ad(dict_index_check_search_tuple(index(), tuple));
+ ut_ad(dtuple_check_typed(tuple));
+ buf_block_t *block= mtr->at_savepoint(1);
+ ut_ad(block->page.id().page_no() == index()->page);
+ /* Buffer-fix the root so that it stays pinned across the rollback below;
+ it is X-latched again by mtr->page_lock(). NOTE(review): this presumes
+ that rollback_to_savepoint(1) releases the memo entries from savepoint 1
+ onwards -- confirm against mtr_t. */
+ block->page.fix();
+ mtr->rollback_to_savepoint(1);
+ ut_ad(mtr->memo_contains_flagged(&index()->lock,
+ MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK));
+
+ const page_cur_mode_t page_mode{btr_cur_nonleaf_mode(mode)};
+
+ mtr->page_lock(block, RW_X_LATCH);
+
+ up_match= 0;
+ up_bytes= 0;
+ low_match= 0;
+ low_bytes= 0;
+ ulint height= btr_page_get_level(block->page.frame);
+ tree_height= height + 1;
+ mem_heap_t *heap= nullptr;
- /* We should consider prev_page of parent page, if the node_ptr
- is the leftmost of the page. because BTR_SEARCH_PREV and
- BTR_MODIFY_PREV latches prev_page of the leaf page. */
- if ((latch_mode == BTR_SEARCH_PREV
- || latch_mode == BTR_MODIFY_PREV)
- && !prev_tree_blocks) {
- /* block should be latched for consistent
- btr_page_get_prev() */
- ut_ad(mtr->memo_contains_flagged(
- block, MTR_MEMO_PAGE_S_FIX
- | MTR_MEMO_PAGE_X_FIX));
+ search_loop:
+ dberr_t err;
+ page_cur.block= block;
- if (page_has_prev(page)
- && page_rec_is_first(node_ptr, page)) {
+ if (UNIV_UNLIKELY(!height))
+ {
+ /* Leaf level: search with the caller's mode and return. */
+ if (page_cur_search_with_match(tuple, mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ corrupted:
+ err= DB_CORRUPTION;
+ else
+ {
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
- if (leftmost_from_level == 0) {
- leftmost_from_level = height + 1;
- }
- } else {
- leftmost_from_level = 0;
- }
+#ifdef BTR_CUR_HASH_ADAPT
+ /* We do a dirty read of btr_search_enabled here. We will
+ properly check btr_search_enabled again in
+ btr_search_build_page_hash_index() before building a page hash
+ index, while holding search latch. */
+ if (!btr_search_enabled);
+ else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG)
+ /* This may be a search tuple for btr_pcur_t::restore_position(). */
+ ut_ad(tuple->is_metadata() ||
+ (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT)));
+ else if (index()->table->is_temporary());
+ else if (!rec_is_metadata(page_cur.rec, *index()))
+ btr_search_info_update(index(), this);
+#endif /* BTR_CUR_HASH_ADAPT */
+ err= DB_SUCCESS;
+ }
- if (height == 0 && leftmost_from_level > 0) {
- /* should retry to get also prev_page
- from level==leftmost_from_level. */
- prev_tree_blocks = static_cast<buf_block_t**>(
- ut_malloc_nokey(sizeof(buf_block_t*)
- * leftmost_from_level));
-
- prev_tree_savepoints = static_cast<ulint*>(
- ut_malloc_nokey(sizeof(ulint)
- * leftmost_from_level));
-
- /* back to the level (leftmost_from_level+1) */
- ulint idx = n_blocks
- - (leftmost_from_level - 1);
-
- page_id.set_page_no(
- tree_blocks[idx]->page.id().page_no());
-
- for (ulint i = n_blocks
- - (leftmost_from_level - 1);
- i <= n_blocks; i++) {
- mtr_release_block_at_savepoint(
- mtr, tree_savepoints[i],
- tree_blocks[i]);
- }
+ func_exit:
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+ return err;
+ }
- n_blocks -= (leftmost_from_level - 1);
- height = leftmost_from_level;
- ut_ad(n_releases == 0);
-
- /* replay up_match, low_match */
- up_match = 0;
- low_match = 0;
- rtr_info_t* rtr_info = need_path
- ? cursor->rtr_info : NULL;
-
- for (ulint i = 0; i < n_blocks; i++) {
- page_cursor->block = tree_blocks[i];
- if (page_cur_search_with_match(
- tuple,
- page_mode, &up_match,
- &low_match, page_cursor,
- rtr_info)) {
- err = DB_CORRUPTION;
- goto func_exit;
- }
- }
+ if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match,
+ &page_cur, nullptr))
+ goto corrupted;
- goto search_loop;
- }
- }
+ page_id_t page_id{block->page.id()};
- /* Go to the child node */
- page_id.set_page_no(
- btr_node_ptr_get_child_page_no(node_ptr, offsets));
+ offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED,
+ &heap);
+ /* Go to the child node */
+ page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets));
- n_blocks++;
+ const auto block_savepoint= mtr->get_savepoint();
+ block=
+ buf_page_get_gen(page_id, block->zip_size(), RW_NO_LATCH, nullptr, BUF_GET,
+ mtr, &err, !--height && !index()->is_clust());
- if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
- /* We're doing a search on an ibuf tree and we're one
- level above the leaf page. */
+ if (!block)
+ {
+ if (err == DB_DECRYPTION_FAILED)
+ btr_decryption_failed(*index());
+ goto func_exit;
+ }
- ut_ad(level == 0);
+ if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() ||
+ btr_page_get_index_id(block->page.frame) != index()->id ||
+ fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE ||
+ !fil_page_index_page_check(block->page.frame))
+ goto corrupted;
- buf_mode = BUF_GET;
- rw_latch = RW_NO_LATCH;
- goto retry_page_get;
- }
+ if (height != btr_page_get_level(block->page.frame))
+ goto corrupted;
- if (dict_index_is_spatial(index)
- && page_mode >= PAGE_CUR_CONTAIN
- && page_mode != PAGE_CUR_RTREE_INSERT) {
- ut_ad(need_path);
- rtr_node_path_t* path =
- cursor->rtr_info->path;
-
- if (!path->empty() && found) {
- ut_ad(path->back().page_no
- == page_id.page_no());
- path->pop_back();
-#ifdef UNIV_DEBUG
- if (page_mode == PAGE_CUR_RTREE_LOCATE
- && (latch_mode != BTR_MODIFY_LEAF)) {
- btr_pcur_t* cur
- = cursor->rtr_info->parent_path->back(
- ).cursor;
- rec_t* my_node_ptr
- = btr_pcur_get_rec(cur);
-
- offsets = rec_get_offsets(
- my_node_ptr, index, offsets,
- 0, ULINT_UNDEFINED, &heap);
-
- ulint my_page_no
- = btr_node_ptr_get_child_page_no(
- my_node_ptr, offsets);
-
- ut_ad(page_id.page_no() == my_page_no);
- }
+ /* Latch order within a level: left sibling, the page itself, then the
+ right sibling. */
+ if (page_has_prev(block->page.frame) &&
+ !btr_block_get(*index(), btr_page_get_prev(block->page.frame),
+ RW_X_LATCH, false, mtr, &err))
+ goto func_exit;
+ mtr->x_latch_at_savepoint(block_savepoint, block);
+#ifdef BTR_CUR_HASH_ADAPT
+ btr_search_drop_page_hash_index(block, true);
#endif
- }
- }
-
- goto search_loop;
- } else if (!dict_index_is_spatial(index)
- && latch_mode == BTR_MODIFY_TREE
- && lock_intention == BTR_INTENTION_INSERT
- && page_has_next(page)
- && page_rec_is_last(page_cur_get_rec(page_cursor), page)) {
-
- /* btr_insert_into_right_sibling() might cause
- deleting node_ptr at upper level */
-
- guess = NULL;
-
- if (height == 0) {
- /* release the leaf pages if latched */
- for (uint i = 0; i < 3; i++) {
- if (latch_leaves.blocks[i] != NULL) {
- mtr_release_block_at_savepoint(
- mtr, latch_leaves.savepoints[i],
- latch_leaves.blocks[i]);
- latch_leaves.blocks[i] = NULL;
- }
- }
- }
-
- goto need_opposite_intention;
- }
+#ifdef UNIV_ZIP_DEBUG
+ /* There is no local "page" variable in this function; validate the
+ frame of the block that was just latched. */
+ const page_zip_des_t *page_zip= buf_block_get_page_zip(block);
+ ut_a(!page_zip || page_zip_validate(page_zip, block->page.frame, index()));
+#endif /* UNIV_ZIP_DEBUG */
+ if (page_has_next(block->page.frame) &&
+ !btr_block_get(*index(), btr_page_get_next(block->page.frame),
+ RW_X_LATCH, false, mtr, &err))
+ goto func_exit;
+ goto search_loop;
+}
- if (level != 0) {
- ut_ad(!autoinc);
+/********************************************************************//**
+Searches an index tree and positions a tree cursor on a given non-leaf level.
+NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
+to node pointer page number fields on the upper levels of the tree!
+cursor->up_match and cursor->low_match both will have sensible values.
+Cursor is left at the place where an insert of the
+search tuple should be performed in the B-tree. InnoDB does an insert
+immediately after the cursor. Thus, the cursor may end up on a user record,
+or on a page infimum record.
+Debug assertions require that the mini-transaction holds index->lock in
+SX or X mode. Every page on the path is searched with PAGE_CUR_LE.
+@param level the tree level of search; must be nonzero (non-leaf)
+@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that
+ it cannot get compared to the node ptr page number field!
+@param rw_latch RW_S_LATCH or RW_X_LATCH; the latch mode requested for
+ every page visited, unless the page is already latched in mtr
+@param cursor tree cursor; the cursor page is latched in rw_latch mode
+ (or was already latched in the mini-transaction)
+@param mtr mini-transaction
+@return DB_SUCCESS on success or error code otherwise
+@retval DB_CORRUPTION if a page fails the consistency checks, or if the
+ root page is a leaf page */
+TRANSACTIONAL_TARGET
+dberr_t btr_cur_search_to_nth_level(ulint level,
+ const dtuple_t *tuple,
+ rw_lock_type_t rw_latch,
+ btr_cur_t *cursor, mtr_t *mtr)
+{
+ dict_index_t *const index= cursor->index();
- if (upper_rw_latch == RW_NO_LATCH) {
- ut_ad(latch_mode == BTR_CONT_MODIFY_TREE
- || latch_mode == BTR_CONT_SEARCH_TREE);
- btr_block_get(
- *index, page_id.page_no(),
- latch_mode == BTR_CONT_MODIFY_TREE
- ? RW_X_LATCH : RW_SX_LATCH, false, mtr, &err);
- } else {
- ut_ad(mtr->memo_contains_flagged(block,
- upper_rw_latch));
-
- if (latch_by_caller) {
- ut_ad(latch_mode == BTR_SEARCH_TREE);
- /* to exclude modifying tree operations
- should sx-latch the index. */
- ut_ad(mtr->memo_contains(index->lock,
- MTR_MEMO_SX_LOCK));
- /* because has sx-latch of index,
- can release upper blocks. */
- for (; n_releases < n_blocks; n_releases++) {
- mtr_release_block_at_savepoint(
- mtr,
- tree_savepoints[n_releases],
- tree_blocks[n_releases]);
- }
- }
- }
+ ut_ad(index->is_btree() || index->is_ibuf());
+ mem_heap_t *heap= nullptr;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs *offsets= offsets_;
+ rec_offs_init(offsets_);
+ ut_ad(level);
+ ut_ad(dict_index_check_search_tuple(index, tuple));
+ ut_ad(index->is_ibuf() ? ibuf_inside(mtr) : index->is_btree());
+ ut_ad(dtuple_check_typed(tuple));
+ ut_ad(index->page != FIL_NULL);
+
+ MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes);
+ MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes);
+ cursor->up_match= 0;
+ cursor->low_match= 0;
+ cursor->flag= BTR_CUR_BINARY;
- if (page_mode <= PAGE_CUR_LE) {
- cursor->low_match = low_match;
- cursor->up_match = up_match;
- }
- } else {
- cursor->low_match = low_match;
- cursor->low_bytes = low_bytes;
- cursor->up_match = up_match;
- cursor->up_bytes = up_bytes;
+#ifndef BTR_CUR_ADAPT
+ buf_block_t *block= nullptr;
+#else
+ btr_search_t *info= btr_search_get_info(index);
+ buf_block_t *block= info->root_guess;
+#endif /* BTR_CUR_ADAPT */
- if (autoinc) {
- page_set_autoinc(tree_blocks[0], autoinc, mtr, false);
- }
+ ut_ad(mtr->memo_contains_flagged(&index->lock,
+ MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK));
-#ifdef BTR_CUR_HASH_ADAPT
- /* We do a dirty read of btr_search_enabled here. We
- will properly check btr_search_enabled again in
- btr_search_build_page_hash_index() before building a
- page hash index, while holding search latch. */
- if (!btr_search_enabled) {
- } else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) {
- /* This may be a search tuple for
- btr_pcur_t::restore_position(). */
- ut_ad(tuple->is_metadata()
- || (tuple->is_metadata(tuple->info_bits
- ^ REC_STATUS_INSTANT)));
- } else if (index->is_spatial()) {
- } else if (index->table->is_temporary()) {
- } else if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) {
- /* Only user records belong in the adaptive
- hash index. */
- } else {
- btr_search_info_update(index, cursor);
- }
-#endif /* BTR_CUR_HASH_ADAPT */
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- }
-
- /* For spatial index, remember what blocks are still latched */
- if (dict_index_is_spatial(index)
- && (latch_mode == BTR_MODIFY_TREE
- || latch_mode == BTR_MODIFY_LEAF)) {
- for (ulint i = 0; i < n_releases; i++) {
- cursor->rtr_info->tree_blocks[i] = NULL;
- cursor->rtr_info->tree_savepoints[i] = 0;
- }
+ const ulint zip_size= index->table->space->zip_size();
- for (ulint i = n_releases; i <= n_blocks; i++) {
- cursor->rtr_info->tree_blocks[i] = tree_blocks[i];
- cursor->rtr_info->tree_savepoints[i] = tree_savepoints[i];
- }
- }
+ /* Start with the root page. height stays ULINT_UNDEFINED until the
+ root page has been read. */
+ page_id_t page_id(index->table->space_id, index->page);
+ ulint height= ULINT_UNDEFINED;
-func_exit:
+search_loop:
+ dberr_t err= DB_SUCCESS;
+ if (buf_block_t *b=
+ mtr->get_already_latched(page_id, mtr_memo_type_t(rw_latch)))
+ block= b;
+ else if (!(block= buf_page_get_gen(page_id, zip_size, rw_latch,
+ block, BUF_GET, mtr, &err)))
+ {
+ if (err == DB_DECRYPTION_FAILED)
+ btr_decryption_failed(*index);
+ goto func_exit;
+ }
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
+#ifdef UNIV_ZIP_DEBUG
+ if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block))
+ ut_a(page_zip_validate(page_zip, block->page.frame, index));
+#endif /* UNIV_ZIP_DEBUG */
- ut_free(prev_tree_blocks);
- ut_free(prev_tree_savepoints);
+ if (!!page_is_comp(block->page.frame) != index->table->not_redundant() ||
+ btr_page_get_index_id(block->page.frame) != index->id ||
+ fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE ||
+ !fil_page_index_page_check(block->page.frame))
+ {
+ corrupted:
+ err= DB_CORRUPTION;
+ func_exit:
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+ return err;
+ }
- if (mbr_adj) {
- /* remember that we will need to adjust parent MBR */
- cursor->rtr_info->mbr_adj = true;
- }
+ const uint32_t page_level= btr_page_get_level(block->page.frame);
- DBUG_RETURN(err);
+ if (height == ULINT_UNDEFINED)
+ {
+ /* We are in the root node. This function only positions the cursor
+ on non-leaf levels, so a root page at level 0 is reported as
+ corruption. */
+ height= page_level;
+ if (!height)
+ goto corrupted;
+ cursor->tree_height= height + 1;
+ }
+ else if (height != ulint{page_level})
+ goto corrupted;
+
+ cursor->page_cur.block= block;
+
+ /* Search for complete index fields. */
+ if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &cursor->up_match,
+ &cursor->low_match, &cursor->page_cur,
+ nullptr))
+ goto corrupted;
+
+ /* If this is the desired level, leave the loop; err is still
+ DB_SUCCESS at this point. */
+ if (level == height)
+ goto func_exit;
+
+ ut_ad(height > level);
+ height--;
+
+ offsets = rec_get_offsets(cursor->page_cur.rec, index, offsets, 0,
+ ULINT_UNDEFINED, &heap);
+ /* Go to the child node. The block guess is cleared because it only
+ applied to the page that was just visited. */
+ page_id.set_page_no(btr_node_ptr_get_child_page_no(cursor->page_cur.rec,
+ offsets));
+ block= nullptr;
+ goto search_loop;
}
dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
btr_latch_mode latch_mode, mtr_t *mtr)
{
- ulint node_ptr_max_size= srv_page_size / 2;
btr_intention_t lock_intention;
ulint n_blocks= 0;
mem_heap_t *heap= nullptr;
@@ -2424,29 +1860,21 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
rec_offs_init(offsets_);
const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED;
- latch_mode = btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED);
+ latch_mode= btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED);
lock_intention= btr_cur_get_and_clear_intention(&latch_mode);
- /* This function doesn't need to lock left page of the leaf page */
- if (latch_mode == BTR_SEARCH_PREV)
- latch_mode= BTR_SEARCH_LEAF;
- else if (latch_mode == BTR_MODIFY_PREV)
- latch_mode= BTR_MODIFY_LEAF;
-
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched the leaf node */
auto savepoint= mtr->get_savepoint();
rw_lock_type_t upper_rw_latch= RW_X_LATCH;
+ ulint node_ptr_max_size= 0;
- switch (latch_mode) {
- case BTR_CONT_MODIFY_TREE:
- case BTR_CONT_SEARCH_TREE:
- abort();
- break;
- case BTR_MODIFY_TREE:
+ if (latch_mode == BTR_MODIFY_TREE)
+ {
+ node_ptr_max_size= btr_node_ptr_max_size(index);
/* Most of delete-intended operations are purging. Free blocks
and read IO bandwidth should be prioritized for them, when the
history list is growing huge. */
@@ -2457,32 +1885,35 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
mtr_x_lock_index(index, mtr);
else
mtr_sx_lock_index(index, mtr);
- break;
- default:
+ }
+ else
+ {
+ static_assert(int{BTR_CONT_MODIFY_TREE} == (12 | BTR_MODIFY_LEAF), "");
+ ut_ad(!(latch_mode & 8));
+ /* This function doesn't need to lock left page of the leaf page */
+ static_assert(int{BTR_SEARCH_PREV} == (4 | BTR_SEARCH_LEAF), "");
+ static_assert(int{BTR_MODIFY_PREV} == (4 | BTR_MODIFY_LEAF), "");
+ latch_mode= btr_latch_mode(latch_mode & ~4);
ut_ad(!latch_by_caller ||
mtr->memo_contains_flagged(&index->lock,
MTR_MEMO_SX_LOCK | MTR_MEMO_S_LOCK));
upper_rw_latch= RW_S_LATCH;
- if (latch_by_caller)
- break;
- ut_ad(latch_mode != BTR_SEARCH_TREE);
- savepoint++;
- mtr_s_lock_index(index, mtr);
+ if (!latch_by_caller)
+ {
+ savepoint++;
+ mtr_s_lock_index(index, mtr);
+ }
}
ut_ad(savepoint == mtr->get_savepoint());
- const rw_lock_type_t root_leaf_rw_latch=
- btr_cur_latch_for_root_leaf(latch_mode);
+ const rw_lock_type_t root_leaf_rw_latch= rw_lock_type_t(latch_mode & ~12);
page_cur.index = index;
uint32_t page= index->page;
const auto zip_size= index->table->space->zip_size();
- if (root_leaf_rw_latch == RW_X_LATCH)
- node_ptr_max_size= btr_node_ptr_max_size(index);
-
for (ulint height= ULINT_UNDEFINED;;)
{
ut_ad(n_blocks < BTR_MAX_LEVELS);
@@ -2531,20 +1962,15 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
reached_leaf:
const auto leaf_savepoint= mtr->get_savepoint();
ut_ad(leaf_savepoint);
+ ut_ad(block == mtr->at_savepoint(leaf_savepoint - 1));
if (rw_latch == RW_NO_LATCH)
- btr_cur_latch_leaves(block, latch_mode, this, mtr);
+ btr_cur_latch_leaves(leaf_savepoint - 1, latch_mode, this, mtr);
- switch (latch_mode) {
- case BTR_MODIFY_TREE:
- case BTR_CONT_MODIFY_TREE:
- case BTR_CONT_SEARCH_TREE:
- break;
- default:
+ if (latch_mode != BTR_MODIFY_TREE)
/* Release index->lock if needed, and the non-leaf pages. */
mtr->rollback_to_savepoint(savepoint - !latch_by_caller,
leaf_savepoint - 1);
- }
break;
}
}
@@ -4669,16 +4095,15 @@ btr_cur_pessimistic_update(
}
}
- if (!srv_read_only_mode
- && !big_rec_vec
+#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled
+ if (!big_rec_vec
&& page_is_leaf(block->page.frame)
&& !dict_index_is_online_ddl(index)) {
-#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled
mtr->release(index->lock);
-#endif
/* NOTE: We cannot release root block latch here, because it
has segment header and already modified in most of cases.*/
}
+#endif
err = DB_SUCCESS;
goto return_after_reservations;
@@ -5420,15 +4845,14 @@ return_after_reservations:
err_exit:
mem_heap_free(heap);
- if (!srv_read_only_mode
- && page_is_leaf(page)
- && !dict_index_is_online_ddl(index)) {
#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled
+ if (page_is_leaf(page)
+ && !dict_index_is_online_ddl(index)) {
mtr->release(index->lock);
-#endif
/* NOTE: We cannot release root block latch here, because it
has segment header and already modified in most of cases.*/
}
+#endif
index->table->space->release_free_extents(n_reserved);
return(ret);
@@ -5545,16 +4969,18 @@ public:
buf_block_t *parent_block= m_block;
ulint parent_savepoint= m_savepoint;
- m_savepoint= mtr_set_savepoint(&mtr);
m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH, !level,
&mtr, nullptr);
+ if (!m_block)
+ return false;
if (parent_block && parent_block != right_parent)
- mtr_release_block_at_savepoint(&mtr, parent_savepoint, parent_block);
+ mtr.rollback_to_savepoint(parent_savepoint, parent_savepoint + 1);
+
+ m_savepoint= mtr.get_savepoint() - 1;
- return m_block &&
- (level == ULINT_UNDEFINED ||
- btr_page_get_level(buf_block_get_frame(m_block)) == level);
+ return level == ULINT_UNDEFINED ||
+ btr_page_get_level(m_block->page.frame) == level;
}
/** Sets page mode for leaves */
@@ -5761,14 +5187,18 @@ static ha_rows btr_estimate_n_rows_in_range_on_level(
buf_block_t *prev_block= block;
ulint prev_savepoint= savepoint;
- savepoint= mtr_set_savepoint(&mtr);
+ savepoint= mtr.get_savepoint();
/* Fetch the page. */
block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, !level, &mtr,
nullptr);
if (prev_block)
- mtr_release_block_at_savepoint(&mtr, prev_savepoint, prev_block);
+ {
+ mtr.rollback_to_savepoint(prev_savepoint, prev_savepoint + 1);
+ if (block)
+ savepoint--;
+ }
if (!block || btr_page_get_level(buf_block_get_frame(block)) != level)
goto inexact;
@@ -5797,14 +5227,20 @@ static ha_rows btr_estimate_n_rows_in_range_on_level(
} while (page_id.page_no() != right_page_no);
if (block)
- mtr_release_block_at_savepoint(&mtr, savepoint, block);
+ {
+ ut_ad(block == mtr.at_savepoint(savepoint));
+ mtr.rollback_to_savepoint(savepoint, savepoint + 1);
+ }
return (n_rows);
inexact:
if (block)
- mtr_release_block_at_savepoint(&mtr, savepoint, block);
+ {
+ ut_ad(block == mtr.at_savepoint(savepoint));
+ mtr.rollback_to_savepoint(savepoint, savepoint + 1);
+ }
is_n_rows_exact= false;
@@ -5863,9 +5299,7 @@ ha_rows btr_estimate_n_rows_in_range(dict_index_t *index,
mtr.start();
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched leaf node(s) */
- ulint savepoint= mtr_set_savepoint(&mtr);
+ ut_ad(mtr.get_savepoint() == 0);
mtr_s_lock_index(index, &mtr);
ha_rows table_n_rows= dict_table_get_n_rows(index->table);
@@ -5920,10 +5354,10 @@ search_loop:
}
if (height == 0)
- /* There is no need to unlach non-leaf pages here as they must already be
+ /* There is no need to release non-leaf pages here as they must already be
unlatched in btr_est_cur_t::fetch_child(). Try to search on pages after
- index->lock unlatching to decrease contention. */
- mtr_release_s_latch_at_savepoint(&mtr, savepoint, &index->lock);
+ releasing the index latch, to decrease contention. */
+ mtr.rollback_to_savepoint(0, 1);
/* There is no need to search on left page if
divergence_height != ULINT_UNDEFINED, as it was already searched before
diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc
index 76b173359da..4e0a7d1f86a 100644
--- a/storage/innobase/btr/btr0defragment.cc
+++ b/storage/innobase/btr/btr0defragment.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved.
-Copyright (C) 2014, 2022, MariaDB Corporation.
+Copyright (C) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -280,6 +280,70 @@ btr_defragment_calc_n_recs_for_size(
return n_recs;
}
+MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result))
+/************************************************************//**
+Returns the upper level node pointer to a page. It is assumed that mtr holds
+an sx-latch on the tree.
+@return rec_get_offsets() of the node pointer record */
+static
+rec_offs*
+btr_page_search_father_node_ptr(
+ rec_offs* offsets,/*!< in: work area for the return value */
+ mem_heap_t* heap, /*!< in: memory heap to use */
+ btr_cur_t* cursor, /*!< in: cursor pointing to user record,
+ out: cursor on node pointer record,
+ its page x-latched */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no();
+ dict_index_t* index = btr_cur_get_index(cursor);
+ ut_ad(!index->is_spatial());
+
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
+ ut_ad(dict_index_get_page(index) != page_no);
+
+ const auto level = btr_page_get_level(btr_cur_get_page(cursor));
+
+ const rec_t* user_rec = btr_cur_get_rec(cursor);
+ ut_a(page_rec_is_user_rec(user_rec));
+
+ if (btr_cur_search_to_nth_level(level + 1,
+ dict_index_build_node_ptr(index,
+ user_rec, 0,
+ heap, level),
+ RW_X_LATCH,
+ cursor, mtr) != DB_SUCCESS) {
+ return nullptr;
+ }
+
+ const rec_t* node_ptr = btr_cur_get_rec(cursor);
+ ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive()
+ || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
+
+ offsets = rec_get_offsets(node_ptr, index, offsets, 0,
+ ULINT_UNDEFINED, &heap);
+
+ if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) {
+ offsets = nullptr;
+ }
+
+ return(offsets);
+}
+
+static bool btr_page_search_father(mtr_t *mtr, btr_cur_t *cursor)
+{
+ rec_t *rec=
+ page_rec_get_next(page_get_infimum_rec(cursor->block()->page.frame));
+ if (UNIV_UNLIKELY(!rec))
+ return false;
+ cursor->page_cur.rec= rec;
+ mem_heap_t *heap= mem_heap_create(100);
+ const bool got= btr_page_search_father_node_ptr(nullptr, heap, cursor, mtr);
+ mem_heap_free(heap);
+ return got;
+}
+
/*************************************************************//**
Merge as many records from the from_block to the to_block. Delete
the from_block if all records are successfully merged to to_block.
@@ -408,7 +472,7 @@ btr_defragment_merge_pages(
parent.page_cur.index = index;
parent.page_cur.block = from_block;
- if (!btr_page_get_father(mtr, &parent)) {
+ if (!btr_page_search_father(mtr, &parent)) {
to_block = nullptr;
} else if (n_recs_to_move == n_recs) {
/* The whole page is merged with the previous page,
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index d731bcbb893..68699ede469 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -212,24 +212,100 @@ btr_pcur_copy_stored_position(
pcur_receive->old_n_fields = pcur_donate->old_n_fields;
}
+/** Optimistically latches the leaf page or pages requested.
+@param[in] block guessed buffer block
+@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
+@param[in,out] pcur cursor
+@param[in,out] mtr mini-transaction
+@return true if success */
+TRANSACTIONAL_TARGET
+static bool btr_pcur_optimistic_latch_leaves(buf_block_t *block,
+ btr_pcur_t *pcur,
+ btr_latch_mode *latch_mode,
+ mtr_t *mtr)
+{
+ ut_ad(block->page.buf_fix_count());
+ ut_ad(block->page.in_file());
+ ut_ad(block->page.frame);
+
+ static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, "");
+ static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, "");
+ static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) ==
+ (RW_S_LATCH ^ RW_X_LATCH), "");
+
+ const rw_lock_type_t mode=
+ rw_lock_type_t(*latch_mode & (RW_X_LATCH | RW_S_LATCH));
+
+ switch (*latch_mode) {
+ default:
+ ut_ad(*latch_mode == BTR_SEARCH_LEAF || *latch_mode == BTR_MODIFY_LEAF);
+ return buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr);
+ case BTR_SEARCH_PREV: /* btr_pcur_move_backward_from_page() */
+ case BTR_MODIFY_PREV: /* Ditto, or ibuf_insert() */
+ page_id_t id{0};
+ uint32_t left_page_no;
+ ulint zip_size;
+ {
+ transactional_shared_lock_guard<block_lock> g{block->page.lock};
+ if (block->modify_clock != pcur->modify_clock)
+ return false;
+ id= block->page.id();
+ zip_size= block->zip_size();
+ left_page_no= btr_page_get_prev(block->page.frame);
+ }
+
+ if (left_page_no != FIL_NULL)
+ {
+ pcur->btr_cur.left_block=
+ buf_page_get_gen(page_id_t(id.space(), left_page_no), zip_size,
+ mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr);
+
+ if (pcur->btr_cur.left_block &&
+ btr_page_get_next(pcur->btr_cur.left_block->page.frame) !=
+ id.page_no())
+ {
+release_left_block:
+ mtr->release_last_page();
+ return false;
+ }
+ }
+ else
+ pcur->btr_cur.left_block= nullptr;
+
+ if (buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr))
+ {
+ if (btr_page_get_prev(block->page.frame) == left_page_no)
+ {
+ /* block was already buffer-fixed while entering the function and
+ buf_page_optimistic_get() buffer-fixes it again. */
+ ut_ad(2 <= block->page.buf_fix_count());
+ *latch_mode= btr_latch_mode(mode);
+ return true;
+ }
+
+ mtr->release_last_page();
+ }
+
+ ut_ad(block->page.buf_fix_count());
+ if (pcur->btr_cur.left_block)
+ goto release_left_block;
+ return false;
+ }
+}
+
/** Structure acts as functor to do the latching of leaf pages.
It returns true if latching of leaf pages succeeded and false
otherwise. */
struct optimistic_latch_leaves
{
btr_pcur_t *const cursor;
- btr_latch_mode *latch_mode;
+ btr_latch_mode *const latch_mode;
mtr_t *const mtr;
- optimistic_latch_leaves(btr_pcur_t *cursor, btr_latch_mode *latch_mode,
- mtr_t *mtr)
- : cursor(cursor), latch_mode(latch_mode), mtr(mtr) {}
-
bool operator() (buf_block_t *hint) const
{
- return hint && btr_cur_optimistic_latch_leaves(
- hint, cursor->modify_clock, latch_mode,
- btr_pcur_get_btr_cur(cursor), mtr);
+ return hint &&
+ btr_pcur_optimistic_latch_leaves(hint, cursor, latch_mode, mtr);
}
};
@@ -303,8 +379,8 @@ btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr)
/* Try optimistic restoration. */
if (block_when_stored.run_with_hint(
- optimistic_latch_leaves(this, &restore_latch_mode,
- mtr))) {
+ optimistic_latch_leaves{this, &restore_latch_mode,
+ mtr})) {
pos_state = BTR_PCUR_IS_POSITIONED;
latch_mode = restore_latch_mode;
@@ -465,18 +541,9 @@ btr_pcur_move_to_next_page(
return DB_CORRUPTION;
}
- ulint mode = cursor->latch_mode;
- switch (mode) {
- case BTR_SEARCH_TREE:
- mode = BTR_SEARCH_LEAF;
- break;
- case BTR_MODIFY_TREE:
- mode = BTR_MODIFY_LEAF;
- }
-
dberr_t err;
buf_block_t* next_block = btr_block_get(
- *cursor->index(), next_page_no, mode,
+ *cursor->index(), next_page_no, cursor->latch_mode & ~12,
page_is_leaf(page), mtr, &err);
if (UNIV_UNLIKELY(!next_block)) {
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index fc890f9233b..a1609248512 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -1055,26 +1055,24 @@ btr_search_guess_on_hash(
index_id_t index_id;
ut_ad(mtr->is_active());
+ ut_ad(index->is_btree() || index->is_ibuf());
- if (!btr_search_enabled) {
+ /* Note that, for efficiency, the struct info may not be protected by
+ any latch here! */
+
+ if (latch_mode > BTR_MODIFY_LEAF
+ || !info->last_hash_succ || !info->n_hash_potential
+ || (tuple->info_bits & REC_INFO_MIN_REC_FLAG)) {
return false;
}
- ut_ad(!index->is_ibuf());
+ ut_ad(index->is_btree());
+ ut_ad(!index->table->is_temporary());
+
ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
compile_time_assert(ulint{BTR_SEARCH_LEAF} == ulint{RW_S_LATCH});
compile_time_assert(ulint{BTR_MODIFY_LEAF} == ulint{RW_X_LATCH});
- /* Not supported for spatial index */
- ut_ad(!dict_index_is_spatial(index));
-
- /* Note that, for efficiency, the struct info may not be protected by
- any latch here! */
-
- if (info->n_hash_potential == 0) {
- return false;
- }
-
cursor->n_fields = info->n_fields;
cursor->n_bytes = info->n_bytes;
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index daf3bc9a664..614048b7ba0 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -722,7 +722,7 @@ dict_build_field_def_step(
}
/***************************************************************//**
-Creates an index tree for the index if it is not a member of a cluster.
+Creates an index tree for the index.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
@@ -755,9 +755,8 @@ dict_create_index_tree_step(
pcur.btr_cur.page_cur.index =
UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes);
- dberr_t err =
- btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
- &pcur, 0, &mtr);
+ dberr_t err = btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
if (err != DB_SUCCESS) {
func_exit:
@@ -768,10 +767,25 @@ func_exit:
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
if (UNIV_UNLIKELY(btr_pcur_is_after_last_on_page(&pcur))) {
+corrupted:
err = DB_CORRUPTION;
goto func_exit;
}
+ ulint len;
+ byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
+ DICT_FLD__SYS_INDEXES__ID,
+ &len);
+ if (UNIV_UNLIKELY(len != 8 || mach_read_from_8(data) != index->id)) {
+ goto corrupted;
+ }
+
+ data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
+ DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
+ if (len != 4) {
+ goto corrupted;
+ }
+
if (index->is_readable()) {
index->set_modified(mtr);
@@ -784,11 +798,6 @@ func_exit:
err = DB_OUT_OF_FILE_SPACE; );
}
- ulint len;
- byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
- DICT_FLD__SYS_INDEXES__PAGE_NO,
- &len);
- ut_ad(len == 4);
mtr.write<4,mtr_t::MAYBE_NOP>(*btr_pcur_get_block(&pcur), data,
node->page_no);
goto func_exit;
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 21efb525fa8..53d1031d270 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -2,7 +2,7 @@
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2022, MariaDB Corporation.
+Copyright (c) 2013, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -4143,8 +4143,7 @@ void dict_set_corrupted(dict_index_t *index, const char *ctx)
dict_index_copy_types(tuple, sys_index, 2);
cursor.page_cur.index = sys_index;
- if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_LE,
- BTR_MODIFY_LEAF, &cursor, &mtr)
+ if (cursor.search_leaf(tuple, PAGE_CUR_LE, BTR_MODIFY_LEAF, &mtr)
!= DB_SUCCESS) {
goto fail;
}
@@ -4219,8 +4218,7 @@ dict_index_set_merge_threshold(
dict_index_copy_types(tuple, sys_index, 2);
cursor.page_cur.index = sys_index;
- if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &cursor, &mtr)
+ if (cursor.search_leaf(tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &mtr)
!= DB_SUCCESS) {
goto func_exit;
}
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index 004b00615e8..9910a000b5b 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1321,7 +1321,7 @@ static dberr_t dict_load_columns(dict_table_t *table, unsigned use_uncommitted,
dict_index_copy_types(&tuple, sys_index, 1);
pcur.btr_cur.page_cur.index = sys_index;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -1452,7 +1452,7 @@ dict_load_virtual_col(dict_table_t *table, bool uncommitted, ulint nth_v_col)
dict_index_copy_types(&tuple, sys_virtual_index, 2);
pcur.btr_cur.page_cur.index = sys_virtual_index;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -1688,8 +1688,7 @@ static dberr_t dict_load_fields(dict_index_t *index, bool uncommitted,
dict_index_copy_types(&tuple, sys_index, 1);
pcur.btr_cur.page_cur.index = sys_index;
- dberr_t error = btr_pcur_open_on_user_rec(&tuple,
- PAGE_CUR_GE, BTR_SEARCH_LEAF,
+ dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF,
&pcur, &mtr);
if (error != DB_SUCCESS) {
goto func_exit;
@@ -1947,8 +1946,7 @@ dberr_t dict_load_indexes(dict_table_t *table, bool uncommitted,
dict_index_copy_types(&tuple, sys_index, 1);
pcur.btr_cur.page_cur.index = sys_index;
- dberr_t error = btr_pcur_open_on_user_rec(&tuple,
- PAGE_CUR_GE, BTR_SEARCH_LEAF,
+ dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF,
&pcur, &mtr);
if (error != DB_SUCCESS) {
goto func_exit;
@@ -2349,7 +2347,7 @@ static dict_table_t *dict_load_table_one(const span<const char> &name,
bool uncommitted = false;
reload:
mtr.start();
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS || !btr_pcur_is_on_user_rec(&pcur)) {
@@ -2607,8 +2605,7 @@ dict_load_table_on_id(
dict_table_t* table = nullptr;
- if (btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr)
+ if (btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr)
== DB_SUCCESS
&& btr_pcur_is_on_user_rec(&pcur)) {
/*---------------------------------------------------*/
@@ -2714,7 +2711,7 @@ static dberr_t dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id)
pcur.btr_cur.page_cur.index = sys_index;
mem_heap_t* heap = nullptr;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -2891,7 +2888,7 @@ dict_load_foreign(
mtr.start();
mem_heap_t* heap = nullptr;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
goto err_exit;
@@ -3102,7 +3099,7 @@ start_load:
dict_index_copy_types(&tuple, sec_index, 1);
pcur.btr_cur.page_cur.index = sec_index;
- dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
+ dberr_t err = btr_pcur_open_on_user_rec(&tuple,
BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS) {
DBUG_RETURN(err);
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index 44fcf9f2c18..845f133f1a6 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1697,7 +1697,7 @@ static dberr_t page_cur_open_level(page_cur_t *page_cur, ulint level,
static dberr_t btr_pcur_open_level(btr_pcur_t *pcur, ulint level, mtr_t *mtr,
dict_index_t *index)
{
- pcur->latch_mode= BTR_SEARCH_TREE;
+ pcur->latch_mode= BTR_SEARCH_LEAF;
pcur->search_mode= PAGE_CUR_G;
pcur->pos_state= BTR_PCUR_IS_POSITIONED;
pcur->btr_cur.page_cur.index= index;
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 12e9a6913ba..e9f3106feb0 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -122,15 +122,22 @@ MY_ATTRIBUTE((nonnull, warn_unused_result))
static buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr,
dberr_t *err)
{
- buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0),
- space->zip_size(), RW_SX_LATCH,
- nullptr, BUF_GET_POSSIBLY_FREED,
- mtr, err);
- if (block && space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID +
- block->page.frame))
+ const page_id_t id{space->id, 0};
+ buf_block_t *block= mtr->get_already_latched(id, MTR_MEMO_PAGE_SX_FIX);
+ if (block)
+ *err= DB_SUCCESS;
+ else
{
- *err= DB_CORRUPTION;
- block= nullptr;
+ block= buf_page_get_gen(id, space->zip_size(), RW_SX_LATCH,
+ nullptr, BUF_GET_POSSIBLY_FREED,
+ mtr, err);
+ if (block &&
+ space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID +
+ block->page.frame))
+ {
+ *err= DB_CORRUPTION;
+ block= nullptr;
+ }
}
return block;
}
diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc
index 207d49abeba..9eeeb9aeed3 100644
--- a/storage/innobase/gis/gis0sea.cc
+++ b/storage/innobase/gis/gis0sea.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2016, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -44,7 +44,6 @@ Created 2014/01/16 Jimmy Yang
static
bool
rtr_cur_restore_position(
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in: detached persistent cursor */
ulint level, /*!< in: index level */
mtr_t* mtr); /*!< in: mtr */
@@ -135,6 +134,7 @@ rtr_pcur_getnext_from_path(
&& (my_latch_mode | 4) == BTR_CONT_MODIFY_TREE;
if (!index_locked) {
+ ut_ad(mtr->is_empty());
mtr_s_lock_index(index, mtr);
} else {
ut_ad(mtr->memo_contains_flagged(&index->lock,
@@ -154,14 +154,12 @@ rtr_pcur_getnext_from_path(
node_seq_t path_ssn;
const page_t* page;
rw_lock_type_t rw_latch;
- ulint tree_idx;
mysql_mutex_lock(&rtr_info->rtr_path_mutex);
next_rec = rtr_info->path->back();
rtr_info->path->pop_back();
level = next_rec.level;
path_ssn = next_rec.seq_no;
- tree_idx = btr_cur->tree_height - level - 1;
/* Maintain the parent path info as well, if needed */
if (need_parent && !skip_parent && !new_split) {
@@ -223,37 +221,15 @@ rtr_pcur_getnext_from_path(
rw_latch = RW_X_LATCH;
}
- /* Release previous locked blocks */
- if (my_latch_mode != BTR_SEARCH_LEAF) {
- for (ulint idx = 0; idx < btr_cur->tree_height;
- idx++) {
- if (rtr_info->tree_blocks[idx]) {
- mtr_release_block_at_savepoint(
- mtr,
- rtr_info->tree_savepoints[idx],
- rtr_info->tree_blocks[idx]);
- rtr_info->tree_blocks[idx] = NULL;
- }
- }
- for (ulint idx = RTR_MAX_LEVELS; idx < RTR_MAX_LEVELS + 3;
- idx++) {
- if (rtr_info->tree_blocks[idx]) {
- mtr_release_block_at_savepoint(
- mtr,
- rtr_info->tree_savepoints[idx],
- rtr_info->tree_blocks[idx]);
- rtr_info->tree_blocks[idx] = NULL;
- }
- }
+ if (my_latch_mode == BTR_MODIFY_LEAF) {
+ mtr->rollback_to_savepoint(1);
}
- /* set up savepoint to record any locks to be taken */
- rtr_info->tree_savepoints[tree_idx] = mtr_set_savepoint(mtr);
-
ut_ad((my_latch_mode | 4) == BTR_CONT_MODIFY_TREE
|| !page_is_leaf(btr_cur_get_page(btr_cur))
|| !btr_cur->page_cur.block->page.lock.have_any());
+ const auto block_savepoint = mtr->get_savepoint();
block = buf_page_get_gen(
page_id_t(index->table->space_id,
next_rec.page_no), zip_size,
@@ -264,8 +240,6 @@ rtr_pcur_getnext_from_path(
break;
}
- rtr_info->tree_blocks[tree_idx] = block;
-
page = buf_block_get_frame(block);
page_ssn = page_get_ssn_id(page);
@@ -396,24 +370,23 @@ rtr_pcur_getnext_from_path(
if (found) {
if (level == target_level) {
- page_cur_t* r_cur;;
+ ut_ad(block
+ == mtr->at_savepoint(block_savepoint));
if (my_latch_mode == BTR_MODIFY_TREE
&& level == 0) {
ut_ad(rw_latch == RW_NO_LATCH);
btr_cur_latch_leaves(
- block,
+ block_savepoint,
BTR_MODIFY_TREE,
btr_cur, mtr);
}
- r_cur = btr_cur_get_page_cur(btr_cur);
-
page_cur_position(
page_cur_get_rec(page_cursor),
page_cur_get_block(page_cursor),
- r_cur);
+ btr_cur_get_page_cur(btr_cur));
btr_cur->low_match = level != 0 ?
DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1
@@ -425,13 +398,7 @@ rtr_pcur_getnext_from_path(
last node just located */
skip_parent = true;
} else {
- /* Release latch on the current page */
- ut_ad(rtr_info->tree_blocks[tree_idx]);
-
- mtr_release_block_at_savepoint(
- mtr, rtr_info->tree_savepoints[tree_idx],
- rtr_info->tree_blocks[tree_idx]);
- rtr_info->tree_blocks[tree_idx] = NULL;
+ mtr->release_last_page();
}
} while (!rtr_info->path->empty());
@@ -509,50 +476,524 @@ static void rtr_compare_cursor_rec(const rec_t *rec, dict_index_t *index,
}
#endif
+TRANSACTIONAL_TARGET
+dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
+ page_cur_mode_t mode,
+ btr_latch_mode latch_mode,
+ btr_cur_t *cur, mtr_t *mtr)
+{
+ page_cur_mode_t page_mode;
+ page_cur_mode_t search_mode= PAGE_CUR_UNSUPP;
+
+ bool mbr_adj= false;
+ bool found= false;
+ dict_index_t *const index= cur->index();
+
+ mem_heap_t *heap= nullptr;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs *offsets= offsets_;
+ rec_offs_init(offsets_);
+ ut_ad(level == 0 || mode == PAGE_CUR_LE || RTREE_SEARCH_MODE(mode));
+ ut_ad(dict_index_check_search_tuple(index, tuple));
+ ut_ad(dtuple_check_typed(tuple));
+ ut_ad(index->is_spatial());
+ ut_ad(index->page != FIL_NULL);
+
+ MEM_UNDEFINED(&cur->up_match, sizeof cur->up_match);
+ MEM_UNDEFINED(&cur->up_bytes, sizeof cur->up_bytes);
+ MEM_UNDEFINED(&cur->low_match, sizeof cur->low_match);
+ MEM_UNDEFINED(&cur->low_bytes, sizeof cur->low_bytes);
+ ut_d(cur->up_match= ULINT_UNDEFINED);
+ ut_d(cur->low_match= ULINT_UNDEFINED);
+
+ const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED;
+
+ ut_ad(!latch_by_caller
+ || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK
+ | MTR_MEMO_SX_LOCK));
+ latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+
+ ut_ad(!latch_by_caller || latch_mode == BTR_SEARCH_LEAF ||
+ latch_mode == BTR_MODIFY_LEAF);
+
+ cur->flag= BTR_CUR_BINARY;
+
+#ifndef BTR_CUR_ADAPT
+ buf_block_t *guess= nullptr;
+#else
+ btr_search_t *const info= btr_search_get_info(index);
+ buf_block_t *guess= info->root_guess;
+#endif
+
+ /* Store the position of the tree latch we push to mtr so that we
+ know how to release it when we have latched leaf node(s) */
+
+ const ulint savepoint= mtr->get_savepoint();
+
+ rw_lock_type_t upper_rw_latch, root_leaf_rw_latch= RW_NO_LATCH;
+
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ mtr_x_lock_index(index, mtr);
+ upper_rw_latch= root_leaf_rw_latch= RW_X_LATCH;
+ break;
+ case BTR_CONT_MODIFY_TREE:
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK |
+ MTR_MEMO_SX_LOCK));
+ upper_rw_latch= RW_X_LATCH;
+ break;
+ default:
+ ut_ad(latch_mode != BTR_MODIFY_PREV);
+ ut_ad(latch_mode != BTR_SEARCH_PREV);
+ if (!latch_by_caller)
+ mtr_s_lock_index(index, mtr);
+ upper_rw_latch= root_leaf_rw_latch= RW_S_LATCH;
+ if (latch_mode == BTR_MODIFY_LEAF)
+ root_leaf_rw_latch= RW_X_LATCH;
+ }
+
+ auto root_savepoint= mtr->get_savepoint();
+ const ulint zip_size= index->table->space->zip_size();
+
+ /* Start with the root page. */
+ page_id_t page_id(index->table->space_id, index->page);
+
+ ulint up_match= 0, up_bytes= 0, low_match= 0, low_bytes= 0;
+ ulint height= ULINT_UNDEFINED;
+
+ /* We use these modified search modes on non-leaf levels of the
+ B-tree. These let us end up in the right B-tree leaf. In that leaf
+ we use the original search mode. */
+
+ switch (mode) {
+ case PAGE_CUR_GE:
+ page_mode= PAGE_CUR_L;
+ break;
+ case PAGE_CUR_G:
+ page_mode= PAGE_CUR_LE;
+ break;
+ default:
+#ifdef PAGE_CUR_LE_OR_EXTENDS
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+ || RTREE_SEARCH_MODE(mode)
+ || mode == PAGE_CUR_LE_OR_EXTENDS);
+#else /* PAGE_CUR_LE_OR_EXTENDS */
+ ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
+ || RTREE_SEARCH_MODE(mode));
+#endif /* PAGE_CUR_LE_OR_EXTENDS */
+ page_mode= mode;
+ break;
+ }
+
+ search_loop:
+ auto buf_mode= BUF_GET;
+ ulint rw_latch= RW_NO_LATCH;
+
+ if (height)
+ {
+ /* We are about to fetch the root or a non-leaf page. */
+ if (latch_mode != BTR_MODIFY_TREE || height == level)
+ /* If we do not hold an SX or X latch on the index,
+ each page must be latched before it is read. */
+ rw_latch= upper_rw_latch;
+ }
+ else if (latch_mode <= BTR_MODIFY_LEAF)
+ rw_latch= latch_mode;
+
+ dberr_t err;
+ auto block_savepoint= mtr->get_savepoint();
+ buf_block_t *block= buf_page_get_gen(page_id, zip_size, rw_latch, guess,
+ buf_mode, mtr, &err, false);
+ if (!block)
+ {
+ if (err == DB_DECRYPTION_FAILED)
+ btr_decryption_failed(*index);
+ func_exit:
+ if (UNIV_LIKELY_NULL(heap))
+ mem_heap_free(heap);
+
+ if (mbr_adj)
+ /* remember that we will need to adjust parent MBR */
+ cur->rtr_info->mbr_adj= true;
+
+ return err;
+ }
+
+ const page_t *page= buf_block_get_frame(block);
+#ifdef UNIV_ZIP_DEBUG
+ if (rw_latch != RW_NO_LATCH) {
+ const page_zip_des_t *page_zip= buf_block_get_page_zip(block);
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+ }
+#endif /* UNIV_ZIP_DEBUG */
+
+ ut_ad(fil_page_index_page_check(page));
+ ut_ad(index->id == btr_page_get_index_id(page));
+
+ if (height != ULINT_UNDEFINED);
+ else if (page_is_leaf(page) &&
+ rw_latch != RW_NO_LATCH && rw_latch != root_leaf_rw_latch)
+ {
+ /* The root page is also a leaf page (root_leaf).
+ We should reacquire the page, because the root page
+ is latched differently from leaf pages. */
+ ut_ad(root_leaf_rw_latch != RW_NO_LATCH);
+ ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH);
+
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
+ mtr->rollback_to_savepoint(block_savepoint);
+
+ upper_rw_latch= root_leaf_rw_latch;
+ goto search_loop;
+ }
+ else
+ {
+ /* We are in the root node */
+
+ height= btr_page_get_level(page);
+ cur->tree_height= height + 1;
+
+ ut_ad(cur->rtr_info);
+
+ /* If SSN in memory is not initialized, fetch it from root page */
+ if (!rtr_get_current_ssn_id(index))
+ /* FIXME: do this in dict_load_table_one() */
+ index->set_ssn(page_get_ssn_id(page) + 1);
+
+ /* Save the MBR */
+ cur->rtr_info->thr= cur->thr;
+ rtr_get_mbr_from_tuple(tuple, &cur->rtr_info->mbr);
+
+#ifdef BTR_CUR_ADAPT
+ info->root_guess= block;
+#endif
+ }
+
+ if (height == 0) {
+ if (rw_latch == RW_NO_LATCH)
+ {
+ ut_ad(block == mtr->at_savepoint(block_savepoint));
+ btr_cur_latch_leaves(block_savepoint, latch_mode, cur, mtr);
+ }
+
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ break;
+ default:
+ if (!latch_by_caller)
+ {
+ /* Release the tree s-latch */
+ mtr->rollback_to_savepoint(savepoint,
+ savepoint + 1);
+ block_savepoint--;
+ root_savepoint--;
+ }
+ /* release upper blocks */
+ if (savepoint < block_savepoint)
+ mtr->rollback_to_savepoint(savepoint, block_savepoint);
+ }
+
+ page_mode= mode;
+ }
+
+ /* Remember the page search mode */
+ search_mode= page_mode;
+
+ /* Adjust the search mode when the page search mode is
+ PAGE_CUR_RTREE_LOCATE or PAGE_CUR_RTREE_INSERT, as we are searching
+ with MBRs. When it is not the target level, we should search all
+ sub-trees that "CONTAIN" the search range/MBR. At the target level, the
+ search becomes PAGE_CUR_LE (PAGE_CUR_RTREE_GET_FATHER for non-leaf LOCATE) */
+
+ if (page_mode == PAGE_CUR_RTREE_INSERT)
+ {
+ page_mode= (level == height)
+ ? PAGE_CUR_LE
+ : PAGE_CUR_RTREE_INSERT;
+
+ ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE);
+ }
+ else if (page_mode == PAGE_CUR_RTREE_LOCATE && level == height)
+ page_mode= level == 0 ? PAGE_CUR_LE : PAGE_CUR_RTREE_GET_FATHER;
+
+ up_match= 0;
+ low_match= 0;
+
+ if (latch_mode == BTR_MODIFY_TREE || latch_mode == BTR_CONT_MODIFY_TREE)
+ /* The tree is locked; no page lock is needed to protect the "path" */
+ cur->rtr_info->need_page_lock= false;
+
+ cur->page_cur.block= block;
+
+ if (page_mode >= PAGE_CUR_CONTAIN)
+ {
+ found= rtr_cur_search_with_match(block, index, tuple, page_mode,
+ &cur->page_cur, cur->rtr_info);
+
+ /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */
+ if (search_mode == PAGE_CUR_RTREE_INSERT && cur->rtr_info->mbr_adj) {
+ static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), "");
+
+ if (!(latch_mode & 8))
+ /* The parent MBR needs to be updated; retry with BTR_MODIFY_TREE */
+ goto func_exit;
+
+ cur->rtr_info->mbr_adj= false;
+ mbr_adj= true;
+ }
+
+ if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER)
+ cur->low_match= DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1;
+ }
+ else
+ {
+ /* Search for complete index fields. */
+ up_bytes= low_bytes= 0;
+ if (page_cur_search_with_match(tuple, page_mode, &up_match,
+ &low_match, &cur->page_cur, nullptr)) {
+ err= DB_CORRUPTION;
+ goto func_exit;
+ }
+ }
+
+ /* If this is the desired level, leave the loop */
+
+ ut_ad(height == btr_page_get_level(btr_cur_get_page(cur)));
+
+ /* Add Predicate lock if it is serializable isolation
+ and only if it is in the search case */
+ if (mode >= PAGE_CUR_CONTAIN && mode != PAGE_CUR_RTREE_INSERT &&
+ mode != PAGE_CUR_RTREE_LOCATE && cur->rtr_info->need_prdt_lock)
+ {
+ lock_prdt_t prdt;
+
+ {
+ trx_t* trx= thr_get_trx(cur->thr);
+ TMLockTrxGuard g{TMLockTrxArgs(*trx)};
+ lock_init_prdt_from_mbr(&prdt, &cur->rtr_info->mbr, mode,
+ trx->lock.lock_heap);
+ }
+
+ if (rw_latch == RW_NO_LATCH && height != 0)
+ block->page.lock.s_lock();
+
+ lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, cur->thr);
+
+ if (rw_latch == RW_NO_LATCH && height != 0)
+ block->page.lock.s_unlock();
+ }
+
+ if (level != height)
+ {
+ ut_ad(height > 0);
+
+ height--;
+ guess= nullptr;
+
+ const rec_t *node_ptr= btr_cur_get_rec(cur);
+
+ offsets= rec_get_offsets(node_ptr, index, offsets, 0,
+ ULINT_UNDEFINED, &heap);
+
+ if (page_rec_is_supremum(node_ptr))
+ {
+ cur->low_match= 0;
+ cur->up_match= 0;
+ goto func_exit;
+ }
+
+ /* If we are doing insertion or record locating,
+ remember the tree nodes we visited */
+ if (page_mode == PAGE_CUR_RTREE_INSERT ||
+ (search_mode == PAGE_CUR_RTREE_LOCATE &&
+ latch_mode != BTR_MODIFY_LEAF))
+ {
+ const bool add_latch= latch_mode == BTR_MODIFY_TREE &&
+ rw_latch == RW_NO_LATCH;
+
+ if (add_latch)
+ {
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK |
+ MTR_MEMO_SX_LOCK));
+ block->page.lock.s_lock();
+ }
+
+ /* Store the parent cursor location */
+ ut_d(auto num_stored=)
+ rtr_store_parent_path(block, cur, latch_mode, height + 1, mtr);
+
+ if (page_mode == PAGE_CUR_RTREE_INSERT)
+ {
+ btr_pcur_t *r_cursor= rtr_get_parent_cursor(cur, height + 1, true);
+ /* For an insertion, there should be only one parent for
+ each level traversed */
+ ut_ad(num_stored == 1);
+ node_ptr= btr_pcur_get_rec(r_cursor);
+ }
+
+ if (add_latch)
+ block->page.lock.s_unlock();
+
+ ut_ad(!page_rec_is_supremum(node_ptr));
+ }
+
+ ut_ad(page_mode == search_mode ||
+ (page_mode == PAGE_CUR_WITHIN &&
+ search_mode == PAGE_CUR_RTREE_LOCATE));
+ page_mode= search_mode;
+
+ if (height == level && latch_mode == BTR_MODIFY_TREE)
+ {
+ ut_ad(upper_rw_latch == RW_X_LATCH);
+ for (auto i= root_savepoint, n= mtr->get_savepoint(); i < n; i++)
+ mtr->x_latch_at_savepoint(i, mtr->at_savepoint(i));
+ }
+
+ /* Go to the child node */
+ page_id.set_page_no(btr_node_ptr_get_child_page_no(node_ptr, offsets));
+
+ if (page_mode >= PAGE_CUR_CONTAIN && page_mode != PAGE_CUR_RTREE_INSERT)
+ {
+ rtr_node_path_t *path= cur->rtr_info->path;
+
+ if (found && !path->empty())
+ {
+ ut_ad(path->back().page_no == page_id.page_no());
+ path->pop_back();
+#ifdef UNIV_DEBUG
+ if (page_mode == PAGE_CUR_RTREE_LOCATE &&
+ latch_mode != BTR_MODIFY_LEAF)
+ {
+ btr_pcur_t* pcur= cur->rtr_info->parent_path->back().cursor;
+ rec_t *my_node_ptr= btr_pcur_get_rec(pcur);
+
+ offsets= rec_get_offsets(my_node_ptr, index, offsets,
+ 0, ULINT_UNDEFINED, &heap);
+
+ ut_ad(page_id.page_no() ==
+ btr_node_ptr_get_child_page_no(my_node_ptr, offsets));
+ }
+#endif
+ }
+ }
+
+ goto search_loop;
+ }
+
+ if (level)
+ {
+ if (upper_rw_latch == RW_NO_LATCH)
+ {
+ ut_ad(latch_mode == BTR_CONT_MODIFY_TREE);
+ btr_block_get(*index, page_id.page_no(), RW_X_LATCH, false, mtr, &err);
+ }
+ else
+ {
+ ut_ad(mtr->memo_contains_flagged(block, upper_rw_latch));
+ ut_ad(!latch_by_caller);
+ }
+
+ if (page_mode <= PAGE_CUR_LE)
+ {
+ cur->low_match= low_match;
+ cur->up_match= up_match;
+ }
+ }
+ else
+ {
+ cur->low_match= low_match;
+ cur->low_bytes= low_bytes;
+ cur->up_match= up_match;
+ cur->up_bytes= up_bytes;
+
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE);
+ ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE);
+ }
+
+ goto func_exit;
+}
+
+dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple,
+ btr_latch_mode latch_mode,
+ mtr_t *mtr, page_cur_mode_t mode)
+{
+ return rtr_search_to_nth_level(0, tuple, mode, latch_mode, cur, mtr);
+}
+
+/** Search for a spatial index leaf page record.
+@param pcur cursor
+@param tuple search tuple
+@param mode search mode
+@param mtr mini-transaction */
+dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple,
+ page_cur_mode_t mode, mtr_t *mtr)
+{
+#ifdef UNIV_DEBUG
+ switch (mode) {
+ case PAGE_CUR_CONTAIN:
+ case PAGE_CUR_INTERSECT:
+ case PAGE_CUR_WITHIN:
+ case PAGE_CUR_DISJOINT:
+ case PAGE_CUR_MBR_EQUAL:
+ break;
+ default:
+ ut_ad("invalid mode" == 0);
+ }
+#endif
+ pcur->latch_mode= BTR_SEARCH_LEAF;
+ pcur->search_mode= mode;
+ pcur->pos_state= BTR_PCUR_IS_POSITIONED;
+ pcur->trx_if_known= nullptr;
+ return rtr_search_leaf(&pcur->btr_cur, tuple, BTR_SEARCH_LEAF, mtr, mode);
+}
+
/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
-closed with btr_pcur_close. Mainly called by row_search_index_entry() */
-bool
-rtr_pcur_open(
- dict_index_t* index, /*!< in: index */
+closed with btr_pcur_close. */
+bool rtr_search(
const dtuple_t* tuple, /*!< in: tuple on which search done */
- btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
mtr_t* mtr) /*!< in: mtr */
{
static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), "");
ut_ad(latch_mode & BTR_MODIFY_LEAF);
+ ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED));
+ ut_ad(mtr->is_empty());
/* Initialize the cursor */
btr_pcur_init(cursor);
cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- cursor->search_mode = PAGE_CUR_RTREE_LOCATE;
- cursor->trx_if_known = NULL;
+ cursor->search_mode = PAGE_CUR_RTREE_LOCATE;
+ cursor->trx_if_known = nullptr;
+
+ if (latch_mode & 8) {
+ mtr_x_lock_index(cursor->index(), mtr);
+ } else {
+ latch_mode
+ = btr_latch_mode(latch_mode | BTR_ALREADY_S_LATCHED);
+ mtr_sx_lock_index(cursor->index(), mtr);
+ }
/* Search with the tree cursor */
btr_cur_t* btr_cursor = btr_pcur_get_btr_cur(cursor);
- btr_cursor->page_cur.index = index;
- btr_cursor->rtr_info = rtr_create_rtr_info(false, false,
- btr_cursor, index);
+ btr_cursor->rtr_info
+ = rtr_create_rtr_info(false, false,
+ btr_cursor, cursor->index());
- /* Purge will SX lock the tree instead of take Page Locks */
if (btr_cursor->thr) {
btr_cursor->rtr_info->need_page_lock = true;
btr_cursor->rtr_info->thr = btr_cursor->thr;
}
- if ((latch_mode & 8) && index->lock.have_u_not_x()) {
- index->lock.u_x_upgrade(SRW_LOCK_CALL);
- mtr->lock_upgrade(index->lock);
- }
-
- if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_RTREE_LOCATE,
- latch_mode,
- btr_cursor, mtr) != DB_SUCCESS) {
+ if (rtr_search_leaf(btr_cursor, tuple, latch_mode, mtr)
+ != DB_SUCCESS) {
return true;
}
@@ -560,7 +1001,8 @@ rtr_pcur_open(
const rec_t* rec = btr_pcur_get_rec(cursor);
- const bool d= rec_get_deleted_flag(rec, index->table->not_redundant());
+ const bool d= rec_get_deleted_flag(
+ rec, cursor->index()->table->not_redundant());
if (page_rec_is_infimum(rec)
|| btr_pcur_get_low_match(cursor) != dtuple_get_n_fields(tuple)
@@ -571,26 +1013,12 @@ rtr_pcur_open(
btr_cursor->rtr_info->fd_del = true;
btr_cursor->low_match = 0;
}
- /* Did not find matched row in first dive. Release
- latched block if any before search more pages */
- if (!(latch_mode & 8)) {
- ulint tree_idx = btr_cursor->tree_height - 1;
- rtr_info_t* rtr_info = btr_cursor->rtr_info;
-
- if (rtr_info->tree_blocks[tree_idx]) {
- mtr_release_block_at_savepoint(
- mtr,
- rtr_info->tree_savepoints[tree_idx],
- rtr_info->tree_blocks[tree_idx]);
- rtr_info->tree_blocks[tree_idx] = NULL;
- }
- }
+
+ mtr->rollback_to_savepoint(1);
if (!rtr_pcur_getnext_from_path(tuple, PAGE_CUR_RTREE_LOCATE,
btr_cursor, 0, latch_mode,
- latch_mode
- & (8 | BTR_ALREADY_S_LATCHED),
- mtr)) {
+ true, mtr)) {
return true;
}
@@ -598,6 +1026,10 @@ rtr_pcur_open(
== dtuple_get_n_fields(tuple));
}
+ if (!(latch_mode & 8)) {
+ mtr->rollback_to_savepoint(0, 1);
+ }
+
return false;
}
@@ -641,8 +1073,7 @@ static const rec_t* rtr_get_father_node(
if (sea_cur && sea_cur->tree_height > level) {
ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
| MTR_MEMO_SX_LOCK));
- if (rtr_cur_restore_position(BTR_CONT_MODIFY_TREE, sea_cur,
- level, mtr)) {
+ if (rtr_cur_restore_position(sea_cur, level, mtr)) {
btr_pcur_t* r_cursor = rtr_get_parent_cursor(
sea_cur, level, false);
@@ -668,9 +1099,8 @@ static const rec_t* rtr_get_father_node(
btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index);
- if (btr_cur_search_to_nth_level(level, tuple,
- PAGE_CUR_RTREE_LOCATE,
- BTR_CONT_MODIFY_TREE, btr_cur, mtr)
+ if (rtr_search_to_nth_level(level, tuple, PAGE_CUR_RTREE_LOCATE,
+ BTR_CONT_MODIFY_TREE, btr_cur, mtr)
!= DB_SUCCESS) {
} else if (sea_cur && sea_cur->tree_height == level) {
rec = btr_cur_get_rec(btr_cur);
@@ -729,9 +1159,8 @@ rtr_page_get_father_node_ptr(
page_no = btr_cur_get_block(cursor)->page.id().page_no();
index = btr_cur_get_index(cursor);
- ut_ad(srv_read_only_mode
- || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
- | MTR_MEMO_SX_LOCK));
+ ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
+ | MTR_MEMO_SX_LOCK));
ut_ad(dict_index_get_page(index) != page_no);
@@ -879,32 +1308,10 @@ rtr_init_rtr_info(
if (!reinit) {
/* Reset all members. */
- rtr_info->path = NULL;
- rtr_info->parent_path = NULL;
- rtr_info->matches = NULL;
-
+ memset(rtr_info, 0, sizeof *rtr_info);
+ static_assert(PAGE_CUR_UNSUPP == 0, "compatibility");
mysql_mutex_init(rtr_path_mutex_key, &rtr_info->rtr_path_mutex,
nullptr);
-
- memset(rtr_info->tree_blocks, 0x0,
- sizeof(rtr_info->tree_blocks));
- memset(rtr_info->tree_savepoints, 0x0,
- sizeof(rtr_info->tree_savepoints));
- rtr_info->mbr.xmin = 0.0;
- rtr_info->mbr.xmax = 0.0;
- rtr_info->mbr.ymin = 0.0;
- rtr_info->mbr.ymax = 0.0;
- rtr_info->thr = NULL;
- rtr_info->heap = NULL;
- rtr_info->cursor = NULL;
- rtr_info->index = NULL;
- rtr_info->need_prdt_lock = false;
- rtr_info->need_page_lock = false;
- rtr_info->allocated = false;
- rtr_info->mbr_adj = false;
- rtr_info->fd_del = false;
- rtr_info->search_tuple = NULL;
- rtr_info->search_mode = PAGE_CUR_UNSUPP;
}
ut_ad(!rtr_info->matches || rtr_info->matches->matched_recs->empty());
@@ -1130,7 +1537,6 @@ struct optimistic_get
static
bool
rtr_cur_restore_position(
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* btr_cur, /*!< in: detached persistent cursor */
ulint level, /*!< in: index level */
mtr_t* mtr) /*!< in: mtr */
@@ -1158,8 +1564,6 @@ rtr_cur_restore_position(
r_cursor->modify_clock = 100;
);
- ut_ad(latch_mode == BTR_CONT_MODIFY_TREE);
-
if (r_cursor->block_when_stored.run_with_hint(
optimistic_get(r_cursor, mtr))) {
ut_ad(r_cursor->pos_state == BTR_PCUR_IS_POSITIONED);
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 949a9eae0d7..244d535f020 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -1549,8 +1549,7 @@ static void innodb_drop_database(handlerton*, char *path)
mtr_t mtr;
mtr.start();
pcur.btr_cur.page_cur.index = sys_index;
- err= btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
+ err= btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr);
if (err != DB_SUCCESS)
goto err_exit;
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 7c162b9af6a..8ff36bc4bdd 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -6081,7 +6081,8 @@ func_exit:
que_thr_t* thr = pars_complete_graph_for_exec(
NULL, trx, ctx->heap, NULL);
- const bool is_root = block->page.id().page_no() == index->page;
+ page_id_t id{block->page.id()};
+ const bool is_root = id.page_no() == index->page;
if (rec_is_metadata(rec, *index)) {
ut_ad(page_rec_is_user_rec(rec));
@@ -6098,8 +6099,10 @@ func_exit:
}
/* Ensure that the root page is in the correct format. */
- buf_block_t* root = btr_root_block_get(index, RW_X_LATCH,
- &mtr, &err);
+ id.set_page_no(index->page);
+ buf_block_t* root = mtr.get_already_latched(
+ id, MTR_MEMO_PAGE_SX_FIX);
+
if (UNIV_UNLIKELY(!root)) {
goto func_exit;
}
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index 5a97ea4ebe0..c3dddf8251d 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -2297,7 +2297,7 @@ loop:
btr_pcur_t pcur;
pcur.btr_cur.page_cur.index= ibuf.index;
ibuf_mtr_start(&mtr);
- if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, 0, &mtr))
+ if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr))
goto func_exit;
if (!btr_pcur_is_on_user_rec(&pcur))
{
@@ -2493,8 +2493,8 @@ ibuf_merge_space(
/* Position the cursor on the first matching record. */
pcur.btr_cur.page_cur.index = ibuf.index;
- dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, 0, &mtr);
+ dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF,
+ &pcur, &mtr);
ut_ad(err != DB_SUCCESS || page_validate(btr_pcur_get_page(&pcur),
ibuf.index));
@@ -3238,7 +3238,7 @@ ibuf_insert_low(
ibuf_mtr_start(&mtr);
pcur.btr_cur.page_cur.index = ibuf.index;
- err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, 0, &mtr);
+ err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
if (err != DB_SUCCESS) {
func_exit:
ibuf_mtr_commit(&mtr);
@@ -4037,12 +4037,12 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur,
ibuf_mtr_start(mtr);
mysql_mutex_lock(&ibuf_mutex);
+ ibuf.index->lock.u_lock(SRW_LOCK_ARGS(__FILE__, __LINE__));
if (!ibuf_restore_pos(page_id, search_tuple, BTR_PURGE_TREE,
pcur, mtr)) {
-
mysql_mutex_unlock(&ibuf_mutex);
- ut_ad(mtr->has_committed());
+ ibuf.index->lock.u_unlock();
goto func_exit;
}
@@ -4053,13 +4053,11 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur,
ut_a(err == DB_SUCCESS);
ibuf_size_update(ibuf_root->page.frame);
- mysql_mutex_unlock(&ibuf_mutex);
-
ibuf.empty = page_is_empty(ibuf_root->page.frame);
- } else {
- mysql_mutex_unlock(&ibuf_mutex);
}
+ mysql_mutex_unlock(&ibuf_mutex);
+ ibuf.index->lock.u_unlock();
ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
func_exit:
@@ -4237,7 +4235,7 @@ loop:
/* Position pcur in the insert buffer at the first entry for this
index page */
- if (btr_pcur_open_on_user_rec(search_tuple, PAGE_CUR_GE,
+ if (btr_pcur_open_on_user_rec(search_tuple,
BTR_MODIFY_LEAF, &pcur, &mtr)
!= DB_SUCCESS) {
err = DB_CORRUPTION;
@@ -4454,7 +4452,7 @@ loop:
/* Position pcur in the insert buffer at the first entry for the
space */
- if (btr_pcur_open_on_user_rec(&search_tuple, PAGE_CUR_GE,
+ if (btr_pcur_open_on_user_rec(&search_tuple,
BTR_MODIFY_LEAF, &pcur, &mtr)
!= DB_SUCCESS) {
goto leave_loop;
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index a2aa46b62da..a1cc10b05db 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2022, MariaDB Corporation.
+Copyright (c) 2014, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -211,13 +211,12 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false)
@param[in,out] mtr mini-transaction */
void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr);
-ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result))
+ATTRIBUTE_COLD __attribute__((nonnull))
/** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE.
@param[in] index clustered index with instant ALTER TABLE
@param[in] all whether to reset FIL_PAGE_TYPE as well
-@param[in,out] mtr mini-transaction
-@return error code */
-dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr);
+@param[in,out] mtr mini-transaction */
+void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr);
/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts
@@ -241,7 +240,7 @@ btr_root_raise_and_insert(
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
- MY_ATTRIBUTE((warn_unused_result));
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/*************************************************************//**
Reorganizes an index page.
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 49bc8a4ff1b..e9c44d6d9bf 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -63,12 +63,6 @@ enum {
BTR_KEEP_IBUF_BITMAP = 32
};
-/* btr_cur_latch_leaves() returns latched blocks and savepoints. */
-struct btr_latch_leaves_t {
- buf_block_t* blocks[3];
- ulint savepoints[3];
-};
-
#include "que0types.h"
#include "row0types.h"
@@ -126,51 +120,28 @@ bool
btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
-/** Optimistically latches the leaf page or pages requested.
-@param[in] block guessed buffer block
-@param[in] modify_clock modify clock value
-@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
-@param[in,out] cursor cursor
-@param[in] mtr mini-transaction
-@return true if success */
-bool
-btr_cur_optimistic_latch_leaves(
- buf_block_t* block,
- ib_uint64_t modify_clock,
- btr_latch_mode* latch_mode,
- btr_cur_t* cursor,
- mtr_t* mtr);
-
MY_ATTRIBUTE((warn_unused_result))
-/** Searches an index tree and positions a tree cursor on a given level.
+/********************************************************************//**
+Searches an index tree and positions a tree cursor on a given non-leaf level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
+Cursor is left at the place where an insert of the
+search tuple should be performed in the B-tree. InnoDB does an insert
+immediately after the cursor. Thus, the cursor may end up on a user record,
+or on a page infimum record.
@param level the tree level of search
@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that
it cannot get compared to the node ptr page number field!
-@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a
- unique prefix of a record, mode should be PAGE_CUR_LE, not
- PAGE_CUR_GE, as the latter may end up on the previous page of
- the record! Inserts should always be made using PAGE_CUR_LE
- to search the position!
-@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT,
- BTR_DELETE_MARK, or BTR_DELETE;
- cursor->left_block is used to store a pointer to the left
- neighbor page
+@param rw_latch RW_S_LATCH or RW_X_LATCH
@param cursor tree cursor; the cursor page is s- or x-latched, but see also
above!
@param mtr mini-transaction
-@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none)
@return DB_SUCCESS on success or error code otherwise */
dberr_t btr_cur_search_to_nth_level(ulint level,
const dtuple_t *tuple,
- page_cur_mode_t mode,
- btr_latch_mode latch_mode,
- btr_cur_t *cursor, mtr_t *mtr,
- ib_uint64_t autoinc= 0);
+ rw_lock_type_t rw_latch,
+ btr_cur_t *cursor, mtr_t *mtr);
/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
@@ -657,15 +628,13 @@ btr_rec_copy_externally_stored_field(
@param[in] block leaf page where the search converged
@param[in] latch_mode BTR_SEARCH_LEAF, ...
@param[in] cursor cursor
-@param[in,out] mtr mini-transaction
-@param[out] latch_leaves latched blocks and savepoints */
+@param[in,out] mtr mini-transaction */
void
btr_cur_latch_leaves(
- buf_block_t* block,
+ ulint block_savepoint,
btr_latch_mode latch_mode,
btr_cur_t* cursor,
- mtr_t* mtr,
- btr_latch_leaves_t* latch_leaves = nullptr);
+ mtr_t* mtr);
/*######################################################################*/
@@ -734,14 +703,14 @@ struct btr_cur_t {
BTR_MODIFY_PREV */
/*------------------------------*/
que_thr_t* thr; /*!< this field is only used
- when btr_cur_search_to_nth_level
+ when search_leaf()
is called for an index entry
insertion: the calling query
thread is passed here to be
used in the insert buffer */
/*------------------------------*/
/** The following fields are used in
- btr_cur_search_to_nth_level to pass information: */
+ search_leaf() to pass information: */
/* @{ */
enum btr_cur_method flag; /*!< Search method used */
ulint tree_height; /*!< Tree height if the search is done
@@ -750,8 +719,7 @@ struct btr_cur_t {
ulint up_match; /*!< If the search mode was PAGE_CUR_LE,
the number of matched fields to the
the first user record to the right of
- the cursor record after
- btr_cur_search_to_nth_level;
+ the cursor record after search_leaf();
for the mode PAGE_CUR_GE, the matched
fields to the first user record AT THE
CURSOR or to the right of it;
@@ -768,8 +736,7 @@ struct btr_cur_t {
ulint low_match; /*!< if search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record AT THE CURSOR or
- to the left of it after
- btr_cur_search_to_nth_level;
+ to the left of it after search_leaf();
NOT defined for PAGE_CUR_GE or any
other search modes; see also the NOTE
in up_match! */
@@ -803,6 +770,24 @@ struct btr_cur_t {
dberr_t open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode,
mtr_t *mtr);
+ /** Search the leaf page record corresponding to a key.
+ @param tuple key to search for, with correct n_fields_cmp
+ @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting
+ @param latch_mode latch mode
+ @param mtr mini-transaction
+ @return error code */
+ dberr_t search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
+ btr_latch_mode latch_mode, mtr_t *mtr);
+
+ /** Search the leaf page record corresponding to a key, exclusively latching
+ all sibling pages on the way.
+ @param tuple key to search for, with correct n_fields_cmp
+ @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting
+ @param mtr mini-transaction
+ @return error code */
+ dberr_t pessimistic_search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
+ mtr_t *mtr);
+
/** Open the cursor at a random leaf page record.
@param offsets temporary memory for rec_get_offsets()
@param heap memory heap for rec_get_offsets()
@@ -862,14 +847,14 @@ inherited external field. */
#define BTR_EXTERN_INHERITED_FLAG 64U
#ifdef BTR_CUR_HASH_ADAPT
-/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
+/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */
extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_non_sea;
/** Old value of btr_cur_n_non_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_non_sea_old;
/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
+btr_cur_t::search_leaf(). */
extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_sea;
/** Old value of btr_cur_n_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index cd8eacdc212..a25704ede53 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -70,24 +70,6 @@ btr_pcur_init(
/*==========*/
btr_pcur_t* pcur); /*!< in: persistent cursor */
-/**************************************************************//**
-Initializes and opens a persistent cursor to an index tree. */
-inline
-dberr_t
-btr_pcur_open(
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written
- (0 if none) */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Opens a persistent cursor to an index tree without initializing the
cursor.
@param tuple tuple on which search done
@@ -100,8 +82,7 @@ cursor.
@param mtr mini-transaction
@return DB_SUCCESS on success or error code otherwise. */
inline
-dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple,
- page_cur_mode_t mode,
+dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode,
btr_latch_mode latch_mode,
btr_pcur_t *cursor, mtr_t *mtr);
@@ -409,8 +390,7 @@ struct btr_pcur_t
pos_state= BTR_PCUR_IS_POSITIONED;
old_rec= nullptr;
- return btr_cur.open_leaf(first, index,
- BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode), mtr);
+ return btr_cur.open_leaf(first, index, this->latch_mode, mtr);
}
};
@@ -433,6 +413,24 @@ inline rec_t *btr_pcur_get_rec(const btr_pcur_t *cursor)
return cursor->btr_cur.page_cur.rec;
}
+/**************************************************************//**
+Initializes and opens a persistent cursor to an index tree. */
+inline
+dberr_t
+btr_pcur_open(
+ const dtuple_t* tuple, /*!< in: tuple on which search done */
+ page_cur_mode_t mode, /*!< in: PAGE_CUR_LE, ... */
+ btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+ cursor->search_mode= mode;
+ cursor->pos_state= BTR_PCUR_IS_POSITIONED;
+ cursor->trx_if_known= nullptr;
+ return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr);
+}
+
/** Open a cursor on the first user record satisfying the search condition;
in case of no match, after the last index record. */
MY_ATTRIBUTE((nonnull, warn_unused_result))
@@ -440,16 +438,15 @@ inline
dberr_t
btr_pcur_open_on_user_rec(
const dtuple_t* tuple, /*!< in: tuple on which search done */
- page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... */
btr_latch_mode latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
mtr_t* mtr) /*!< in: mtr */
{
- ut_ad(mode == PAGE_CUR_GE || mode == PAGE_CUR_G);
ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
- if (dberr_t err= btr_pcur_open(tuple, mode, latch_mode, cursor, 0, mtr))
+ if (dberr_t err=
+ btr_pcur_open(tuple, PAGE_CUR_GE, latch_mode, cursor, mtr))
return err;
if (!btr_pcur_is_after_last_on_page(cursor) ||
btr_pcur_is_after_last_in_tree(cursor))
diff --git a/storage/innobase/include/btr0pcur.inl b/storage/innobase/include/btr0pcur.inl
index 551f8f20fca..b827d70dc47 100644
--- a/storage/innobase/include/btr0pcur.inl
+++ b/storage/innobase/include/btr0pcur.inl
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -299,38 +299,10 @@ btr_pcur_init(
pcur->btr_cur.rtr_info = NULL;
}
-/**************************************************************//**
-Initializes and opens a persistent cursor to an index tree. */
-inline
-dberr_t
-btr_pcur_open(
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written
- (0 if none) */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(!cursor->index()->is_spatial());
- cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- cursor->search_mode= mode;
- cursor->pos_state= BTR_PCUR_IS_POSITIONED;
- cursor->trx_if_known= nullptr;
- return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode,
- btr_pcur_get_btr_cur(cursor),
- mtr, autoinc);
-}
-
/** Opens a persistent cursor to an index tree without initializing the
cursor.
@param tuple tuple on which search done
-@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a
+@param mode search mode; NOTE that if the search is made using a
unique prefix of a record, mode should be PAGE_CUR_LE, not
PAGE_CUR_GE, as the latter may end up on the previous page of
the record!
@@ -339,8 +311,7 @@ cursor.
@param mtr mini-transaction
@return DB_SUCCESS on success or error code otherwise. */
inline
-dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple,
- page_cur_mode_t mode,
+dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode,
btr_latch_mode latch_mode,
btr_pcur_t *cursor, mtr_t *mtr)
{
@@ -348,10 +319,7 @@ dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple,
cursor->search_mode= mode;
cursor->pos_state= BTR_PCUR_IS_POSITIONED;
cursor->trx_if_known= nullptr;
-
- /* Search with the tree cursor */
- return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode,
- btr_pcur_get_btr_cur(cursor), mtr);
+ return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr);
}
/**************************************************************//**
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 6118bfbc128..912c022c64f 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2022, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -55,25 +55,26 @@ in the index record. */
#define BTR_EXTERN_LOCAL_STORED_MAX_SIZE \
(BTR_EXTERN_FIELD_REF_SIZE * 2)
-/** Latching modes for btr_cur_search_to_nth_level(). */
+/** Latching modes for btr_cur_t::search_leaf(). */
enum btr_latch_mode {
/** Search a record on a leaf page and S-latch it. */
BTR_SEARCH_LEAF = RW_S_LATCH,
/** (Prepare to) modify a record on a leaf page and X-latch it. */
BTR_MODIFY_LEAF = RW_X_LATCH,
+ /** U-latch root and X-latch a leaf page */
+ BTR_MODIFY_ROOT_AND_LEAF = RW_SX_LATCH,
/** Obtain no latches. */
BTR_NO_LATCHES = RW_NO_LATCH,
- /** Search the previous record. */
+ /** Search the previous record.
+ Used in btr_pcur_move_backward_from_page(). */
BTR_SEARCH_PREV = 4 | BTR_SEARCH_LEAF,
- /** Modify the previous record. */
+ /** Modify the previous record.
+ Used in btr_pcur_move_backward_from_page() and ibuf_insert(). */
BTR_MODIFY_PREV = 4 | BTR_MODIFY_LEAF,
- /** Start searching the entire B-tree. */
- BTR_SEARCH_TREE = 8 | BTR_SEARCH_LEAF,
- /** Start modifying1 the entire B-tree. */
+ /** Start modifying the entire B-tree. */
BTR_MODIFY_TREE = 8 | BTR_MODIFY_LEAF,
- /** Continue searching the entire B-tree. */
- BTR_CONT_SEARCH_TREE = 4 | BTR_SEARCH_TREE,
- /** Continue modifying the entire B-tree. */
+ /** Continue modifying the entire R-tree.
+ Only used by rtr_search_to_nth_level(). */
BTR_CONT_MODIFY_TREE = 4 | BTR_MODIFY_TREE,
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually
@@ -98,14 +99,14 @@ enum btr_latch_mode {
dict_index_t::lock S-latch is being held. */
BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF
| BTR_ALREADY_S_LATCHED,
- /** Search the entire index tree, assuming that the
- dict_index_t::lock S-latch is being held. */
- BTR_SEARCH_TREE_ALREADY_S_LATCHED = BTR_SEARCH_TREE
- | BTR_ALREADY_S_LATCHED,
/** Search and X-latch a leaf page, assuming that the
dict_index_t::lock is being held in non-exclusive mode. */
BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF
| BTR_ALREADY_S_LATCHED,
+ /** U-latch root and X-latch a leaf page, assuming that
+ dict_index_t::lock is being held in U mode. */
+ BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF
+ | BTR_ALREADY_S_LATCHED,
/** Attempt to delete-mark a secondary index record. */
BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK,
@@ -132,6 +133,9 @@ enum btr_latch_mode {
/** Attempt to delete a record in the tree. */
BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
+ /** Attempt to delete a record in an x-latched tree. */
+ BTR_PURGE_TREE_ALREADY_LATCHED = BTR_PURGE_TREE
+ | BTR_ALREADY_S_LATCHED,
/** Attempt to insert a record into the tree. */
BTR_INSERT_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT,
diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h
index 777f2432c93..b07261ce042 100644
--- a/storage/innobase/include/gis0rtree.h
+++ b/storage/innobase/include/gis0rtree.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -59,6 +59,44 @@ Created 2013/03/27 Jimmy Yang and Allen Lai
/* Geometry data header */
#define GEO_DATA_HEADER_SIZE 4
+
+/** Search for a spatial index leaf page record.
+@param cur cursor
+@param tuple search tuple
+@param latch_mode latching mode
+@param mtr mini-transaction
+@param mode search mode */
+dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple,
+ btr_latch_mode latch_mode, mtr_t *mtr,
+ page_cur_mode_t mode= PAGE_CUR_RTREE_LOCATE)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Search for inserting a spatial index leaf page record.
+@param cur cursor
+@param tuple search tuple
+@param latch_mode latching mode
+@param mtr mini-transaction */
+inline dberr_t rtr_insert_leaf(btr_cur_t *cur, const dtuple_t *tuple,
+ btr_latch_mode latch_mode, mtr_t *mtr)
+{
+ return rtr_search_leaf(cur, tuple, latch_mode, mtr, PAGE_CUR_RTREE_INSERT);
+}
+
+/** Search for a spatial index leaf page record.
+@param pcur cursor
+@param tuple search tuple
+@param mode search mode
+@param mtr mini-transaction */
+dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple,
+ page_cur_mode_t mode, mtr_t *mtr)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
+ page_cur_mode_t mode,
+ btr_latch_mode latch_mode,
+ btr_cur_t *cur, mtr_t *mtr)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/**********************************************************************//**
Builds a Rtree node pointer out of a physical record and a page number.
@return own: node pointer */
@@ -295,11 +333,9 @@ rtr_store_parent_path(
/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
-bool
-rtr_pcur_open(
- dict_index_t* index, /*!< in: index */
+bool rtr_search(
const dtuple_t* tuple, /*!< in: tuple on which search done */
- btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
+ btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((warn_unused_result));
diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h
index 4fccfdb6c26..d6a4ef67a38 100644
--- a/storage/innobase/include/gis0type.h
+++ b/storage/innobase/include/gis0type.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2020, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -105,12 +105,6 @@ typedef struct rtr_info{
matched_rec_t* matches;/*!< struct holding matching leaf records */
mysql_mutex_t rtr_path_mutex;
/*!< mutex protect the "path" vector */
- buf_block_t* tree_blocks[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM];
- /*!< tracking pages that would be locked
- at leaf level, for future free */
- ulint tree_savepoints[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM];
- /*!< savepoint used to release latches/blocks
- on each level and leaf level */
rtr_mbr_t mbr; /*!< the search MBR */
que_thr_t* thr; /*!< the search thread */
mem_heap_t* heap; /*!< memory heap */
diff --git a/storage/innobase/include/ibuf0ibuf.inl b/storage/innobase/include/ibuf0ibuf.inl
index 9f4e937f31d..1e21f74ff2b 100644
--- a/storage/innobase/include/ibuf0ibuf.inl
+++ b/storage/innobase/include/ibuf0ibuf.inl
@@ -100,9 +100,8 @@ ibuf_should_try(
decide */
{
return(innodb_change_buffering
+ && !(index->type & (DICT_CLUSTERED | DICT_IBUF))
&& ibuf.max_size != 0
- && !dict_index_is_clust(index)
- && !dict_index_is_spatial(index)
&& index->table->quiesce == QUIESCE_NONE
&& (ignore_sec_unique || !dict_index_is_unique(index)));
}
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
index 093b706c1de..0dfb50125bd 100644
--- a/storage/innobase/include/mtr0log.h
+++ b/storage/innobase/include/mtr0log.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2019, 2022, MariaDB Corporation.
+Copyright (c) 2019, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -401,7 +401,9 @@ inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage,
ut_ad(have_offset || offset == 0);
ut_ad(offset + len <= srv_page_size);
static_assert(MIN_4BYTE >= UNIV_PAGE_SIZE_MAX, "consistency");
-
+ ut_ad(type == FREE_PAGE || type == OPTION || (type == EXTENDED && !bpage) ||
+ memo_contains_flagged(bpage,
+ MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
size_t max_len;
if (!have_len)
max_len= 1 + 5 + 5;
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index 41f9b473856..3c4e9da69a2 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -37,15 +37,6 @@ Created 11/26/1995 Heikki Tuuri
/** Commit a mini-transaction. */
#define mtr_commit(m) (m)->commit()
-/** Set and return a savepoint in mtr.
-@return savepoint */
-#define mtr_set_savepoint(m) (m)->get_savepoint()
-
-/** Release the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-#define mtr_release_s_latch_at_savepoint(m, s, l) \
- (m)->release_s_latch_at_savepoint((s), (l))
-
/** Change the logging mode of a mini-transaction.
@return old mode */
#define mtr_set_log_mode(m, d) (m)->set_log_mode((d))
@@ -60,13 +51,10 @@ savepoint. */
# define mtr_sx_lock_index(i,m) (m)->u_lock(&(i)->lock)
#endif
-#define mtr_release_block_at_savepoint(m, s, b) \
- (m)->release_block_at_savepoint((s), (b))
-
/** Mini-transaction memo stack slot. */
struct mtr_memo_slot_t
{
- /** pointer to the object, or nullptr if released */
+ /** pointer to the object */
void *object;
/** type of the stored object */
mtr_memo_type_t type;
@@ -125,41 +113,36 @@ struct mtr_t {
return m_memo.size();
}
- /** Release the (index tree) s-latch stored in an mtr memo after a savepoint.
- @param savepoint value returned by get_savepoint()
- @param lock index latch to release */
- void release_s_latch_at_savepoint(ulint savepoint, index_lock *lock)
+ /** Get the block at a savepoint */
+ buf_block_t *at_savepoint(ulint savepoint) const
{
ut_ad(is_active());
- mtr_memo_slot_t &slot= m_memo[savepoint];
- ut_ad(slot.object == lock);
- ut_ad(slot.type == MTR_MEMO_S_LOCK);
- slot.object= nullptr;
- lock->s_unlock();
+ const mtr_memo_slot_t &slot= m_memo[savepoint];
+ ut_ad(slot.type < MTR_MEMO_S_LOCK);
+ ut_ad(slot.object);
+ return static_cast<buf_block_t*>(slot.object);
}
- /** Release the block in an mtr memo after a savepoint. */
- void release_block_at_savepoint(ulint savepoint, buf_block_t *block)
+
+ /** Try to get a block at a savepoint.
+ @param savepoint the savepoint right before the block was acquired
+ @return the block at the savepoint
+ @retval nullptr if no buffer block was registered at that savepoint */
+ buf_block_t *block_at_savepoint(ulint savepoint) const
{
ut_ad(is_active());
- mtr_memo_slot_t &slot= m_memo[savepoint];
- ut_ad(slot.object == block);
- ut_ad(!(slot.type & MTR_MEMO_MODIFY));
- slot.object= nullptr;
- block->page.unfix();
-
- switch (slot.type) {
- case MTR_MEMO_PAGE_S_FIX:
- block->page.lock.s_unlock();
- break;
- case MTR_MEMO_PAGE_SX_FIX:
- case MTR_MEMO_PAGE_X_FIX:
- block->page.lock.u_or_x_unlock(slot.type == MTR_MEMO_PAGE_SX_FIX);
- break;
- default:
- break;
- }
+ const mtr_memo_slot_t &slot= m_memo[savepoint];
+ return slot.type < MTR_MEMO_S_LOCK
+ ? static_cast<buf_block_t*>(slot.object)
+ : nullptr;
}
+ /** Retrieve a page that has already been latched.
+ @param id page identifier
+ @param type page latch type
+ @return block */
+ buf_block_t *get_already_latched(const page_id_t id, mtr_memo_type_t type)
+ const;
+
/** @return if we are about to make a clean buffer block dirty */
static bool is_block_dirtied(const buf_page_t &b)
{
@@ -408,28 +391,17 @@ public:
@param rw_latch latch to acquire */
void upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch);
- /** Register a page latch on a buffer-fixed block was buffer-fixed.
- @param latch latch type */
- void u_lock_register(ulint savepoint)
- {
- mtr_memo_slot_t &slot= m_memo[savepoint];
- ut_ad(slot.type == MTR_MEMO_BUF_FIX);
- slot.type= MTR_MEMO_PAGE_SX_FIX;
- }
-
- /** Register a page latch on a buffer-fixed block was buffer-fixed.
- @param latch latch type */
- void s_lock_register(ulint savepoint)
+ /** Register a page latch on a previously buffer-fixed block. */
+ void lock_register(ulint savepoint, mtr_memo_type_t type)
{
mtr_memo_slot_t &slot= m_memo[savepoint];
ut_ad(slot.type == MTR_MEMO_BUF_FIX);
- slot.type= MTR_MEMO_PAGE_S_FIX;
+ ut_ad(type <= MTR_MEMO_PAGE_SX_FIX);
+ slot.type= type;
}
/** Upgrade U locks on a block to X */
void page_lock_upgrade(const buf_block_t &block);
- /** Upgrade U lock to X */
- void lock_upgrade(const index_lock &lock);
/** Check if we are holding tablespace latch
@param space tablespace to search for
diff --git a/storage/innobase/include/small_vector.h b/storage/innobase/include/small_vector.h
index 76069cfc168..d28a36184b8 100644
--- a/storage/innobase/include/small_vector.h
+++ b/storage/innobase/include/small_vector.h
@@ -71,6 +71,7 @@ public:
using const_iterator= const T *;
using reverse_iterator= std::reverse_iterator<iterator>;
using reference= T &;
+ using const_reference= const T&;
iterator begin() { return static_cast<iterator>(BeginX); }
const_iterator begin() const { return static_cast<const_iterator>(BeginX); }
@@ -81,6 +82,8 @@ public:
reverse_iterator rend() { return reverse_iterator(begin()); }
reference operator[](size_t i) { assert(i < size()); return begin()[i]; }
+ const_reference operator[](size_t i) const
+ { return const_cast<small_vector&>(*this)[i]; }
void erase(const_iterator S, const_iterator E)
{
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 730914b2892..ce72b26693c 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -40,6 +40,8 @@ Created 11/26/1995 Heikki Tuuri
void mtr_memo_slot_t::release() const
{
+ ut_ad(object);
+
switch (type) {
case MTR_MEMO_S_LOCK:
static_cast<index_lock*>(object)->s_unlock();
@@ -121,11 +123,7 @@ inline void mtr_t::release_resources()
void mtr_t::release()
{
for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++)
- {
- mtr_memo_slot_t &slot= *it;
- if (slot.object)
- slot.release();
- }
+ it->release();
m_memo.clear();
}
@@ -191,7 +189,7 @@ void mtr_t::commit()
for (const mtr_memo_slot_t &slot : m_memo)
{
- if (slot.object && slot.type & MTR_MEMO_MODIFY)
+ if (slot.type & MTR_MEMO_MODIFY)
{
ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY ||
slot.type == MTR_MEMO_PAGE_SX_MODIFY);
@@ -226,8 +224,7 @@ void mtr_t::rollback_to_savepoint(ulint begin, ulint end)
while (s-- > begin)
{
const mtr_memo_slot_t &slot= m_memo[s];
- if (!slot.object)
- continue;
+ ut_ad(slot.object);
/* This is intended for releasing latches on indexes or unmodified
buffer pool pages. */
ut_ad(slot.type <= MTR_MEMO_SX_LOCK);
@@ -289,8 +286,7 @@ void mtr_t::commit_shrink(fil_space_t &space)
for (mtr_memo_slot_t &slot : m_memo)
{
- if (!slot.object)
- continue;
+ ut_ad(slot.object);
switch (slot.type) {
default:
ut_ad("invalid type" == 0);
@@ -954,15 +950,6 @@ void mtr_t::page_lock_upgrade(const buf_block_t &block)
#endif /* BTR_CUR_HASH_ADAPT */
}
-void mtr_t::lock_upgrade(const index_lock &lock)
-{
- ut_ad(lock.have_x());
-
- for (mtr_memo_slot_t &slot : m_memo)
- if (slot.object == &lock && slot.type == MTR_MEMO_SX_LOCK)
- slot.type= MTR_MEMO_X_LOCK;
-}
-
/** Latch a buffer pool block.
@param block block to be latched
@param rw_latch RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH */
@@ -1134,7 +1121,8 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags)
for (const mtr_memo_slot_t &slot : m_memo)
{
- if (!slot.object || !(flags & slot.type))
+ ut_ad(slot.object);
+ if (!(flags & slot.type))
continue;
buf_page_t *bpage= static_cast<buf_page_t*>(slot.object);
@@ -1194,20 +1182,23 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset)
buf_block_t *freed= nullptr;
const page_id_t id{space.id, offset};
- for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++)
+ for (auto it= m_memo.end(); it != m_memo.begin(); )
{
+ it--;
+ next:
mtr_memo_slot_t &slot= *it;
buf_block_t *block= static_cast<buf_block_t*>(slot.object);
- if (!block);
- else if (block == freed)
+ ut_ad(block);
+ if (block == freed)
{
if (slot.type & (MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX))
slot.type= MTR_MEMO_PAGE_X_FIX;
else
{
ut_ad(slot.type == MTR_MEMO_BUF_FIX);
- slot.object= nullptr;
block->page.unfix();
+ m_memo.erase(it, it + 1);
+ goto next;
}
}
else if (slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX) &&
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index 45c35bc6995..861095b421e 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1629,6 +1629,9 @@ inline dberr_t IndexPurge::purge_pessimistic_delete() noexcept
dberr_t IndexPurge::purge() noexcept
{
btr_pcur_store_position(&m_pcur, &m_mtr);
+ m_mtr.commit();
+ m_mtr.start();
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
dberr_t err= purge_pessimistic_delete();
m_mtr.start();
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 81babd0082e..eea66ae05aa 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2022, MariaDB Corporation.
+Copyright (c) 2016, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -314,8 +314,10 @@ row_ins_clust_index_entry_by_modify(
}
if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
- == BTR_MODIFY_LEAF);
+ ut_ad(mode == BTR_MODIFY_LEAF
+ || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED
+ || mode == BTR_MODIFY_ROOT_AND_LEAF
+ || mode == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED);
/* Try optimistic updating of the record, keeping changes
within the page */
@@ -1621,8 +1623,7 @@ row_ins_check_foreign_constraint(
dtuple_set_n_fields_cmp(entry, foreign->n_fields);
pcur.btr_cur.page_cur.index = check_index;
- err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, 0,
- &mtr);
+ err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
goto end_scan;
}
@@ -2119,7 +2120,7 @@ row_ins_scan_sec_index_for_duplicate(
pcur.btr_cur.page_cur.index = index;
trx_t* const trx = thr_get_trx(thr);
dberr_t err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF,
- &pcur, 0, mtr);
+ &pcur, mtr);
if (err != DB_SUCCESS) {
goto end_scan;
}
@@ -2539,8 +2540,8 @@ row_ins_index_entry_big_rec(
index->set_modified(mtr);
}
- dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &pcur, 0, &mtr);
+ dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_MODIFY_TREE,
+ &pcur, &mtr);
if (error != DB_SUCCESS) {
return error;
}
@@ -2598,11 +2599,10 @@ row_ins_clust_index_entry_low(
que_thr_t* thr) /*!< in: query thread */
{
btr_pcur_t pcur;
- btr_cur_t* cursor;
dberr_t err = DB_SUCCESS;
big_rec_t* big_rec = NULL;
mtr_t mtr;
- ib_uint64_t auto_inc = 0;
+ uint64_t auto_inc = 0;
mem_heap_t* offsets_heap = NULL;
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs* offsets = offsets_;
@@ -2618,7 +2618,7 @@ row_ins_clust_index_entry_low(
ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index));
ut_ad(!trx->in_rollback);
- mtr_start(&mtr);
+ mtr.start();
if (index->table->is_temporary()) {
/* Disable REDO logging as the lifetime of temp-tables is
@@ -2658,6 +2658,13 @@ row_ins_clust_index_entry_low(
dfield->type.mtype,
dfield->type.prtype
& DATA_UNSIGNED);
+ if (auto_inc
+ && mode != BTR_MODIFY_TREE) {
+ mode = btr_latch_mode(
+ BTR_MODIFY_ROOT_AND_LEAF
+ ^ BTR_MODIFY_LEAF
+ ^ mode);
+ }
}
}
}
@@ -2667,20 +2674,26 @@ row_ins_clust_index_entry_low(
the function will return in both low_match and up_match of the
cursor sensible values */
pcur.btr_cur.page_cur.index = index;
- err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, auto_inc, &mtr);
+ err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, &mtr);
if (err != DB_SUCCESS) {
index->table->file_unreadable = true;
-commit_exit:
+err_exit:
mtr.commit();
goto func_exit;
}
- cursor = btr_pcur_get_btr_cur(&pcur);
- cursor->thr = thr;
+ if (auto_inc) {
+ buf_block_t* root
+ = mtr.at_savepoint(mode != BTR_MODIFY_ROOT_AND_LEAF);
+ ut_ad(index->page == root->page.id().page_no());
+ page_set_autoinc(root, auto_inc, &mtr, false);
+ }
+
+ btr_pcur_get_btr_cur(&pcur)->thr = thr;
#ifdef UNIV_DEBUG
{
- page_t* page = btr_cur_get_page(cursor);
+ page_t* page = btr_pcur_get_page(&pcur);
rec_t* first_rec = page_rec_get_next(
page_get_infimum_rec(page));
@@ -2689,7 +2702,7 @@ commit_exit:
}
#endif /* UNIV_DEBUG */
- block = btr_cur_get_block(cursor);
+ block = btr_pcur_get_block(&pcur);
DBUG_EXECUTE_IF("row_ins_row_level", goto skip_bulk_insert;);
@@ -2711,7 +2724,7 @@ commit_exit:
if (err != DB_SUCCESS) {
trx->error_state = err;
- goto commit_exit;
+ goto err_exit;
}
if (index->table->n_rec_locks) {
@@ -2741,7 +2754,7 @@ skip_bulk_insert:
ut_ad(index->is_instant());
ut_ad(!dict_index_is_online_ddl(index));
- const rec_t* rec = btr_cur_get_rec(cursor);
+ const rec_t* rec = btr_pcur_get_rec(&pcur);
if (rec_get_info_bits(rec, page_rec_is_comp(rec))
& REC_INFO_MIN_REC_FLAG) {
@@ -2750,16 +2763,17 @@ skip_bulk_insert:
goto err_exit;
}
- ut_ad(!row_ins_must_modify_rec(cursor));
+ ut_ad(!row_ins_must_modify_rec(&pcur.btr_cur));
goto do_insert;
}
- if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) {
+ if (rec_is_metadata(btr_pcur_get_rec(&pcur), *index)) {
goto do_insert;
}
if (n_uniq
- && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) {
+ && (pcur.btr_cur.up_match >= n_uniq
+ || pcur.btr_cur.low_match >= n_uniq)) {
if (flags
== (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
@@ -2767,7 +2781,7 @@ skip_bulk_insert:
/* Set no locks when applying log
in online table rebuild. Only check for duplicates. */
err = row_ins_duplicate_error_in_clust_online(
- n_uniq, entry, cursor,
+ n_uniq, entry, &pcur.btr_cur,
&offsets, &offsets_heap);
switch (err) {
@@ -2778,26 +2792,24 @@ skip_bulk_insert:
/* fall through */
case DB_SUCCESS_LOCKED_REC:
case DB_DUPLICATE_KEY:
- trx->error_info = cursor->index();
+ trx->error_info = index;
}
} else {
/* Note that the following may also return
DB_LOCK_WAIT */
err = row_ins_duplicate_error_in_clust(
- flags, cursor, entry, thr);
+ flags, &pcur.btr_cur, entry, thr);
}
if (err != DB_SUCCESS) {
-err_exit:
- mtr_commit(&mtr);
- goto func_exit;
+ goto err_exit;
}
}
/* Note: Allowing duplicates would qualify for modification of
an existing record as the new entry is exactly the same as the old entry. */
- if (row_ins_must_modify_rec(cursor)) {
+ if (row_ins_must_modify_rec(&pcur.btr_cur)) {
/* There is already an index entry with a long enough common
prefix, we must convert the insert into a modify of an
existing record */
@@ -2815,10 +2827,13 @@ do_insert:
rec_t* insert_rec;
if (mode != BTR_MODIFY_TREE) {
- ut_ad(mode == BTR_MODIFY_LEAF ||
- mode == BTR_MODIFY_LEAF_ALREADY_LATCHED);
+ ut_ad(mode == BTR_MODIFY_LEAF
+ || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED
+ || mode == BTR_MODIFY_ROOT_AND_LEAF
+ || mode
+ == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED);
err = btr_cur_optimistic_insert(
- flags, cursor, &offsets, &offsets_heap,
+ flags, &pcur.btr_cur, &offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
} else {
@@ -2827,17 +2842,15 @@ do_insert:
goto err_exit;
}
- DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust");
-
err = btr_cur_optimistic_insert(
- flags, cursor,
+ flags, &pcur.btr_cur,
&offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
if (err == DB_FAIL) {
err = btr_cur_pessimistic_insert(
- flags, cursor,
+ flags, &pcur.btr_cur,
&offsets, &offsets_heap,
entry, &insert_rec, &big_rec,
n_ext, thr, &mtr);
@@ -2949,9 +2962,7 @@ row_ins_sec_index_entry_low(
rtr_init_rtr_info(&rtr_info, false, &cursor, index, false);
rtr_info_update_btr(&cursor, &rtr_info);
- err = btr_cur_search_to_nth_level(0, entry,
- PAGE_CUR_RTREE_INSERT,
- search_mode, &cursor, &mtr);
+ err = rtr_insert_leaf(&cursor, entry, search_mode, &mtr);
if (err == DB_SUCCESS && search_mode == BTR_MODIFY_LEAF
&& rtr_info.mbr_adj) {
@@ -2967,9 +2978,8 @@ row_ins_sec_index_entry_low(
} else {
index->set_modified(mtr);
}
- err = btr_cur_search_to_nth_level(
- 0, entry, PAGE_CUR_RTREE_INSERT,
- search_mode, &cursor, &mtr);
+ err = rtr_insert_leaf(&cursor, entry,
+ search_mode, &mtr);
}
DBUG_EXECUTE_IF(
@@ -2985,8 +2995,8 @@ row_ins_sec_index_entry_low(
: BTR_INSERT));
}
- err = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE,
- search_mode, &cursor, &mtr);
+ err = cursor.search_leaf(entry, PAGE_CUR_LE, search_mode,
+ &mtr);
}
if (err != DB_SUCCESS) {
@@ -3062,12 +3072,12 @@ row_ins_sec_index_entry_low(
prevent any insertion of a duplicate by another
transaction. Let us now reposition the cursor and
continue the insertion (bypassing the change buffer). */
- err = btr_cur_search_to_nth_level(
- 0, entry, PAGE_CUR_LE,
+ err = cursor.search_leaf(
+ entry, PAGE_CUR_LE,
btr_latch_mode(search_mode
& ~(BTR_INSERT
| BTR_IGNORE_SEC_UNIQUE)),
- &cursor, &mtr);
+ &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
}
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 94d69d88fb5..0743dc2bb50 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1696,8 +1696,8 @@ err_exit:
mtr->start();
index->set_modified(*mtr);
pcur->btr_cur.page_cur.index = index;
- error = btr_pcur_open(entry, PAGE_CUR_LE,
- BTR_PURGE_TREE, pcur, 0, mtr);
+ error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_PURGE_TREE, pcur,
+ mtr);
if (error) {
goto err_exit;
}
@@ -1780,8 +1780,8 @@ row_log_table_apply_delete(
mtr_start(&mtr);
index->set_modified(mtr);
- dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE,
- BTR_PURGE_TREE, &pcur, 0, &mtr);
+ dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_PURGE_TREE, &pcur,
+ &mtr);
if (err != DB_SUCCESS) {
goto all_done;
}
@@ -1917,8 +1917,8 @@ row_log_table_apply_update(
mtr.start();
index->set_modified(mtr);
- error = btr_pcur_open(old_pk, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &pcur, 0, &mtr);
+ error = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_MODIFY_TREE, &pcur,
+ &mtr);
if (error != DB_SUCCESS) {
func_exit:
mtr.commit();
@@ -3084,11 +3084,8 @@ row_log_apply_op_low(
record. The operation may already have been performed,
depending on when the row in the clustered index was
scanned. */
- *error = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE,
- has_index_lock
- ? BTR_MODIFY_TREE
- : BTR_MODIFY_LEAF,
- &cursor, &mtr);
+ *error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock
+ ? BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr);
if (UNIV_UNLIKELY(*error != DB_SUCCESS)) {
goto func_exit;
}
@@ -3138,9 +3135,9 @@ row_log_apply_op_low(
mtr_commit(&mtr);
mtr_start(&mtr);
index->set_modified(mtr);
- *error = btr_cur_search_to_nth_level(
- 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, &mtr);
+ *error = cursor.search_leaf(entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE,
+ &mtr);
if (UNIV_UNLIKELY(*error != DB_SUCCESS)) {
goto func_exit;
}
@@ -3242,9 +3239,9 @@ insert_the_rec:
mtr_commit(&mtr);
mtr_start(&mtr);
index->set_modified(mtr);
- *error = btr_cur_search_to_nth_level(
- 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, &mtr);
+ *error = cursor.search_leaf(entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE,
+ &mtr);
if (*error != DB_SUCCESS) {
break;
}
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index caa2646eee2..535762ee105 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -150,9 +150,8 @@ public:
false);
rtr_info_update_btr(&ins_cur, &rtr_info);
- error = btr_cur_search_to_nth_level(
- 0, dtuple, PAGE_CUR_RTREE_INSERT,
- BTR_MODIFY_LEAF, &ins_cur, &mtr);
+ error = rtr_insert_leaf(&ins_cur, dtuple,
+ BTR_MODIFY_LEAF, &mtr);
/* It need to update MBR in parent entry,
so change search mode to BTR_MODIFY_TREE */
@@ -164,10 +163,8 @@ public:
rtr_info_update_btr(&ins_cur, &rtr_info);
mtr.start();
index->set_modified(mtr);
- error = btr_cur_search_to_nth_level(
- 0, dtuple,
- PAGE_CUR_RTREE_INSERT,
- BTR_MODIFY_TREE, &ins_cur, &mtr);
+ error = rtr_insert_leaf(&ins_cur, dtuple,
+ BTR_MODIFY_TREE, &mtr);
}
if (error == DB_SUCCESS) {
@@ -189,11 +186,8 @@ public:
&ins_cur, index, false);
rtr_info_update_btr(&ins_cur, &rtr_info);
- error = btr_cur_search_to_nth_level(
- 0, dtuple,
- PAGE_CUR_RTREE_INSERT,
- BTR_MODIFY_TREE,
- &ins_cur, &mtr);
+ error = rtr_insert_leaf(&ins_cur, dtuple,
+ BTR_MODIFY_TREE, &mtr);
if (error == DB_SUCCESS) {
error = btr_cur_pessimistic_insert(
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index e216d9faa3b..753b42332fc 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -104,7 +104,7 @@ bool
row_purge_remove_clust_if_poss_low(
/*===============================*/
purge_node_t* node, /*!< in/out: row purge node */
- btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+ btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */
{
dict_index_t* index = dict_table_get_first_index(node->table);
table_id_t table_id = 0;
@@ -342,17 +342,20 @@ row_purge_remove_sec_if_poss_tree(
ibool success = TRUE;
dberr_t err;
mtr_t mtr;
- enum row_search_result search_result;
log_free_check();
mtr.start();
index->set_modified(mtr);
pcur.btr_cur.page_cur.index = index;
- search_result = row_search_index_entry(entry, BTR_PURGE_TREE,
- &pcur, &mtr);
+ if (index->is_spatial()) {
+ if (!rtr_search(entry, BTR_PURGE_TREE, &pcur, &mtr)) {
+ goto found;
+ }
+ goto func_exit;
+ }
- switch (search_result) {
+ switch (row_search_index_entry(entry, BTR_PURGE_TREE, &pcur, &mtr)) {
case ROW_NOT_FOUND:
/* Not found. This is a legitimate condition. In a
rollback, InnoDB will remove secondary recs that would
@@ -381,6 +384,7 @@ row_purge_remove_sec_if_poss_tree(
which cannot be purged yet, requires its existence. If some requires,
we should do nothing. */
+found:
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) {
/* Remove the index record, which should have been
@@ -439,8 +443,6 @@ row_purge_remove_sec_if_poss_leaf(
{
mtr_t mtr;
btr_pcur_t pcur;
- enum btr_latch_mode mode;
- enum row_search_result search_result;
bool success = true;
log_free_check();
@@ -449,31 +451,27 @@ row_purge_remove_sec_if_poss_leaf(
mtr.start();
index->set_modified(mtr);
- /* Change buffering is disabled for spatial index and
- virtual index. */
- mode = (index->type & (DICT_SPATIAL | DICT_VIRTUAL))
- ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF;
pcur.btr_cur.page_cur.index = index;
/* Set the purge node for the call to row_purge_poss_sec(). */
pcur.btr_cur.purge_node = node;
if (index->is_spatial()) {
pcur.btr_cur.thr = NULL;
- index->lock.u_lock(SRW_LOCK_CALL);
- search_result = row_search_index_entry(
- entry, mode, &pcur, &mtr);
- index->lock.u_unlock();
- } else {
- /* Set the query thread, so that ibuf_insert_low() will be
- able to invoke thd_get_trx(). */
- pcur.btr_cur.thr = static_cast<que_thr_t*>(
- que_node_get_parent(node));
- search_result = row_search_index_entry(
- entry, mode, &pcur, &mtr);
+ if (!rtr_search(entry, BTR_MODIFY_LEAF, &pcur, &mtr)) {
+ goto found;
+ }
+ goto func_exit;
}
- switch (search_result) {
+ /* Set the query thread, so that ibuf_insert_low() will be
+ able to invoke thd_get_trx(). */
+ pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node));
+
+ switch (row_search_index_entry(entry, index->has_virtual()
+ ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF,
+ &pcur, &mtr)) {
case ROW_FOUND:
+found:
/* Before attempting to purge a record, check
if it is safe to do so. */
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) {
diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc
index b998d27d836..4a00b2a430e 100644
--- a/storage/innobase/row/row0row.cc
+++ b/storage/innobase/row/row0row.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, 2022, MariaDB Corporation.
+Copyright (c) 2018, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1216,7 +1216,7 @@ row_search_on_row_ref(
& REC_INFO_MIN_REC_FLAG;
} else {
ut_a(ref->n_fields == index->n_uniq);
- if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, 0, mtr)
+ if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, mtr)
!= DB_SUCCESS) {
return false;
}
@@ -1278,21 +1278,13 @@ row_search_index_entry(
ut_ad(dtuple_check_typed(entry));
- if (pcur->index()->is_spatial()) {
- if (rtr_pcur_open(pcur->index(), entry, mode, pcur, mtr)) {
- return ROW_NOT_FOUND;
- }
- } else {
- if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, 0, mtr)
- != DB_SUCCESS) {
- return ROW_NOT_FOUND;
- }
+ if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) {
+ return ROW_NOT_FOUND;
}
switch (btr_pcur_get_btr_cur(pcur)->flag) {
case BTR_CUR_DELETE_REF:
ut_ad(!(~mode & BTR_DELETE));
- ut_ad(!pcur->index()->is_spatial());
return(ROW_NOT_DELETED_REF);
case BTR_CUR_DEL_MARK_IBUF:
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 87e3ca43b1c..e44cc466295 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -2,7 +2,7 @@
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -4776,7 +4776,7 @@ wait_table_again:
pcur->btr_cur.thr = thr;
pcur->old_rec = nullptr;
- if (dict_index_is_spatial(index)) {
+ if (index->is_spatial()) {
if (!prebuilt->rtr_info) {
prebuilt->rtr_info = rtr_create_rtr_info(
set_also_gap_locks, true,
@@ -4792,10 +4792,13 @@ wait_table_again:
prebuilt->rtr_info->search_tuple = search_tuple;
prebuilt->rtr_info->search_mode = mode;
}
- }
- err = btr_pcur_open_with_no_init(search_tuple, mode,
- BTR_SEARCH_LEAF, pcur, &mtr);
+ err = rtr_search_leaf(pcur, search_tuple, mode, &mtr);
+ } else {
+ err = btr_pcur_open_with_no_init(search_tuple, mode,
+ BTR_SEARCH_LEAF,
+ pcur, &mtr);
+ }
if (err != DB_SUCCESS) {
page_corrupted:
@@ -5771,8 +5774,7 @@ next_rec_after_check:
if (spatial_search) {
/* No need to do store restore for R-tree */
- mtr.commit();
- mtr.start();
+ mtr.rollback_to_savepoint(0);
} else if (mtr_extra_clust_savepoint) {
/* We must release any clustered index latches
if we are moving to the next non-clustered
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 6567019a33d..50196e78092 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -233,7 +233,7 @@ func_exit:
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_METADATA) {
/* When rolling back the very first instant ADD COLUMN
operation, reset the root page to the basic state. */
- err = btr_reset_instant(*index, true, &mtr);
+ btr_reset_instant(*index, true, &mtr);
}
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
@@ -268,21 +268,32 @@ row_undo_ins_remove_sec_low(
pcur.btr_cur.page_cur.index = index;
row_mtr_start(&mtr, index, !modify_leaf);
- if (modify_leaf) {
- mode = BTR_MODIFY_LEAF_ALREADY_LATCHED;
- mtr_s_lock_index(index, &mtr);
- } else {
- ut_ad(mode == BTR_PURGE_TREE);
- mtr_sx_lock_index(index, &mtr);
- }
-
if (index->is_spatial()) {
mode = modify_leaf
- ? btr_latch_mode(BTR_MODIFY_LEAF_ALREADY_LATCHED
+ ? btr_latch_mode(BTR_MODIFY_LEAF
| BTR_RTREE_DELETE_MARK
| BTR_RTREE_UNDO_INS)
: btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS);
btr_pcur_get_btr_cur(&pcur)->thr = thr;
+ if (rtr_search(entry, mode, &pcur, &mtr)) {
+ goto func_exit;
+ }
+
+ if (rec_get_deleted_flag(
+ btr_pcur_get_rec(&pcur),
+ dict_table_is_comp(index->table))) {
+ ib::error() << "Record found in index " << index->name
+ << " is deleted marked on insert rollback.";
+ ut_ad(0);
+ }
+ goto found;
+ } else if (modify_leaf) {
+ mode = BTR_MODIFY_LEAF_ALREADY_LATCHED;
+ mtr_s_lock_index(index, &mtr);
+ } else {
+ ut_ad(mode == BTR_PURGE_TREE);
+ mode = BTR_PURGE_TREE_ALREADY_LATCHED;
+ mtr_x_lock_index(index, &mtr);
}
switch (row_search_index_entry(entry, mode, &pcur, &mtr)) {
@@ -295,15 +306,7 @@ row_undo_ins_remove_sec_low(
case ROW_NOT_FOUND:
break;
case ROW_FOUND:
- if (dict_index_is_spatial(index)
- && rec_get_deleted_flag(
- btr_pcur_get_rec(&pcur),
- dict_table_is_comp(index->table))) {
- ib::error() << "Record found in index " << index->name
- << " is deleted marked on insert rollback.";
- ut_ad(0);
- }
-
+ found:
btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
if (modify_leaf) {
@@ -318,6 +321,7 @@ row_undo_ins_remove_sec_low(
}
}
+func_exit:
btr_pcur_close(&pcur);
mtr_commit(&mtr);
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 2d04dca4003..50e15e03cc9 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2022, MariaDB Corporation.
+Copyright (c) 2017, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -133,8 +133,7 @@ row_undo_mod_clust_low(
&& node->ref == &trx_undo_metadata
&& btr_cur_get_index(btr_cur)->table->instant
&& node->update->info_bits == REC_INFO_METADATA_ADD) {
- err = btr_reset_instant(*btr_cur_get_index(btr_cur),
- false, mtr);
+ btr_reset_instant(*btr_cur->index(), false, mtr);
}
}
@@ -490,7 +489,6 @@ row_undo_mod_del_mark_or_remove_sec_low(
dberr_t err = DB_SUCCESS;
mtr_t mtr;
mtr_t mtr_vers;
- row_search_result search_result;
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
row_mtr_start(&mtr, index, !modify_leaf);
@@ -505,6 +503,11 @@ row_undo_mod_del_mark_or_remove_sec_low(
| BTR_RTREE_UNDO_INS)
: btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS);
btr_cur->thr = thr;
+ if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) {
+ goto found;
+ } else {
+ goto func_exit;
+ }
} else if (!index->is_committed()) {
/* The index->online_status may change if the index is
or was being created online, but not committed yet. It
@@ -514,7 +517,8 @@ row_undo_mod_del_mark_or_remove_sec_low(
mtr_s_lock_index(index, &mtr);
} else {
ut_ad(mode == BTR_PURGE_TREE);
- mtr_sx_lock_index(index, &mtr);
+ mode = BTR_PURGE_TREE_ALREADY_LATCHED;
+ mtr_x_lock_index(index, &mtr);
}
} else {
/* For secondary indexes,
@@ -523,9 +527,8 @@ row_undo_mod_del_mark_or_remove_sec_low(
ut_ad(!dict_index_is_online_ddl(index));
}
- search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
-
- switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
+ switch (UNIV_EXPECT(row_search_index_entry(entry, mode, &pcur, &mtr),
+ ROW_FOUND)) {
case ROW_NOT_FOUND:
/* In crash recovery, the secondary index record may
be missing if the UPDATE did not have time to insert
@@ -547,6 +550,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
ut_error;
}
+found:
/* We should remove the index record if no prior version of the row,
which cannot be purged yet, requires its existence. If some requires,
we should delete mark the record. */
@@ -665,13 +669,12 @@ row_undo_mod_del_unmark_sec_and_undo_update(
trx_t* trx = thr_get_trx(thr);
const ulint flags
= BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
- row_search_result search_result;
const auto orig_mode = mode;
pcur.btr_cur.page_cur.index = index;
ut_ad(trx->id != 0);
- if (dict_index_is_spatial(index)) {
+ if (index->is_spatial()) {
/* FIXME: Currently we do a 2-pass search for the undo
due to avoid undel-mark a wrong rec in rolling back in
partial update. Later, we could log some info in
@@ -686,9 +689,22 @@ try_again:
btr_cur->thr = thr;
- search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
+ if (index->is_spatial()) {
+ if (!rtr_search(entry, mode, &pcur, &mtr)) {
+ goto found;
+ }
- switch (search_result) {
+ if (mode != orig_mode && btr_cur->rtr_info->fd_del) {
+ mode = orig_mode;
+ btr_pcur_close(&pcur);
+ mtr.commit();
+ goto try_again;
+ }
+
+ goto not_found;
+ }
+
+ switch (row_search_index_entry(entry, mode, &pcur, &mtr)) {
mem_heap_t* heap;
mem_heap_t* offsets_heap;
rec_offs* offsets;
@@ -699,17 +715,7 @@ try_again:
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
ut_error;
case ROW_NOT_FOUND:
- /* For spatial index, if first search didn't find an
- undel-marked rec, try to find a del-marked rec. */
- if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
- if (mode != orig_mode) {
- mode = orig_mode;
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- goto try_again;
- }
- }
-
+not_found:
if (btr_cur->up_match >= dict_index_get_n_unique(index)
|| btr_cur->low_match >= dict_index_get_n_unique(index)) {
ib::warn() << "Record in index " << index->name
@@ -767,6 +773,7 @@ try_again:
break;
case ROW_FOUND:
+found:
btr_rec_set_deleted<false>(btr_cur_get_block(btr_cur),
btr_cur_get_rec(btr_cur), &mtr);
heap = mem_heap_create(
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index a3f940adff5..fe88fce58a2 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2022, MariaDB Corporation.
+Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1832,12 +1832,10 @@ row_upd_sec_index_entry(
que_thr_t* thr) /*!< in: query thread */
{
mtr_t mtr;
- const rec_t* rec;
btr_pcur_t pcur;
mem_heap_t* heap;
dtuple_t* entry;
dict_index_t* index;
- btr_cur_t* btr_cur;
dberr_t err = DB_SUCCESS;
trx_t* trx = thr_get_trx(thr);
btr_latch_mode mode;
@@ -1876,10 +1874,6 @@ row_upd_sec_index_entry(
case SRV_TMP_SPACE_ID:
mtr.set_log_mode(MTR_LOG_NO_REDO);
flags = BTR_NO_LOCKING_FLAG;
- if (index->is_spatial()) {
- mode = btr_latch_mode(BTR_MODIFY_LEAF
- | BTR_RTREE_DELETE_MARK);
- }
break;
default:
index->set_modified(mtr);
@@ -1888,26 +1882,35 @@ row_upd_sec_index_entry(
flags = index->table->no_rollback() ? BTR_NO_ROLLBACK : 0;
/* We can only buffer delete-mark operations if there
are no foreign key constraints referring to the index. */
- mode = index->is_spatial()
- ? btr_latch_mode(BTR_MODIFY_LEAF
- | BTR_RTREE_DELETE_MARK)
- : referenced
- ? BTR_MODIFY_LEAF : BTR_DELETE_MARK_LEAF;
+ if (!referenced) {
+ mode = BTR_DELETE_MARK_LEAF;
+ }
break;
}
/* Set the query thread, so that ibuf_insert_low() will be
able to invoke thd_get_trx(). */
- btr_pcur_get_btr_cur(&pcur)->thr = thr;
+ pcur.btr_cur.thr = thr;
pcur.btr_cur.page_cur.index = index;
- search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
+ if (index->is_spatial()) {
+ mode = btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK);
+ if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) {
+ goto found;
+ }
- btr_cur = btr_pcur_get_btr_cur(&pcur);
+ if (pcur.btr_cur.rtr_info->fd_del) {
+ /* We found the record, but it is delete-marked */
+ goto close;
+ }
- rec = btr_cur_get_rec(btr_cur);
+ goto not_found;
+ }
+
+ search_result = row_search_index_entry(entry, mode, &pcur, &mtr);
switch (search_result) {
+ const rec_t* rec;
case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */
ut_error;
break;
@@ -1916,11 +1919,8 @@ row_upd_sec_index_entry(
break;
case ROW_NOT_FOUND:
- if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
- /* We found the record, but a delete marked */
- break;
- }
-
+not_found:
+ rec = btr_pcur_get_rec(&pcur);
ib::error()
<< "Record in index " << index->name
<< " of table " << index->table->name
@@ -1934,7 +1934,9 @@ row_upd_sec_index_entry(
#endif /* UNIV_DEBUG */
break;
case ROW_FOUND:
+found:
ut_ad(err == DB_SUCCESS);
+ rec = btr_pcur_get_rec(&pcur);
/* Delete mark the old index record; it can already be
delete marked if we return after a lock wait in
@@ -1943,14 +1945,14 @@ row_upd_sec_index_entry(
rec, dict_table_is_comp(index->table))) {
err = lock_sec_rec_modify_check_and_lock(
flags,
- btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur), index, thr, &mtr);
+ btr_pcur_get_block(&pcur),
+ btr_pcur_get_rec(&pcur), index, thr, &mtr);
if (err != DB_SUCCESS) {
break;
}
- btr_rec_set_deleted<true>(btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur),
+ btr_rec_set_deleted<true>(btr_pcur_get_block(&pcur),
+ btr_pcur_get_rec(&pcur),
&mtr);
#ifdef WITH_WSREP
if (!referenced && foreign
@@ -2009,6 +2011,7 @@ row_upd_sec_index_entry(
}
}
+close:
btr_pcur_close(&pcur);
mtr_commit(&mtr);