diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2023-01-23 14:52:49 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2023-01-23 14:52:49 +0200 |
commit | e41fb3697c9f10b3db2e35b6a5704a03b1041c7e (patch) | |
tree | 4c36737d6a6edddee0ca59044e30d2a15ea6d014 | |
parent | 851c56771e11d50648430bb47644966996b9aa82 (diff) | |
download | mariadb-git-e41fb3697c9f10b3db2e35b6a5704a03b1041c7e.tar.gz |
Revert "MDEV-30400 Assertion height == btr_page_get_level(...) on INSERT"
This reverts commit f9cac8d2cbf82d4d616905fb3dfab34a9901179d
which was accidentally pushed prematurely.
41 files changed, 3151 insertions, 2309 deletions
diff --git a/mysql-test/suite/innodb/r/innodb_wl6326.result b/mysql-test/suite/innodb/r/innodb_wl6326.result new file mode 100644 index 00000000000..fcd58aedafe --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_wl6326.result @@ -0,0 +1,405 @@ +SET GLOBAL innodb_adaptive_hash_index = false; +SET GLOBAL innodb_stats_persistent = false; +connect con1,localhost,root,,; +connect con2,localhost,root,,; +connect con3,localhost,root,,; +CREATE TABLE t1 ( +a00 CHAR(255) NOT NULL DEFAULT 'a', +a01 CHAR(255) NOT NULL DEFAULT 'a', +a02 CHAR(255) NOT NULL DEFAULT 'a', +a03 CHAR(255) NOT NULL DEFAULT 'a', +a04 CHAR(255) NOT NULL DEFAULT 'a', +a05 CHAR(255) NOT NULL DEFAULT 'a', +a06 CHAR(255) NOT NULL DEFAULT 'a', +b INT NOT NULL DEFAULT 0 +) ENGINE = InnoDB; +ALTER TABLE t1 ADD PRIMARY KEY( +a00, +a01, +a02, +a03, +a04, +a05, +a06 +); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +1 +SET GLOBAL innodb_limit_optimistic_insert_debug = 7; +BEGIN; +INSERT INTO t1 (a00) VALUES ('aa'); +INSERT INTO t1 (a00) VALUES ('ab'); +INSERT INTO t1 (a00) VALUES ('ac'); +INSERT INTO t1 (a00) VALUES ('ad'); +INSERT INTO t1 (a00) VALUES ('ae'); +INSERT INTO t1 (a00) VALUES ('af'); +INSERT INTO t1 (a00) VALUES ('ag'); +INSERT INTO t1 (a00) VALUES ('ah'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +3 +BEGIN; +INSERT INTO t1 (a00) VALUES ('ai'); +INSERT INTO t1 (a00) VALUES ('aj'); +INSERT INTO t1 (a00) VALUES ('ak'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +4 +BEGIN; +INSERT INTO t1 (a00) VALUES ('al'); +INSERT INTO t1 (a00) VALUES ('am'); +INSERT INTO t1 (a00) VALUES ('an'); +INSERT INTO t1 (a00) VALUES ('ao'); +INSERT INTO t1 (a00) VALUES ('ap'); +INSERT INTO t1 (a00) VALUES ('aq'); +INSERT INTO t1 (a00) VALUES ('ar'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +5 +BEGIN; +INSERT INTO t1 (a00) VALUES ('as'); +INSERT INTO t1 (a00) VALUES ('at'); +INSERT INTO t1 (a00) VALUES ('au'); +INSERT INTO t1 (a00) VALUES ('av'); +INSERT INTO t1 (a00) VALUES ('aw'); +INSERT INTO t1 (a00) VALUES ('ax'); +INSERT INTO t1 (a00) VALUES ('ay'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +6 +BEGIN; +INSERT INTO t1 (a00) VALUES ('az'); +INSERT INTO t1 (a00) VALUES ('ba'); +INSERT INTO t1 (a00) VALUES ('bb'); +INSERT INTO t1 (a00) VALUES ('bc'); +INSERT INTO t1 (a00) VALUES ('bd'); +INSERT INTO t1 (a00) VALUES ('be'); +INSERT INTO t1 (a00) VALUES ('bf'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +7 +BEGIN; +INSERT INTO t1 (a00) VALUES ('bg'); +INSERT INTO t1 (a00) VALUES ('bh'); +INSERT INTO t1 (a00) VALUES ('bi'); +INSERT INTO t1 (a00) VALUES ('bj'); +INSERT INTO t1 (a00) VALUES ('bk'); +INSERT INTO t1 (a00) VALUES ('bl'); +INSERT INTO t1 (a00) VALUES ('bm'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +8 +BEGIN; +INSERT INTO t1 (a00) VALUES ('bn'); +INSERT INTO t1 (a00) VALUES ('bo'); +INSERT INTO t1 (a00) VALUES ('bp'); +INSERT INTO t1 (a00) VALUES ('bq'); +INSERT INTO t1 (a00) VALUES ('br'); +INSERT INTO t1 (a00) VALUES ('bs'); +INSERT INTO t1 (a00) VALUES ('bt'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +11 +BEGIN; +INSERT INTO t1 (a00) VALUES ('bu'); +INSERT INTO t1 (a00) VALUES ('bv'); +INSERT INTO t1 (a00) VALUES ('bw'); +INSERT INTO t1 (a00) VALUES ('bx'); +INSERT INTO t1 (a00) VALUES ('by'); +INSERT INTO t1 (a00) VALUES ('bz'); +INSERT INTO t1 (a00) VALUES ('ca'); +INSERT INTO t1 (a00) VALUES ('cb'); +INSERT INTO t1 (a00) VALUES ('cc'); +INSERT INTO t1 (a00) VALUES ('cd'); +INSERT INTO t1 (a00) VALUES ('ce'); +INSERT INTO t1 (a00) VALUES ('cf'); +INSERT INTO t1 (a00) VALUES ('cg'); +INSERT INTO t1 (a00) VALUES ('ch'); +INSERT INTO t1 (a00) VALUES ('ci'); +INSERT INTO t1 (a00) VALUES ('cj'); +INSERT INTO t1 (a00) VALUES ('ck'); +INSERT INTO t1 (a00) VALUES ('cl'); +INSERT INTO t1 (a00) VALUES ('cm'); +INSERT INTO t1 (a00) VALUES ('cn'); +INSERT INTO t1 (a00) VALUES ('co'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +15 +BEGIN; +INSERT INTO t1 (a00) VALUES ('cp'); +INSERT INTO t1 (a00) VALUES ('cq'); +INSERT INTO t1 (a00) VALUES ('cr'); +INSERT INTO t1 (a00) VALUES ('cs'); +INSERT INTO t1 (a00) VALUES ('ct'); +INSERT INTO t1 (a00) VALUES ('cu'); +INSERT INTO t1 (a00) VALUES ('cv'); +INSERT INTO t1 (a00) VALUES ('cw'); +INSERT INTO t1 (a00) VALUES ('cx'); +INSERT INTO t1 (a00) VALUES ('cy'); +INSERT INTO t1 (a00) VALUES ('cz'); +INSERT INTO t1 (a00) VALUES ('da'); +INSERT INTO t1 (a00) VALUES ('db'); +INSERT INTO t1 (a00) VALUES ('dc'); +INSERT INTO t1 (a00) VALUES ('dd'); +INSERT INTO t1 (a00) VALUES ('de'); +INSERT INTO t1 (a00) VALUES ('df'); +INSERT INTO t1 (a00) VALUES ('dg'); +INSERT INTO t1 (a00) VALUES ('dh'); +INSERT INTO t1 (a00) VALUES ('di'); +INSERT INTO t1 (a00) VALUES ('dj'); +INSERT INTO t1 (a00) VALUES ('dk'); +INSERT INTO t1 (a00) VALUES ('dl'); +INSERT INTO t1 (a00) VALUES ('dm'); +INSERT INTO t1 (a00) VALUES ('dn'); +INSERT INTO t1 (a00) VALUES ('do'); +INSERT INTO t1 (a00) VALUES ('dp'); +INSERT INTO t1 (a00) VALUES ('dq'); +INSERT INTO t1 (a00) VALUES ('dr'); +INSERT INTO t1 (a00) VALUES ('ds'); +INSERT INTO t1 (a00) VALUES ('dt'); +INSERT INTO t1 (a00) VALUES ('du'); +INSERT INTO t1 (a00) VALUES ('dv'); +INSERT INTO t1 (a00) VALUES ('dw'); +INSERT INTO t1 (a00) VALUES ('dx'); +INSERT INTO t1 (a00) VALUES ('dy'); +INSERT INTO t1 (a00) VALUES ('dz'); +INSERT INTO t1 (a00) VALUES ('ea'); +INSERT INTO t1 (a00) VALUES ('eb'); +INSERT INTO t1 (a00) VALUES ('ec'); +INSERT INTO t1 (a00) VALUES ('ed'); +INSERT INTO t1 (a00) VALUES ('ee'); +INSERT INTO t1 (a00) VALUES ('ef'); +INSERT INTO t1 (a00) VALUES ('eg'); +INSERT INTO t1 (a00) VALUES ('eh'); +INSERT INTO t1 (a00) VALUES ('ei'); +INSERT INTO t1 (a00) VALUES ('ej'); +INSERT INTO t1 (a00) VALUES ('ek'); +INSERT INTO t1 (a00) VALUES ('el'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +23 +BEGIN; +INSERT INTO t1 (a00) VALUES ('em'); +INSERT INTO t1 (a00) VALUES ('en'); +INSERT INTO t1 (a00) VALUES ('eo'); +INSERT INTO t1 (a00) VALUES ('ep'); +INSERT INTO t1 (a00) VALUES ('eq'); +INSERT INTO t1 (a00) VALUES ('er'); +INSERT INTO t1 (a00) VALUES ('es'); +INSERT INTO t1 (a00) VALUES ('et'); +INSERT INTO t1 (a00) VALUES ('eu'); +INSERT INTO t1 (a00) VALUES ('ev'); +INSERT INTO t1 (a00) VALUES ('ew'); +INSERT INTO t1 (a00) VALUES ('ex'); +INSERT INTO t1 (a00) VALUES ('ey'); +INSERT INTO t1 (a00) VALUES ('ez'); +INSERT INTO t1 (a00) VALUES ('fa'); +INSERT INTO t1 (a00) VALUES ('fb'); +INSERT INTO t1 (a00) VALUES ('fc'); +INSERT INTO t1 (a00) VALUES ('fd'); +INSERT INTO t1 (a00) VALUES ('fe'); +INSERT INTO t1 (a00) VALUES ('ff'); +INSERT INTO t1 (a00) VALUES ('fg'); +INSERT INTO t1 (a00) VALUES ('fh'); +INSERT INTO t1 (a00) VALUES ('fi'); +INSERT INTO t1 (a00) VALUES ('fj'); +INSERT INTO t1 (a00) VALUES ('fk'); +INSERT INTO t1 (a00) VALUES ('fl'); +INSERT INTO t1 (a00) VALUES ('fm'); +INSERT INTO t1 (a00) VALUES ('fn'); +INSERT INTO t1 (a00) VALUES ('fo'); +INSERT INTO t1 (a00) VALUES ('fp'); +INSERT INTO t1 (a00) VALUES ('fq'); +INSERT INTO t1 (a00) VALUES ('fr'); +INSERT INTO t1 (a00) VALUES ('fs'); +INSERT INTO t1 (a00) VALUES ('ft'); +INSERT INTO t1 (a00) VALUES ('fu'); +INSERT INTO t1 (a00) VALUES ('fv'); +INSERT INTO t1 (a00) VALUES ('fw'); +INSERT INTO t1 (a00) VALUES ('fx'); +INSERT INTO t1 (a00) VALUES ('fy'); +INSERT INTO t1 (a00) VALUES ('fz'); +INSERT INTO t1 (a00) VALUES ('ga'); +INSERT INTO t1 (a00) VALUES ('gb'); +INSERT INTO t1 (a00) VALUES ('gc'); +INSERT INTO t1 (a00) VALUES ('gd'); +INSERT INTO t1 (a00) VALUES ('ge'); +INSERT INTO t1 (a00) VALUES ('gf'); +INSERT INTO t1 (a00) VALUES ('gg'); +INSERT INTO t1 (a00) VALUES ('gh'); +COMMIT; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +29 +SET GLOBAL innodb_limit_optimistic_insert_debug = 0; +# Test start +SET DEBUG_SYNC = 'RESET'; +INSERT INTO t1 (a00) VALUES ('bfa'); +connection con1; +SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; +INSERT INTO t1 (a00) VALUES ('bfb'); +connection con2; +SET DEBUG_SYNC = 'now WAIT_FOR reached'; +SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; +a00 a01 +aa a +SELECT a00,a01 FROM t1 WHERE a00 = 'aq'; +a00 a01 +aq a +SELECT a00,a01 FROM t1 WHERE a00 = 'cp'; +a00 a01 +cp a +SELECT a00,a01 FROM t1 WHERE a00 = 'el'; +a00 a01 +el a +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; +SELECT a00,a01 FROM t1 WHERE a00 = 'ar'; +connection con3; +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; +SELECT a00,a01 FROM t1 WHERE a00 = 'cn'; +connection default; +SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; +SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; +SET DEBUG_SYNC = 'now SIGNAL continue'; +connection con1; +connection con2; +a00 a01 +ar a +connection con3; +a00 a01 +cn a +connection default; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +30 +SET DEBUG_SYNC = 'RESET'; +INSERT INTO t1 (a00) VALUES ('coa'); +connection con1; +SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; +INSERT INTO t1 (a00) VALUES ('cob'); +connection con2; +SET DEBUG_SYNC = 'now WAIT_FOR reached'; +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; +SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; +connection con3; +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; +SELECT a00,a01 FROM t1 WHERE a00 = 'el'; +connection default; +SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; +SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; +SET DEBUG_SYNC = 'now SIGNAL continue'; +connection con1; +connection con2; +a00 a01 +aa a +connection con3; +a00 a01 +el a +connection default; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +31 +SET DEBUG_SYNC = 'RESET'; +INSERT INTO t1 (a00) VALUES ('gba'); +connection con1; +SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; +INSERT INTO t1 (a00) VALUES ('gbb'); +connection con2; +SET DEBUG_SYNC = 'now WAIT_FOR reached'; +SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; +a00 a01 +aa a +SELECT a00,a01 FROM t1 WHERE a00 = 'ek'; +a00 a01 +ek a +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; +SELECT a00,a01 FROM t1 WHERE a00 = 'el'; +connection con3; +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; +SELECT a00,a01 FROM t1 WHERE a00 = 'gb'; +connection default; +SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; +SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; +SET DEBUG_SYNC = 'now SIGNAL continue'; +connection con1; +connection con2; +a00 a01 +el a +connection con3; +a00 a01 +gb a +connection default; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; +CLUST_INDEX_SIZE +32 +SET DEBUG_SYNC = 'RESET'; +connection default; +disconnect con1; +disconnect con2; +disconnect con3; +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.opt b/mysql-test/suite/innodb/t/innodb_wl6326.opt new file mode 100644 index 00000000000..99bf0e5a28b --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_wl6326.opt @@ -0,0 +1 @@ +--innodb-sys-tablestats diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.test b/mysql-test/suite/innodb/t/innodb_wl6326.test new file mode 100644 index 00000000000..1cf98cd1c7b --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_wl6326.test @@ -0,0 +1,519 @@ +# +# WL#6326: InnoDB: fix index->lock contention +# + +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_innodb_16k.inc + +--disable_query_log +SET @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug; +SET @old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index; +SET @old_innodb_stats_persistent = @@innodb_stats_persistent; +--enable_query_log + +# Save the initial number of concurrent sessions +--source include/count_sessions.inc + +SET GLOBAL innodb_adaptive_hash_index = false; +SET GLOBAL innodb_stats_persistent = false; + +--connect (con1,localhost,root,,) +--connect (con2,localhost,root,,) +--connect (con3,localhost,root,,) + +CREATE TABLE t1 ( + a00 CHAR(255) NOT NULL DEFAULT 'a', + a01 CHAR(255) NOT NULL DEFAULT 'a', + a02 CHAR(255) NOT NULL DEFAULT 'a', + a03 CHAR(255) NOT NULL DEFAULT 'a', + a04 CHAR(255) NOT NULL DEFAULT 'a', + a05 CHAR(255) NOT NULL DEFAULT 'a', + a06 CHAR(255) NOT NULL DEFAULT 'a', + b INT NOT NULL DEFAULT 0 +) ENGINE = InnoDB; + +ALTER TABLE t1 ADD PRIMARY KEY( + a00, + a01, + a02, + a03, + a04, + a05, + a06 +); + +# +# Prepare primary key index tree to be used for this test. +# + +# Only root (1) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +# Make the first records sparse artificially, +# not to cause modify_tree by single node_ptr insert operation. +# * (7 - 2) records should be larger than a half of the page size +# * (7 + 2) records should be fit to the page +# (above t1 definition is already adjusted) +SET GLOBAL innodb_limit_optimistic_insert_debug = 7; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('aa'); +INSERT INTO t1 (a00) VALUES ('ab'); +INSERT INTO t1 (a00) VALUES ('ac'); +INSERT INTO t1 (a00) VALUES ('ad'); +INSERT INTO t1 (a00) VALUES ('ae'); +INSERT INTO t1 (a00) VALUES ('af'); +INSERT INTO t1 (a00) VALUES ('ag'); +INSERT INTO t1 (a00) VALUES ('ah'); +COMMIT; +# Raise root (1-2) +# (aa,ad) +# (aa,ab,ac)(ad,ae,af,ag,ah) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('ai'); +INSERT INTO t1 (a00) VALUES ('aj'); +INSERT INTO t1 (a00) VALUES ('ak'); +COMMIT; +# Split leaf (1-3) +# (aa,ad,ak) +# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('al'); +INSERT INTO t1 (a00) VALUES ('am'); +INSERT INTO t1 (a00) VALUES ('an'); +INSERT INTO t1 (a00) VALUES ('ao'); +INSERT INTO t1 (a00) VALUES ('ap'); +INSERT INTO t1 (a00) VALUES ('aq'); +INSERT INTO t1 (a00) VALUES ('ar'); +COMMIT; +# Split leaf (1-4) +# (aa,ad,ak,ar) +# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('as'); +INSERT INTO t1 (a00) VALUES ('at'); +INSERT INTO t1 (a00) VALUES ('au'); +INSERT INTO t1 (a00) VALUES ('av'); +INSERT INTO t1 (a00) VALUES ('aw'); +INSERT INTO t1 (a00) VALUES ('ax'); +INSERT INTO t1 (a00) VALUES ('ay'); +COMMIT; +# Split leaf (1-5) +# (aa,ad,ak,ar,ay) +# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar,as,at,au,av,aw,ax)(ay) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('az'); +INSERT INTO t1 (a00) VALUES ('ba'); +INSERT INTO t1 (a00) VALUES ('bb'); +INSERT INTO t1 (a00) VALUES ('bc'); +INSERT INTO t1 (a00) VALUES ('bd'); +INSERT INTO t1 (a00) VALUES ('be'); +INSERT INTO t1 (a00) VALUES ('bf'); +COMMIT; +# Split leaf (1-6) +# (aa,ad,ak,ar,ay,bf) +# (aa,ab,ac)(ad..)(ak..)(ar,as,at,au,av,aw,ax)(ay,az,ba,bb,bc,bd,be)(bf) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('bg'); +INSERT INTO t1 (a00) VALUES ('bh'); +INSERT INTO t1 (a00) VALUES ('bi'); +INSERT INTO t1 (a00) VALUES ('bj'); +INSERT INTO t1 (a00) VALUES ('bk'); +INSERT INTO t1 (a00) VALUES ('bl'); +INSERT INTO t1 (a00) VALUES ('bm'); +COMMIT; +# Split leaf (1-7) +# (aa,ad,ak,ar,ay,bf,bm) +# (aa,ab,ac)(ad..)(ak..)(ar..)(ay,az,ba,bb,bc,bd,be)(bf,bg,bh,bi,bj,bk,bl)(bm) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('bn'); +INSERT INTO t1 (a00) VALUES ('bo'); +INSERT INTO t1 (a00) VALUES ('bp'); +INSERT INTO t1 (a00) VALUES ('bq'); +INSERT INTO t1 (a00) VALUES ('br'); +INSERT INTO t1 (a00) VALUES ('bs'); +INSERT INTO t1 (a00) VALUES ('bt'); +COMMIT; +# Raise root (1-2-8) +# (aa,ar) +# (aa,ad,ak) (ar,ay,bf,bm,bt) +# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('bu'); +INSERT INTO t1 (a00) VALUES ('bv'); +INSERT INTO t1 (a00) VALUES ('bw'); +INSERT INTO t1 (a00) VALUES ('bx'); +INSERT INTO t1 (a00) VALUES ('by'); +INSERT INTO t1 (a00) VALUES ('bz'); +INSERT INTO t1 (a00) VALUES ('ca'); + +INSERT INTO t1 (a00) VALUES ('cb'); +INSERT INTO t1 (a00) VALUES ('cc'); +INSERT INTO t1 (a00) VALUES ('cd'); +INSERT INTO t1 (a00) VALUES ('ce'); +INSERT INTO t1 (a00) VALUES ('cf'); +INSERT INTO t1 (a00) VALUES ('cg'); +INSERT INTO t1 (a00) VALUES ('ch'); + +INSERT INTO t1 (a00) VALUES ('ci'); +INSERT INTO t1 (a00) VALUES ('cj'); +INSERT INTO t1 (a00) VALUES ('ck'); +INSERT INTO t1 (a00) VALUES ('cl'); +INSERT INTO t1 (a00) VALUES ('cm'); +INSERT INTO t1 (a00) VALUES ('cn'); +INSERT INTO t1 (a00) VALUES ('co'); +COMMIT; +# Split also at level 1 (1-3-11) +# (aa,ar,co) +# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co) +# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('cp'); +INSERT INTO t1 (a00) VALUES ('cq'); +INSERT INTO t1 (a00) VALUES ('cr'); +INSERT INTO t1 (a00) VALUES ('cs'); +INSERT INTO t1 (a00) VALUES ('ct'); +INSERT INTO t1 (a00) VALUES ('cu'); +INSERT INTO t1 (a00) VALUES ('cv'); + +INSERT INTO t1 (a00) VALUES ('cw'); +INSERT INTO t1 (a00) VALUES ('cx'); +INSERT INTO t1 (a00) VALUES ('cy'); +INSERT INTO t1 (a00) VALUES ('cz'); +INSERT INTO t1 (a00) VALUES ('da'); +INSERT INTO t1 (a00) VALUES ('db'); +INSERT INTO t1 (a00) VALUES ('dc'); + +INSERT INTO t1 (a00) VALUES ('dd'); +INSERT INTO t1 (a00) VALUES ('de'); +INSERT INTO t1 (a00) VALUES ('df'); +INSERT INTO t1 (a00) VALUES ('dg'); +INSERT INTO t1 (a00) VALUES ('dh'); +INSERT INTO t1 (a00) VALUES ('di'); +INSERT INTO t1 (a00) VALUES ('dj'); + +INSERT INTO t1 (a00) VALUES ('dk'); +INSERT INTO t1 (a00) VALUES ('dl'); +INSERT INTO t1 (a00) VALUES ('dm'); +INSERT INTO t1 (a00) VALUES ('dn'); +INSERT INTO t1 (a00) VALUES ('do'); +INSERT INTO t1 (a00) VALUES ('dp'); +INSERT INTO t1 (a00) VALUES ('dq'); + +INSERT INTO t1 (a00) VALUES ('dr'); +INSERT INTO t1 (a00) VALUES ('ds'); +INSERT INTO t1 (a00) VALUES ('dt'); +INSERT INTO t1 (a00) VALUES ('du'); +INSERT INTO t1 (a00) VALUES ('dv'); +INSERT INTO t1 (a00) VALUES ('dw'); +INSERT INTO t1 (a00) VALUES ('dx'); + +INSERT INTO t1 (a00) VALUES ('dy'); +INSERT INTO t1 (a00) VALUES ('dz'); +INSERT INTO t1 (a00) VALUES ('ea'); +INSERT INTO t1 (a00) VALUES ('eb'); +INSERT INTO t1 (a00) VALUES ('ec'); +INSERT INTO t1 (a00) VALUES ('ed'); +INSERT INTO t1 (a00) VALUES ('ee'); + +INSERT INTO t1 (a00) VALUES ('ef'); +INSERT INTO t1 (a00) VALUES ('eg'); +INSERT INTO t1 (a00) VALUES ('eh'); +INSERT INTO t1 (a00) VALUES ('ei'); +INSERT INTO t1 (a00) VALUES ('ej'); +INSERT INTO t1 (a00) VALUES ('ek'); +INSERT INTO t1 (a00) VALUES ('el'); +COMMIT; +# Split also at level 1 (1-4-18) +# (aa,ar,co,el) +# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el) +# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +BEGIN; +INSERT INTO t1 (a00) VALUES ('em'); +INSERT INTO t1 (a00) VALUES ('en'); +INSERT INTO t1 (a00) VALUES ('eo'); +INSERT INTO t1 (a00) VALUES ('ep'); +INSERT INTO t1 (a00) VALUES ('eq'); +INSERT INTO t1 (a00) VALUES ('er'); +INSERT INTO t1 (a00) VALUES ('es'); + +INSERT INTO t1 (a00) VALUES ('et'); +INSERT INTO t1 (a00) VALUES ('eu'); +INSERT INTO t1 (a00) VALUES ('ev'); +INSERT INTO t1 (a00) VALUES ('ew'); +INSERT INTO t1 (a00) VALUES ('ex'); +INSERT INTO t1 (a00) VALUES ('ey'); +INSERT INTO t1 (a00) VALUES ('ez'); + +INSERT INTO t1 (a00) VALUES ('fa'); +INSERT INTO t1 (a00) VALUES ('fb'); +INSERT INTO t1 (a00) VALUES ('fc'); +INSERT INTO t1 (a00) VALUES ('fd'); +INSERT INTO t1 (a00) VALUES ('fe'); +INSERT INTO t1 (a00) VALUES ('ff'); +INSERT INTO t1 (a00) VALUES ('fg'); + +INSERT INTO t1 (a00) VALUES ('fh'); +INSERT INTO t1 (a00) VALUES ('fi'); +INSERT INTO t1 (a00) VALUES ('fj'); +INSERT INTO t1 (a00) VALUES ('fk'); +INSERT INTO t1 (a00) VALUES ('fl'); +INSERT INTO t1 (a00) VALUES ('fm'); +INSERT INTO t1 (a00) VALUES ('fn'); + +INSERT INTO t1 (a00) VALUES ('fo'); +INSERT INTO t1 (a00) VALUES ('fp'); +INSERT INTO t1 (a00) VALUES ('fq'); +INSERT INTO t1 (a00) VALUES ('fr'); +INSERT INTO t1 (a00) VALUES ('fs'); +INSERT INTO t1 (a00) VALUES ('ft'); +INSERT INTO t1 (a00) VALUES ('fu'); + +INSERT INTO t1 (a00) VALUES ('fv'); +INSERT INTO t1 (a00) VALUES ('fw'); +INSERT INTO t1 (a00) VALUES ('fx'); +INSERT INTO t1 (a00) VALUES ('fy'); +INSERT INTO t1 (a00) VALUES ('fz'); +INSERT INTO t1 (a00) VALUES ('ga'); +INSERT INTO t1 (a00) VALUES ('gb'); + +INSERT INTO t1 (a00) VALUES ('gc'); +INSERT INTO t1 (a00) VALUES ('gd'); +INSERT INTO t1 (a00) VALUES ('ge'); +INSERT INTO t1 (a00) VALUES ('gf'); +INSERT INTO t1 (a00) VALUES ('gg'); +INSERT INTO t1 (a00) VALUES ('gh'); +COMMIT; + +# Current tree form (1-4-24) +# (aa,ar,co,el) +# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el..,gb) +# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el..)..(gb..) +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + +# Insert the rest of records normally +SET GLOBAL innodb_limit_optimistic_insert_debug = 0; + +--echo # Test start + +# (1) Insert records to leaf page (bf..) and cause modify_page. +# - root page is not X latched +# - latched from level 1 page (ar,ay,bf,bm,bt,ca,ch) + +SET DEBUG_SYNC = 'RESET'; + +# Filling leaf page (bf..) +INSERT INTO t1 (a00) VALUES ('bfa'); + +--connection con1 +SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; +# Cause modify_tree +--send +INSERT INTO t1 (a00) VALUES ('bfb'); + +--connection con2 +SET DEBUG_SYNC = 'now WAIT_FOR reached'; +# Not blocked searches +SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; +SELECT a00,a01 FROM t1 WHERE a00 = 'aq'; +# "where a00 = 'co'" is blocked because searching from smaller ('co','a','a',..). +SELECT a00,a01 FROM t1 WHERE a00 = 'cp'; +SELECT a00,a01 FROM t1 WHERE a00 = 'el'; + +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; +# Blocked +--send +SELECT a00,a01 FROM t1 WHERE a00 = 'ar'; + +--connection con3 +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; +# Blocked +--send +SELECT a00,a01 FROM t1 WHERE a00 = 'cn'; + +--connection default +# FIXME: These occasionally time out! +--disable_warnings +SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; +SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; +--enable_warnings +SET DEBUG_SYNC = 'now SIGNAL continue'; + +--connection con1 +--reap + +--connection con2 +--reap + +--connection con3 +--reap + +--connection default + +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + + + +# (2) Insert records to leaf page (co..) and cause modify_page +# - root page is X latched, because node_ptr for 'co' +# is 1st record for (co,cv,dc,dj,dq,dx,ee) +# +# * ordinary pessimitic insert might be done by pessistic update +# and we should consider possibility node_ptr to be deleted. + +SET DEBUG_SYNC = 'RESET'; + +# Filling leaf page (co..) +INSERT INTO t1 (a00) VALUES ('coa'); + +--connection con1 +SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; +# Cause modify_tree +--send +INSERT INTO t1 (a00) VALUES ('cob'); + +--connection con2 +SET DEBUG_SYNC = 'now WAIT_FOR reached'; +# All searches are blocked because root page is X latched + +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; +# Blocked +--send +SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; + +--connection con3 +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; +# Blocked +--send +SELECT a00,a01 FROM t1 WHERE a00 = 'el'; + +--connection default +# FIXME: These occasionally time out! +--disable_warnings +SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; +SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; +--enable_warnings +SET DEBUG_SYNC = 'now SIGNAL continue'; + +--connection con1 +--reap + +--connection con2 +--reap + +--connection con3 +--reap + +--connection default + +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + + + +# (3) Insert records to rightmost leaf page (gb..) and cause modify_page +# - root page is not X latched, because node_ptr for 'gb' is the last record +# of the level 1 though it is last record in the page. +# - lathed from level 1 page (el..,gb) + +SET DEBUG_SYNC = 'RESET'; + +# Filling leaf page (gb..) +INSERT INTO t1 (a00) VALUES ('gba'); + +--connection con1 +SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; +# Cause modify_tree +--send +INSERT INTO t1 (a00) VALUES ('gbb'); + +--connection con2 +SET DEBUG_SYNC = 'now WAIT_FOR reached'; +# Not blocked searches +SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; +SELECT a00,a01 FROM t1 WHERE a00 = 'ek'; + +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; +# Blocked +--send +SELECT a00,a01 FROM t1 WHERE a00 = 'el'; + +--connection con3 +SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; +# Blocked +--send +SELECT a00,a01 FROM t1 WHERE a00 = 'gb'; + +--connection default +# FIXME: These occasionally time out! +--disable_warnings +SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; +SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; +--enable_warnings +SET DEBUG_SYNC = 'now SIGNAL continue'; + +--connection con1 +--reap + +--connection con2 +--reap + +--connection con3 +--reap + +--connection default +ANALYZE TABLE t1; +SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; + + + +# Cleanup +SET DEBUG_SYNC = 'RESET'; + +--connection default +--disconnect con1 +--disconnect con2 +--disconnect con3 + +DROP TABLE t1; + +--disable_query_log +SET GLOBAL innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug; +SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; +SET GLOBAL innodb_stats_persistent = @old_innodb_stats_persistent; +--enable_query_log + +# Wait till all disconnects are completed. +--source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/innodb_gis/r/rtree_split.result b/mysql-test/suite/innodb_gis/r/rtree_split.result index 10262f0220b..8e475776ce0 100644 --- a/mysql-test/suite/innodb_gis/r/rtree_split.result +++ b/mysql-test/suite/innodb_gis/r/rtree_split.result @@ -61,15 +61,3 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1); count(*) 57344 drop table t1; -# -# MDEV-30400 Assertion height == btr_page_get_level ... on INSERT -# -CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB; -SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; -SET GLOBAL innodb_limit_optimistic_insert_debug=2; -BEGIN; -INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_366; -ROLLBACK; -SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit; -DROP TABLE t1; -# End of 10.6 tests diff --git a/mysql-test/suite/innodb_gis/t/rtree_split.test b/mysql-test/suite/innodb_gis/t/rtree_split.test index de7fc676e0e..6f285187508 100644 --- a/mysql-test/suite/innodb_gis/t/rtree_split.test +++ b/mysql-test/suite/innodb_gis/t/rtree_split.test @@ -73,18 +73,3 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1); # Clean up. drop table t1; - ---echo # ---echo # MDEV-30400 Assertion height == btr_page_get_level ... on INSERT ---echo # - -CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB; -SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; -SET GLOBAL innodb_limit_optimistic_insert_debug=2; -BEGIN; -INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_366; -ROLLBACK; -SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit; -DROP TABLE t1; - ---echo # End of 10.6 tests diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index ef44ed5d9d6..0bb16dba374 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2023, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -460,53 +460,6 @@ btr_page_create( } } -buf_block_t * -mtr_t::get_already_latched(const page_id_t id, mtr_memo_type_t type) const -{ - ut_ad(is_active()); - ut_ad(type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX || - type == MTR_MEMO_PAGE_S_FIX); - for (ulint i= 0; i < m_memo.size(); i++) - { - const mtr_memo_slot_t &slot= m_memo[i]; - const auto slot_type= mtr_memo_type_t(slot.type & ~MTR_MEMO_MODIFY); - if (slot_type == MTR_MEMO_PAGE_X_FIX || slot_type == type) - { - buf_block_t *block= static_cast<buf_block_t*>(slot.object); - if (block->page.id() == id) - return block; - } - } - return nullptr; -} - -/** Fetch an index root page that was already latched in the -mini-transaction. */ -static buf_block_t *btr_get_latched_root(const dict_index_t &index, mtr_t *mtr) -{ - return mtr->get_already_latched(page_id_t{index.table->space_id, index.page}, - MTR_MEMO_PAGE_SX_FIX); -} - -/** Fet an index page that should have been already latched in the -mini-transaction. */ -static buf_block_t * -btr_block_reget(mtr_t *mtr, const dict_index_t &index, - const page_id_t id, rw_lock_type_t rw_latch, - dberr_t *err) -{ - if (buf_block_t *block= - mtr->get_already_latched(id, mtr_memo_type_t(rw_latch))) - { - *err= DB_SUCCESS; - return block; - } - - /* MDEV-29385 FIXME: Acquire the page latch upfront. */ - ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK)); - return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err); -} - /**************************************************************//** Allocates a new file page to be used in an ibuf tree. Takes the page from the free list of the tree, which must contain pages! @@ -519,16 +472,18 @@ btr_page_alloc_for_ibuf( mtr_t* mtr, /*!< in: mtr */ dberr_t* err) /*!< out: error code */ { - buf_block_t *root= btr_get_latched_root(*index, mtr); + buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err); if (UNIV_UNLIKELY(!root)) return root; + buf_block_t *new_block= - buf_page_get_gen(page_id_t(IBUF_SPACE_ID, + buf_page_get_gen(page_id_t(index->table->space_id, mach_read_from_4(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST + FLST_FIRST + FIL_ADDR_PAGE + root->page.frame)), - 0, RW_X_LATCH, nullptr, BUF_GET, mtr, err); + index->table->space->zip_size(), RW_X_LATCH, nullptr, + BUF_GET, mtr, err); if (new_block) *err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); @@ -568,11 +523,11 @@ btr_page_alloc_low( #ifdef BTR_CUR_HASH_ADAPT ut_ad(!root->index || !root->index->freed()); #endif - mtr->rollback_to_savepoint(savepoint); + mtr->release_block_at_savepoint(savepoint, root); } else { - mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX); + mtr->u_lock_register(savepoint); root->page.lock.u_lock(); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(root, true); @@ -624,12 +579,15 @@ btr_page_free_for_ibuf( mtr_t* mtr) /*!< in: mtr */ { ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); - buf_block_t *root= btr_get_latched_root(*index, mtr); - dberr_t err= - flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + + dberr_t err; + if (buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err)) + { + err= flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); - ut_d(if (err == DB_SUCCESS) - flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); + ut_d(if (err == DB_SUCCESS) + flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); + } return err; } @@ -679,11 +637,11 @@ dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, #ifdef BTR_CUR_HASH_ADAPT ut_ad(!root->index || !root->index->freed()); #endif - mtr->rollback_to_savepoint(savepoint); + mtr->release_block_at_savepoint(savepoint, root); } else { - mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX); + mtr->u_lock_register(savepoint); root->page.lock.u_lock(); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(root, true); @@ -754,27 +712,35 @@ btr_node_ptr_get_child( mtr, err); } -MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) +MY_ATTRIBUTE((nonnull(2,3,5), warn_unused_result)) /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds an sx-latch on the tree. @return rec_get_offsets() of the node pointer record */ static rec_offs* -btr_page_get_father_node_ptr_for_validate( +btr_page_get_father_node_ptr_func( +/*==============================*/ rec_offs* offsets,/*!< in: work area for the return value */ mem_heap_t* heap, /*!< in: memory heap to use */ btr_cur_t* cursor, /*!< in: cursor pointing to user record, out: cursor on node pointer record, its page x-latched */ + btr_latch_mode latch_mode,/*!< in: BTR_CONT_MODIFY_TREE + or BTR_CONT_SEARCH_TREE */ mtr_t* mtr) /*!< in: mtr */ { + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE + || latch_mode == BTR_CONT_SEARCH_TREE); + const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); dict_index_t* index = btr_cur_get_index(cursor); ut_ad(!dict_index_is_spatial(index)); - ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); + ut_ad(srv_read_only_mode + || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + ut_ad(dict_index_get_page(index) != page_no); const auto level = btr_page_get_level(btr_cur_get_page(cursor)); @@ -786,16 +752,12 @@ btr_page_get_father_node_ptr_for_validate( dict_index_build_node_ptr(index, user_rec, 0, heap, level), - RW_S_LATCH, + PAGE_CUR_LE, latch_mode, cursor, mtr) != DB_SUCCESS) { return nullptr; } const rec_t* node_ptr = btr_cur_get_rec(cursor); -#if 0 /* MDEV-29835 FIXME */ - ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive() - || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); -#endif offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); @@ -807,64 +769,13 @@ btr_page_get_father_node_ptr_for_validate( return(offsets); } -MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) -/************************************************************//** -Returns the upper level node pointer to a page. It is assumed that -it has already been latched. -@return rec_get_offsets() of the node pointer record */ -static -rec_offs* -btr_page_get_parent( - rec_offs* offsets,/*!< in: work area for the return value */ - mem_heap_t* heap, /*!< in: memory heap to use */ - btr_cur_t* cursor, /*!< in: cursor pointing to user record, - out: cursor on node pointer record, - its page x-latched */ - mtr_t* mtr) /*!< in: mtr */ -{ - const uint32_t page_no= cursor->block()->page.id().page_no(); - const dict_index_t *index= cursor->index(); - ut_ad(!index->is_spatial()); - ut_ad(index->page != page_no); - - uint32_t p= index->page; - const dtuple_t *tuple= - dict_index_build_node_ptr(index, btr_cur_get_rec(cursor), 0, heap, - btr_page_get_level(btr_cur_get_page(cursor))); - - ulint i; - for (i= 0; i < mtr->get_savepoint(); i++) - if (buf_block_t *block= mtr->block_at_savepoint(i)) - if (block->page.id().page_no() == p) - { - ut_ad(block->page.lock.have_u_or_x() || - (!block->page.lock.have_s() && index->lock.have_x())); - ulint up_match= 0, low_match= 0; - cursor->page_cur.block= block; - if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &up_match, - &low_match, &cursor->page_cur, - nullptr)) - return nullptr; - offsets= rec_get_offsets(cursor->page_cur.rec, index, offsets, 0, - ULINT_UNDEFINED, &heap); - p= btr_node_ptr_get_child_page_no(cursor->page_cur.rec, offsets); - if (p != page_no) - { - i= 0; // MDEV-29835 FIXME: require all pages to be latched in order! - continue; - } - ut_ad(block->page.lock.have_u_or_x()); - if (block->page.lock.have_u_not_x()) - { - ut_ad(block->page.id().page_no() == index->page); - block->page.lock.u_x_upgrade(); - mtr->page_lock_upgrade(*block); - } - return offsets; - } +#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \ + btr_page_get_father_node_ptr_func( \ + of,heap,cur,BTR_CONT_MODIFY_TREE,mtr) - return nullptr; -} +#define btr_page_get_father_node_ptr_for_validate(of,heap,cur,mtr) \ + btr_page_get_father_node_ptr_func( \ + of,heap,cur,BTR_CONT_SEARCH_TREE,mtr) /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds @@ -885,7 +796,7 @@ btr_page_get_father_block( if (UNIV_UNLIKELY(!rec)) return nullptr; cursor->page_cur.rec= rec; - return btr_page_get_parent(offsets, heap, cursor, mtr); + return btr_page_get_father_node_ptr(offsets, heap, cursor, mtr); } /** Seek to the parent page of a B-tree page. @@ -900,7 +811,7 @@ bool btr_page_get_father(mtr_t* mtr, btr_cur_t* cursor) return false; cursor->page_cur.rec= rec; mem_heap_t *heap= mem_heap_create(100); - const bool got= btr_page_get_parent(nullptr, heap, cursor, mtr); + const bool got= btr_page_get_father_node_ptr(nullptr, heap, cursor, mtr); mem_heap_free(heap); return got; } @@ -1807,43 +1718,48 @@ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. @param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction */ +@param[in,out] mtr mini-transaction +@return error code */ ATTRIBUTE_COLD -void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) +dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) { ut_ad(!index.table->is_temporary()); ut_ad(index.is_primary()); - buf_block_t *root= btr_get_latched_root(index, mtr); - byte *page_type= root->page.frame + FIL_PAGE_TYPE; - if (all) - { - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || - mach_read_from_2(page_type) == FIL_PAGE_INDEX); - mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX); - byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame; - mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant, - page_ptr_get_direction(instant + 1)); - } - else - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT); - static const byte supremuminfimum[8 + 8] = "supremuminfimum"; - uint16_t infimum, supremum; - if (page_is_comp(root->page.frame)) - { - infimum= PAGE_NEW_INFIMUM; - supremum= PAGE_NEW_SUPREMUM; - } - else + dberr_t err; + if (buf_block_t *root= btr_root_block_get(&index, RW_SX_LATCH, mtr, &err)) { - infimum= PAGE_OLD_INFIMUM; - supremum= PAGE_OLD_SUPREMUM; + byte *page_type= root->page.frame + FIL_PAGE_TYPE; + if (all) + { + ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || + mach_read_from_2(page_type) == FIL_PAGE_INDEX); + mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX); + byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame; + mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant, + page_ptr_get_direction(instant + 1)); + } + else + ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT); + static const byte supremuminfimum[8 + 8] = "supremuminfimum"; + uint16_t infimum, supremum; + if (page_is_comp(root->page.frame)) + { + infimum= PAGE_NEW_INFIMUM; + supremum= PAGE_NEW_SUPREMUM; + } + else + { + infimum= PAGE_OLD_INFIMUM; + supremum= PAGE_OLD_SUPREMUM; + } + ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) == + !memcmp(&root->page.frame[supremum], supremuminfimum, 8)); + mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[infimum], + supremuminfimum + 8, 8); + mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[supremum], + supremuminfimum, 8); } - ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) == - !memcmp(&root->page.frame[supremum], supremuminfimum, 8)); - mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[infimum], - supremuminfimum + 8, 8); - mtr->memcpy<mtr_t::MAYBE_NOP>(*root, &root->page.frame[supremum], - supremuminfimum, 8); + return err; } /*************************************************************//** @@ -1940,6 +1856,11 @@ btr_root_raise_and_insert( } /* Copy the records from root to the new page one by one. */ + dberr_t e; + if (!err) { + err = &e; + } + if (0 #ifdef UNIV_ZIP_COPY || new_page_zip @@ -2083,15 +2004,21 @@ btr_root_raise_and_insert( page_cursor->block = new_block; page_cursor->index = index; - ut_ad(dtuple_check_typed(tuple)); - /* Reposition the cursor to the child node */ - ulint low_match = 0, up_match = 0; + if (tuple) { + ut_ad(dtuple_check_typed(tuple)); + /* Reposition the cursor to the child node */ + ulint low_match = 0, up_match = 0; - if (page_cur_search_with_match(tuple, PAGE_CUR_LE, - &up_match, &low_match, - page_cursor, nullptr)) { - *err = DB_CORRUPTION; - return nullptr; + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, + &up_match, &low_match, + page_cursor, nullptr)) { + if (err) { + *err = DB_CORRUPTION; + } + return nullptr; + } + } else { + page_cursor->rec = page_get_infimum_rec(new_block->page.frame); } /* Split the child and insert tuple */ @@ -2310,7 +2237,6 @@ func_exit: return(rec); } -#ifdef UNIV_DEBUG /*************************************************************//** Returns TRUE if the insert fits on the appropriate half-page with the chosen split_rec. @@ -2408,7 +2334,6 @@ got_rec: return(false); } -#endif /*******************************************************//** Inserts a data tuple to a tree on a non-leaf level. It is assumed @@ -2431,34 +2356,25 @@ btr_insert_on_non_leaf_level( rtr_info_t rtr_info; ut_ad(level > 0); - - flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG - | BTR_NO_UNDO_LOG_FLAG; - cursor.page_cur.index = index; - - dberr_t err; + auto mode = PAGE_CUR_LE; if (index->is_spatial()) { + mode = PAGE_CUR_RTREE_INSERT; /* For spatial index, initialize structures to track its parents etc. */ rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); rtr_info_update_btr(&cursor, &rtr_info); - err = rtr_search_to_nth_level(level, tuple, - PAGE_CUR_RTREE_INSERT, - BTR_CONT_MODIFY_TREE, - &cursor, mtr); - } else { - err = btr_cur_search_to_nth_level(level, tuple, RW_X_LATCH, - &cursor, mtr); } + flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG + | BTR_NO_UNDO_LOG_FLAG; + cursor.page_cur.index = index; + + dberr_t err = btr_cur_search_to_nth_level(level, tuple, mode, + BTR_CONT_MODIFY_TREE, + &cursor, mtr); ut_ad(cursor.flag == BTR_CUR_BINARY); -#if 0 /* MDEV-29835 FIXME */ - ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive() - || index->is_spatial() - || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); -#endif if (UNIV_LIKELY(err == DB_SUCCESS)) { err = btr_cur_optimistic_insert(flags, @@ -2554,7 +2470,6 @@ btr_attach_half_pages( /* Get the level of the split pages */ const ulint level = btr_page_get_level(block->page.frame); ut_ad(level == btr_page_get_level(new_block->page.frame)); - page_id_t id{block->page.id()}; /* Get the previous and next pages of page */ const uint32_t prev_page_no = btr_page_get_prev(block->page.frame); @@ -2562,32 +2477,12 @@ btr_attach_half_pages( /* for consistency, both blocks should be locked, before change */ if (prev_page_no != FIL_NULL && direction == FSP_DOWN) { - id.set_page_no(prev_page_no); - prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); -#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ - if (!prev_block) { -# if 0 /* MDEV-29835 FIXME */ - ut_ad(mtr->memo_contains(index->lock, - MTR_MEMO_X_LOCK)); -# endif - prev_block = btr_block_get(*index, prev_page_no, - RW_X_LATCH, !level, mtr); - } -#endif + prev_block = btr_block_get(*index, prev_page_no, RW_X_LATCH, + !level, mtr); } if (next_page_no != FIL_NULL && direction != FSP_DOWN) { - id.set_page_no(next_page_no); - next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); -#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ - if (!next_block) { -# if 0 /* MDEV-29835 FIXME */ - ut_ad(mtr->memo_contains(index->lock, - MTR_MEMO_X_LOCK)); -# endif - next_block = btr_block_get(*index, next_page_no, - RW_X_LATCH, !level, mtr); - } -#endif + next_block = btr_block_get(*index, next_page_no, RW_X_LATCH, + !level, mtr); } /* Build the node pointer (= node key and page address) for the upper @@ -3123,7 +3018,6 @@ insert_empty: return nullptr; } -#ifdef UNIV_DEBUG /* If the split is made on the leaf level and the insert will fit on the appropriate half-page, we may release the tree x-latch. We can then move the records after releasing the tree latch, @@ -3131,21 +3025,21 @@ insert_empty: const bool insert_will_fit = !new_page_zip && btr_page_insert_fits(cursor, split_rec, offsets, tuple, n_ext, heap); -#endif if (!split_rec && !insert_left) { UT_DELETE_ARRAY(buf); buf = NULL; } -#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled - if (insert_will_fit + if (!srv_read_only_mode + && insert_will_fit && page_is_leaf(page) && !dict_index_is_online_ddl(cursor->index())) { +#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled mtr->release(cursor->index()->lock); +#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } -#endif /* 5. Move then the records to the new page */ if (direction == FSP_DOWN) { @@ -3377,58 +3271,52 @@ func_exit: dberr_t btr_level_list_remove(const buf_block_t& block, const dict_index_t& index, mtr_t* mtr) { - ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(block.zip_size() == index.table->space->zip_size()); - ut_ad(index.table->space->id == block.page.id().space()); - /* Get the previous and next page numbers of page */ - const uint32_t prev_page_no= btr_page_get_prev(block.page.frame); - const uint32_t next_page_no= btr_page_get_next(block.page.frame); - page_id_t id{block.page.id()}; - buf_block_t *prev= nullptr, *next; - dberr_t err; + ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(block.zip_size() == index.table->space->zip_size()); + ut_ad(index.table->space->id == block.page.id().space()); + /* Get the previous and next page numbers of page */ - /* Update page links of the level */ - if (prev_page_no != FIL_NULL) - { - id.set_page_no(prev_page_no); - prev= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); -#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ - if (!prev) - { -# if 0 /* MDEV-29835 FIXME */ - ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); -# endif - prev= btr_block_get(index, id.page_no(), RW_X_LATCH, - page_is_leaf(block.page.frame), mtr, &err); - if (UNIV_UNLIKELY(!prev)) - return err; - } -#endif - } + const page_t* page = block.page.frame; + const uint32_t prev_page_no = btr_page_get_prev(page); + const uint32_t next_page_no = btr_page_get_next(page); - if (next_page_no != FIL_NULL) - { - id.set_page_no(next_page_no); - next= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); -#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ - if (!next) - { -# if 0 /* MDEV-29835 FIXME */ - ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); -# endif - next= btr_block_get(index, id.page_no(), RW_X_LATCH, - page_is_leaf(block.page.frame), mtr, &err); - if (UNIV_UNLIKELY(!next)) - return err; - } -#endif - btr_page_set_prev(next, prev_page_no, mtr); - } + /* Update page links of the level */ + dberr_t err; - if (prev) - btr_page_set_next(prev, next_page_no, mtr); + if (prev_page_no != FIL_NULL) { + buf_block_t* prev_block = btr_block_get( + index, prev_page_no, RW_X_LATCH, page_is_leaf(page), + mtr, &err); + if (UNIV_UNLIKELY(!prev_block)) { + return err; + } + if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_block->page.frame + + FIL_PAGE_NEXT, + page + FIL_PAGE_OFFSET, + 4))) { + return DB_CORRUPTION; + } + btr_page_set_next(prev_block, next_page_no, mtr); + } - return DB_SUCCESS; + if (next_page_no != FIL_NULL) { + buf_block_t* next_block = btr_block_get( + index, next_page_no, RW_X_LATCH, page_is_leaf(page), + mtr, &err); + + if (UNIV_UNLIKELY(!next_block)) { + return err; + } + if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame + + FIL_PAGE_PREV, + page + FIL_PAGE_OFFSET, + 4))) { + return DB_CORRUPTION; + } + btr_page_set_prev(next_block, prev_page_no, mtr); + } + + return DB_SUCCESS; } /*************************************************************//** @@ -4278,30 +4166,23 @@ btr_discard_page( const uint32_t left_page_no = btr_page_get_prev(block->page.frame); const uint32_t right_page_no = btr_page_get_next(block->page.frame); - page_id_t merge_page_id{block->page.id()}; ut_d(bool parent_is_different = false); - dberr_t err; if (left_page_no != FIL_NULL) { - merge_page_id.set_page_no(left_page_no); - merge_block = btr_block_reget(mtr, *index, merge_page_id, - RW_X_LATCH, &err); + dberr_t err; + merge_block = btr_block_get(*index, left_page_no, RW_X_LATCH, + true, mtr, &err); if (UNIV_UNLIKELY(!merge_block)) { return err; } -#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ - ut_ad(!memcmp_aligned<4>(merge_block->page.frame - + FIL_PAGE_NEXT, - block->page.frame + FIL_PAGE_OFFSET, - 4)); -#else + if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_NEXT, block->page.frame + FIL_PAGE_OFFSET, 4))) { return DB_CORRUPTION; } -#endif + ut_d(parent_is_different = (page_rec_get_next( page_get_infimum_rec( @@ -4309,25 +4190,19 @@ btr_discard_page( &parent_cursor))) == btr_cur_get_rec(&parent_cursor))); } else if (right_page_no != FIL_NULL) { - merge_page_id.set_page_no(right_page_no); - merge_block = btr_block_reget(mtr, *index, merge_page_id, - RW_X_LATCH, &err); + dberr_t err; + merge_block = btr_block_get(*index, right_page_no, RW_X_LATCH, + true, mtr, &err); if (UNIV_UNLIKELY(!merge_block)) { return err; } -#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ - ut_ad(!memcmp_aligned<4>(merge_block->page.frame - + FIL_PAGE_PREV, - block->page.frame + FIL_PAGE_OFFSET, - 4)); -#else if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_PREV, block->page.frame + FIL_PAGE_OFFSET, 4))) { return DB_CORRUPTION; } -#endif + ut_d(parent_is_different = page_rec_is_supremum( page_rec_get_next(btr_cur_get_rec(&parent_cursor)))); if (page_is_leaf(merge_block->page.frame)) { @@ -4369,10 +4244,13 @@ btr_discard_page( } #ifdef UNIV_ZIP_DEBUG - if (page_zip_des_t* merge_page_zip - = buf_block_get_page_zip(merge_block)); - ut_a(page_zip_validate(merge_page_zip, - merge_block->page.frame, index)); + { + page_zip_des_t* merge_page_zip + = buf_block_get_page_zip(merge_block); + ut_a(!merge_page_zip + || page_zip_validate(merge_page_zip, + merge_block->page.frame, index)); + } #endif /* UNIV_ZIP_DEBUG */ if (index->has_locking()) { @@ -4391,7 +4269,7 @@ btr_discard_page( } /* Free the file page */ - err = btr_page_free(index, block, mtr); + dberr_t err = btr_page_free(index, block, mtr); if (err == DB_SUCCESS) { /* btr_check_node_ptr() needs parent block latched. @@ -4584,8 +4462,6 @@ btr_check_node_ptr( offsets = btr_page_get_father_block(NULL, heap, mtr, &cursor); } - ut_ad(offsets); - if (page_is_leaf(page)) { goto func_exit; @@ -4917,16 +4793,19 @@ btr_validate_level( page_zip_des_t* page_zip; #endif /* UNIV_ZIP_DEBUG */ ulint savepoint = 0; + ulint savepoint2 = 0; uint32_t parent_page_no = FIL_NULL; uint32_t parent_right_page_no = FIL_NULL; bool rightmost_child = false; mtr.start(); - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); + if (!srv_read_only_mode) { + if (lockout) { + mtr_x_lock_index(index, &mtr); + } else { + mtr_sx_lock_index(index, &mtr); + } } dberr_t err; @@ -4974,6 +4853,7 @@ corrupted: offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); + savepoint2 = mtr_set_savepoint(&mtr); block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr, &err); if (!block) { @@ -4994,8 +4874,10 @@ corrupted: /* To obey latch order of tree blocks, we should release the right_block once to obtain lock of the uncle block. */ - mtr.release_last_page(); + mtr_release_block_at_savepoint( + &mtr, savepoint2, block); + savepoint2 = mtr_set_savepoint(&mtr); block = btr_block_get(*index, left_page_no, RW_SX_LATCH, false, &mtr, &err); @@ -5023,10 +4905,12 @@ func_exit: mem_heap_empty(heap); offsets = offsets2 = NULL; - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); + if (!srv_read_only_mode) { + if (lockout) { + mtr_x_lock_index(index, &mtr); + } else { + mtr_sx_lock_index(index, &mtr); + } } page = block->page.frame; @@ -5071,7 +4955,7 @@ func_exit: if (right_page_no != FIL_NULL) { const rec_t* right_rec; - savepoint = mtr.get_savepoint(); + savepoint = mtr_set_savepoint(&mtr); right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH, !level, &mtr, &err); @@ -5266,10 +5150,8 @@ broken_links: /* To obey latch order of tree blocks, we should release the right_block once to obtain lock of the uncle block. */ - ut_ad(right_block - == mtr.at_savepoint(savepoint)); - mtr.rollback_to_savepoint(savepoint, - savepoint + 1); + mtr_release_block_at_savepoint( + &mtr, savepoint, right_block); if (parent_right_page_no != FIL_NULL) { btr_block_get(*index, diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index b3bfb74bb8b..ac06d9b1568 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3,7 +3,7 @@ Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2015, 2023, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -103,14 +103,14 @@ throughput clearly from about 100000. */ #define BTR_CUR_FINE_HISTORY_LENGTH 100000 #ifdef BTR_CUR_HASH_ADAPT -/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */ +/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_non_sea; /** Old value of btr_cur_n_non_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ ulint btr_cur_n_non_sea_old; /** Number of successful adaptive hash index lookups in -btr_cur_t::search_leaf(). */ +btr_cur_search_to_nth_level(). */ ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_sea; /** Old value of btr_cur_n_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by @@ -188,106 +188,164 @@ btr_rec_free_externally_stored_fields( /*==================== B-TREE SEARCH =========================*/ /** Latches the leaf page or pages requested. -@param[in] block_savepoint leaf page where the search converged +@param[in] block leaf page where the search converged @param[in] latch_mode BTR_SEARCH_LEAF, ... @param[in] cursor cursor -@param[in] mtr mini-transaction */ +@param[in] mtr mini-transaction +@param[out] latch_leaves latched blocks and savepoints */ void btr_cur_latch_leaves( - ulint block_savepoint, + buf_block_t* block, btr_latch_mode latch_mode, btr_cur_t* cursor, - mtr_t* mtr) + mtr_t* mtr, + btr_latch_leaves_t* latch_leaves) { compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH)); compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH)); compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH)); - - buf_block_t* block = mtr->at_savepoint(block_savepoint); - ut_ad(block->page.id().space() == cursor->index()->table->space->id); ut_ad(block->page.in_file()); - ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, - MTR_MEMO_S_LOCK - | MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); + ut_ad(srv_read_only_mode + || mtr->memo_contains_flagged(&cursor->index()->lock, + MTR_MEMO_S_LOCK + | MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + auto rtr_info = cursor->rtr_info; + if (UNIV_LIKELY_NULL(rtr_info) && !cursor->index()->is_spatial()) { + rtr_info = nullptr; + } + const rw_lock_type_t mode = rw_lock_type_t( latch_mode & (RW_X_LATCH | RW_S_LATCH)); static_assert(ulint{RW_S_LATCH} == ulint{BTR_SEARCH_LEAF}, ""); static_assert(ulint{RW_X_LATCH} == ulint{BTR_MODIFY_LEAF}, ""); + static_assert(BTR_SEARCH_LEAF & BTR_SEARCH_TREE, ""); switch (latch_mode) { + default: + break; uint32_t left_page_no; uint32_t right_page_no; - default: - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); - ut_ad(cursor->index()->is_spatial()); - break; + ulint save; case BTR_SEARCH_LEAF: - s_latch_block: - block->page.lock.s_lock(); -#ifdef BTR_CUR_HASH_ADAPT - btr_search_drop_page_hash_index(block, true); -#endif - mtr->lock_register(block_savepoint, MTR_MEMO_PAGE_S_FIX); - break; + case BTR_MODIFY_LEAF: + case BTR_SEARCH_TREE: + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_savepoints[RTR_MAX_LEVELS] + = mtr->get_savepoint(); + } +latch_block: + if (latch_leaves) { + latch_leaves->savepoints[1] = mtr->get_savepoint(); + latch_leaves->blocks[1] = block; + } + block->page.fix(); + mtr->page_lock(block, mode); + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_blocks[RTR_MAX_LEVELS] = block; + } + return; case BTR_MODIFY_TREE: /* It is exclusive for other operations which calls btr_page_set_prev() */ ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); + save = mtr->get_savepoint(); /* x-latch also siblings from left to right */ left_page_no = btr_page_get_prev(block->page.frame); if (left_page_no != FIL_NULL) { - btr_block_get(*cursor->index(), left_page_no, RW_X_LATCH, - true, mtr); + buf_block_t *b = btr_block_get( + *cursor->index(), left_page_no, RW_X_LATCH, + true, mtr); + + if (latch_leaves) { + latch_leaves->savepoints[0] = save; + latch_leaves->blocks[0] = b; + } + + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_savepoints[RTR_MAX_LEVELS] + = save; + rtr_info->tree_blocks[RTR_MAX_LEVELS] = b; + } + + save = mtr->get_savepoint(); } - mtr->x_latch_at_savepoint(block_savepoint, block); + if (latch_leaves) { + latch_leaves->savepoints[1] = mtr->get_savepoint(); + latch_leaves->blocks[1] = block; + } + + block->page.fix(); + block->page.lock.x_lock(); + + mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); #ifdef BTR_CUR_HASH_ADAPT - btr_search_drop_page_hash_index(block, true); + ut_ad(!btr_search_check_marked_free_index(block)); #endif + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1] = save; + rtr_info->tree_blocks[RTR_MAX_LEVELS + 1] = block; + } + right_page_no = btr_page_get_next(block->page.frame); if (right_page_no != FIL_NULL) { - btr_block_get(*cursor->index(), right_page_no, - RW_X_LATCH, true, mtr); + save = mtr->get_savepoint(); + + buf_block_t* b = btr_block_get( + *cursor->index(), right_page_no, RW_X_LATCH, + true, mtr); + if (latch_leaves) { + latch_leaves->savepoints[2] = save; + latch_leaves->blocks[2] = b; + } + + if (UNIV_LIKELY_NULL(rtr_info)) { + rtr_info->tree_savepoints[RTR_MAX_LEVELS + 2] + = save; + rtr_info->tree_blocks[RTR_MAX_LEVELS + 2] = b; + } } - break; + + return; case BTR_SEARCH_PREV: case BTR_MODIFY_PREV: - static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + ut_ad(!rtr_info); static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); - ut_ad(cursor->index()->is_ibuf() - || mtr->memo_contains_flagged(&cursor->index()->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) + == (RW_S_LATCH ^ RW_X_LATCH), ""); + /* Because we are holding index->lock, no page splits or merges may run concurrently, and we may read FIL_PAGE_PREV from a buffer-fixed, unlatched page. */ left_page_no = btr_page_get_prev(block->page.frame); if (left_page_no != FIL_NULL) { + save = mtr->get_savepoint(); cursor->left_block = btr_block_get( *cursor->index(), left_page_no, mode, true, mtr); + if (latch_leaves) { + latch_leaves->savepoints[0] = save; + latch_leaves->blocks[0] = cursor->left_block; + } } - if (latch_mode == BTR_SEARCH_PREV) { - goto s_latch_block; - } - - /* fall through */ - case BTR_MODIFY_LEAF: - mtr->x_latch_at_savepoint(block_savepoint, block); -#ifdef BTR_CUR_HASH_ADAPT - btr_search_drop_page_hash_index(block, true); -#endif + goto latch_block; + case BTR_CONT_MODIFY_TREE: + ut_ad(cursor->index()->is_spatial()); + return; } + + MY_ASSERT_UNREACHABLE(); } /** Load the instant ALTER TABLE metadata from the clustered index @@ -671,6 +729,98 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) return index->n_core_null_bytes > 128; } +/** Optimistically latches the leaf page or pages requested. +@param[in] block guessed buffer block +@param[in] modify_clock modify clock value +@param[in,out] latch_mode BTR_SEARCH_LEAF, ... +@param[in,out] cursor cursor +@param[in] mtr mini-transaction +@return true if success */ +TRANSACTIONAL_TARGET +bool +btr_cur_optimistic_latch_leaves( + buf_block_t* block, + ib_uint64_t modify_clock, + btr_latch_mode* latch_mode, + btr_cur_t* cursor, + mtr_t* mtr) +{ + ut_ad(block->page.buf_fix_count()); + ut_ad(block->page.in_file()); + ut_ad(block->page.frame); + + switch (*latch_mode) { + default: + MY_ASSERT_UNREACHABLE(); + return(false); + case BTR_SEARCH_LEAF: + case BTR_MODIFY_LEAF: + return(buf_page_optimistic_get(*latch_mode, block, + modify_clock, mtr)); + case BTR_SEARCH_PREV: /* btr_pcur_move_backward_from_page() */ + case BTR_MODIFY_PREV: /* Ditto, or ibuf_insert() */ + uint32_t curr_page_no, left_page_no; + { + transactional_shared_lock_guard<block_lock> g{ + block->page.lock}; + if (block->modify_clock != modify_clock) { + return false; + } + curr_page_no = block->page.id().page_no(); + left_page_no = btr_page_get_prev(block->page.frame); + } + + static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) + == (RW_S_LATCH ^ RW_X_LATCH), ""); + + const rw_lock_type_t mode = rw_lock_type_t( + *latch_mode & (RW_X_LATCH | RW_S_LATCH)); + + if (left_page_no != FIL_NULL) { + cursor->left_block = buf_page_get_gen( + page_id_t(cursor->index()->table->space_id, + left_page_no), + cursor->index()->table->space->zip_size(), + mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr); + + if (cursor->left_block + && btr_page_get_next( + cursor->left_block->page.frame) + != curr_page_no) { +release_left_block: + mtr->release_last_page(); + return false; + } + } else { + cursor->left_block = nullptr; + } + + if (buf_page_optimistic_get(mode, block, modify_clock, mtr)) { + if (btr_page_get_prev(block->page.frame) + == left_page_no) { + /* block was already buffer-fixed while + entering the function and + buf_page_optimistic_get() buffer-fixes + it again. */ + ut_ad(2 <= block->page.buf_fix_count()); + *latch_mode = btr_latch_mode(mode); + return(true); + } + + mtr->release_last_page(); + } + + ut_ad(block->page.buf_fix_count()); + if (cursor->left_block) { + goto release_left_block; + } + } + + return false; +} + /** Gets intention in btr_intention_t from latch_mode, and cleares the intention at the latch_mode. @@ -698,6 +848,38 @@ btr_intention_t btr_cur_get_and_clear_intention(btr_latch_mode *latch_mode) return(intention); } +/** +Gets the desired latch type for the root leaf (root page is root leaf) +at the latch mode. +@param latch_mode in: BTR_SEARCH_LEAF, ... +@return latch type */ +static +rw_lock_type_t +btr_cur_latch_for_root_leaf( + ulint latch_mode) +{ + switch (latch_mode) { + case BTR_SEARCH_LEAF: + case BTR_SEARCH_TREE: + case BTR_SEARCH_PREV: + return(RW_S_LATCH); + case BTR_MODIFY_LEAF: + case BTR_MODIFY_TREE: + case BTR_MODIFY_PREV: + return(RW_X_LATCH); + case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: + /* A root page should be latched already, + and don't need to be latched here. + fall through (RW_NO_LATCH) */ + case BTR_NO_LATCHES: + return(RW_NO_LATCH); + } + + MY_ASSERT_UNREACHABLE(); + return(RW_NO_LATCH); /* avoid compiler warnings */ +} + /** @return whether the distance between two records is at most the specified value */ static bool @@ -1015,841 +1197,1223 @@ static ulint btr_node_ptr_max_size(const dict_index_t* index) return rec_max_size; } -/** @return a B-tree search mode suitable for non-leaf pages -@param mode leaf page search mode */ -static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode) -{ - if (mode > PAGE_CUR_GE) - { - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE); - return mode; - } - if (mode == PAGE_CUR_GE) - return PAGE_CUR_L; - ut_ad(mode == PAGE_CUR_G); - return PAGE_CUR_LE; -} +/********************************************************************//** +Searches an index tree and positions a tree cursor on a given level. +NOTE: n_fields_cmp in tuple must be set so that it cannot be compared +to node pointer page number fields on the upper levels of the tree! +Note that if mode is PAGE_CUR_LE, which is used in inserts, then +cursor->up_match and cursor->low_match both will have sensible values. +If mode is PAGE_CUR_GE, then up_match will a have a sensible value. -dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, - btr_latch_mode latch_mode, mtr_t *mtr) +If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the +search tuple should be performed in the B-tree. InnoDB does an insert +immediately after the cursor. Thus, the cursor may end up on a user record, +or on a page infimum record. +@param level the tree level of search +@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that + it cannot get compared to the node ptr page number field! +@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a + unique prefix of a record, mode should be PAGE_CUR_LE, not + PAGE_CUR_GE, as the latter may end up on the previous page of + the record! Inserts should always be made using PAGE_CUR_LE + to search the position! +@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, + BTR_DELETE_MARK, or BTR_DELETE; + cursor->left_block is used to store a pointer to the left + neighbor page +@param cursor tree cursor; the cursor page is s- or x-latched, but see also + above! +@param mtr mini-transaction +@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none) +@return DB_SUCCESS on success or error code otherwise */ +TRANSACTIONAL_TARGET +dberr_t btr_cur_search_to_nth_level(ulint level, + const dtuple_t *tuple, + page_cur_mode_t mode, + btr_latch_mode latch_mode, + btr_cur_t *cursor, mtr_t *mtr, + ib_uint64_t autoinc) { - ut_ad(index()->is_btree() || index()->is_ibuf()); - ut_ad(!index()->is_ibuf() || ibuf_inside(mtr)); + page_t* page = NULL; /* remove warning */ + buf_block_t* block; + buf_block_t* guess; + ulint height; + ulint up_match; + ulint up_bytes; + ulint low_match; + ulint low_bytes; + ulint rw_latch; + page_cur_mode_t page_mode; + page_cur_mode_t search_mode = PAGE_CUR_UNSUPP; + ulint buf_mode; + ulint node_ptr_max_size = srv_page_size / 2; + page_cur_t* page_cursor; + btr_op_t btr_op; + ulint root_height = 0; /* remove warning */ + + btr_intention_t lock_intention; + buf_block_t* tree_blocks[BTR_MAX_LEVELS]; + ulint tree_savepoints[BTR_MAX_LEVELS]; + ulint n_blocks = 0; + ulint n_releases = 0; + bool detected_same_key_root = false; + + ulint leftmost_from_level = 0; + buf_block_t** prev_tree_blocks = NULL; + ulint* prev_tree_savepoints = NULL; + ulint prev_n_blocks = 0; + ulint prev_n_releases = 0; + bool need_path = true; + bool rtree_parent_modified = false; + bool mbr_adj = false; + bool found = false; + dict_index_t * const index = cursor->index(); + + DBUG_ENTER("btr_cur_search_to_nth_level"); - buf_block_t *guess; - btr_op_t btr_op; - btr_intention_t lock_intention; - bool detected_same_key_root= false; +#ifdef BTR_CUR_ADAPT + btr_search_t* info; +#endif /* BTR_CUR_ADAPT */ + mem_heap_t* heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets = offsets_; + rec_offs offsets2_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets2 = offsets2_; + rec_offs_init(offsets_); + rec_offs_init(offsets2_); + /* Currently, PAGE_CUR_LE is the only search mode used for searches + ending to upper levels */ + + ut_ad(level == 0 || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode)); + ut_ad(dict_index_check_search_tuple(index, tuple)); + ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr)); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(!(index->type & DICT_FTS)); + ut_ad(index->page != FIL_NULL); - mem_heap_t* heap = NULL; - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets = offsets_; - rec_offs offsets2_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets2 = offsets2_; - rec_offs_init(offsets_); - rec_offs_init(offsets2_); - - ut_ad(dict_index_check_search_tuple(index(), tuple)); - ut_ad(dtuple_check_typed(tuple)); - ut_ad(index()->page != FIL_NULL); - - MEM_UNDEFINED(&up_match, sizeof up_match); - MEM_UNDEFINED(&up_bytes, sizeof up_bytes); - MEM_UNDEFINED(&low_match, sizeof low_match); - MEM_UNDEFINED(&low_bytes, sizeof low_bytes); - ut_d(up_match= ULINT_UNDEFINED); - ut_d(low_match= ULINT_UNDEFINED); - - ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED) || - mtr->memo_contains_flagged(&index()->lock, - MTR_MEMO_S_LOCK | MTR_MEMO_SX_LOCK | - MTR_MEMO_X_LOCK)); - - /* These flags are mutually exclusive, they are lumped together - with the latch mode for historical reasons. It's possible for - none of the flags to be set. */ - switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) { - default: - btr_op= BTR_NO_OP; - break; - case BTR_INSERT: - btr_op= (latch_mode & BTR_IGNORE_SEC_UNIQUE) - ? BTR_INSERT_IGNORE_UNIQUE_OP - : BTR_INSERT_OP; - break; - case BTR_DELETE: - btr_op= BTR_DELETE_OP; - ut_a(purge_node); - break; - case BTR_DELETE_MARK: - btr_op= BTR_DELMARK_OP; - break; - } + MEM_UNDEFINED(&cursor->up_match, sizeof cursor->up_match); + MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes); + MEM_UNDEFINED(&cursor->low_match, sizeof cursor->low_match); + MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes); +#ifdef UNIV_DEBUG + cursor->up_match = ULINT_UNDEFINED; + cursor->low_match = ULINT_UNDEFINED; +#endif /* UNIV_DEBUG */ - /* Operations on the insert buffer tree cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !index()->is_ibuf()); - /* Operations on the clustered index cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !index()->is_clust()); - /* Operations on the temporary table(indexes) cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !index()->table->is_temporary()); + const bool latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED; - const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; - lock_intention= btr_cur_get_and_clear_intention(&latch_mode); - latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + ut_ad(!latch_by_caller + || srv_read_only_mode + || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK + | MTR_MEMO_SX_LOCK)); - ut_ad(!latch_by_caller - || latch_mode == BTR_SEARCH_LEAF - || latch_mode == BTR_MODIFY_LEAF - || latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_MODIFY_ROOT_AND_LEAF); + /* These flags are mutually exclusive, they are lumped together + with the latch mode for historical reasons. It's possible for + none of the flags to be set. */ + switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) { + default: + btr_op = BTR_NO_OP; + break; + case BTR_INSERT: + btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE) + ? BTR_INSERT_IGNORE_UNIQUE_OP + : BTR_INSERT_OP; + break; + case BTR_DELETE: + btr_op = BTR_DELETE_OP; + ut_a(cursor->purge_node); + break; + case BTR_DELETE_MARK: + btr_op = BTR_DELMARK_OP; + break; + } + + /* Operations on the insert buffer tree cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index)); + /* Operations on the clustered index cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index)); + /* Operations on the temporary table(indexes) cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index->table->is_temporary()); + /* Operation on the spatial index cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index)); + + lock_intention = btr_cur_get_and_clear_intention(&latch_mode); + + /* Turn the flags unrelated to the latch mode off. */ + latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + + ut_ad(!latch_by_caller + || latch_mode == BTR_SEARCH_LEAF + || latch_mode == BTR_SEARCH_TREE + || latch_mode == BTR_MODIFY_LEAF); + + ut_ad(autoinc == 0 || dict_index_is_clust(index)); + ut_ad(autoinc == 0 + || latch_mode == BTR_MODIFY_TREE + || latch_mode == BTR_MODIFY_LEAF); + ut_ad(autoinc == 0 || level == 0); + + cursor->flag = BTR_CUR_BINARY; - flag= BTR_CUR_BINARY; #ifndef BTR_CUR_ADAPT - guess= nullptr; + guess = NULL; #else - btr_search_t *info= btr_search_get_info(index()); - guess= info->root_guess; - -# ifdef BTR_CUR_HASH_ADAPT -# ifdef UNIV_SEARCH_PERF_STAT - info->n_searches++; -# endif - /* We do a dirty read of btr_search_enabled below, - and btr_search_guess_on_hash() will have to check it again. */ - if (!btr_search_enabled); - else if (btr_search_guess_on_hash(index(), info, tuple, mode, - latch_mode, this, mtr)) - { - /* Search using the hash index succeeded */ - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - ++btr_cur_n_sea; + info = btr_search_get_info(index); + guess = info->root_guess; - return DB_SUCCESS; - } - else - ++btr_cur_n_non_sea; +#ifdef BTR_CUR_HASH_ADAPT + +# ifdef UNIV_SEARCH_PERF_STAT + info->n_searches++; # endif -#endif + /* We do a dirty read of btr_search_enabled below, + and btr_search_guess_on_hash() will have to check it again. */ + if (!btr_search_enabled) { + } else if (autoinc == 0 + && latch_mode <= BTR_MODIFY_LEAF +# ifdef PAGE_CUR_LE_OR_EXTENDS + && mode != PAGE_CUR_LE_OR_EXTENDS +# endif /* PAGE_CUR_LE_OR_EXTENDS */ + && info->last_hash_succ + && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG) + && !index->is_spatial() && !index->table->is_temporary() + && btr_search_guess_on_hash(index, info, tuple, mode, + latch_mode, cursor, mtr)) { + + /* Search using the hash index succeeded */ + + ut_ad(cursor->up_match != ULINT_UNDEFINED + || mode != PAGE_CUR_GE); + ut_ad(cursor->up_match != ULINT_UNDEFINED + || mode != PAGE_CUR_LE); + ut_ad(cursor->low_match != ULINT_UNDEFINED + || mode != PAGE_CUR_LE); + ++btr_cur_n_sea; + + DBUG_RETURN(DB_SUCCESS); + } else { + ++btr_cur_n_non_sea; + } +# endif /* BTR_CUR_HASH_ADAPT */ +#endif /* BTR_CUR_ADAPT */ - /* If the hash search did not succeed, do binary search down the - tree */ + /* If the hash search did not succeed, do binary search down the + tree */ - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched leaf node(s) */ + /* Store the position of the tree latch we push to mtr so that we + know how to release it when we have latched leaf node(s) */ - const ulint savepoint = mtr->get_savepoint(); + ulint savepoint = mtr_set_savepoint(mtr); - ulint node_ptr_max_size= 0; - rw_lock_type_t rw_latch= RW_S_LATCH; + rw_lock_type_t upper_rw_latch; - switch (latch_mode) { - case BTR_MODIFY_TREE: - rw_latch= RW_X_LATCH; - node_ptr_max_size= btr_node_ptr_max_size(index()); - if (latch_by_caller) - { - ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK)); - break; - } - if (lock_intention == BTR_INTENTION_DELETE && buf_pool.n_pend_reads && - trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH) - /* Most delete-intended operations are due to the purge of history. - Prioritize them when the history list is growing huge. */ - mtr_x_lock_index(index(), mtr); - else - mtr_sx_lock_index(index(), mtr); - break; -#ifdef UNIV_DEBUG - case BTR_CONT_MODIFY_TREE: - ut_ad("invalid mode" == 0); - break; -#endif - case BTR_MODIFY_ROOT_AND_LEAF: - rw_latch= RW_SX_LATCH; - /* fall through */ - default: - if (!latch_by_caller) - mtr_s_lock_index(index(), mtr); - } + switch (latch_mode) { + case BTR_MODIFY_TREE: + /* Most of delete-intended operations are purging. + Free blocks and read IO bandwidth should be prior + for them, when the history list is glowing huge. */ + if (lock_intention == BTR_INTENTION_DELETE + && buf_pool.n_pend_reads + && trx_sys.history_size_approx() + > BTR_CUR_FINE_HISTORY_LENGTH) { +x_latch_index: + mtr_x_lock_index(index, mtr); + } else if (index->is_spatial() + && lock_intention <= BTR_INTENTION_BOTH) { + /* X lock the if there is possibility of + pessimistic delete on spatial index. As we could + lock upward for the tree */ + goto x_latch_index; + } else { + mtr_sx_lock_index(index, mtr); + } + upper_rw_latch = RW_X_LATCH; + break; + case BTR_CONT_MODIFY_TREE: + ut_ad(srv_read_only_mode + || mtr->memo_contains_flagged(&index->lock, + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + if (index->is_spatial()) { + /* If we are about to locate parent page for split + and/or merge operation for R-Tree index, X latch + the parent */ + upper_rw_latch = RW_X_LATCH; + break; + } + /* fall through */ + case BTR_CONT_SEARCH_TREE: + /* Do nothing */ + ut_ad(srv_read_only_mode + || mtr->memo_contains_flagged(&index->lock, + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + upper_rw_latch = RW_NO_LATCH; + break; + default: + if (!srv_read_only_mode) { + if (!latch_by_caller) { + ut_ad(latch_mode != BTR_SEARCH_TREE); + mtr_s_lock_index(index, mtr); + } + upper_rw_latch = RW_S_LATCH; + } else { + upper_rw_latch = RW_NO_LATCH; + } + } + const rw_lock_type_t root_leaf_rw_latch = btr_cur_latch_for_root_leaf( + latch_mode); - const ulint zip_size= index()->table->space->zip_size(); + page_cursor = btr_cur_get_page_cur(cursor); + page_cursor->index = index; - /* Start with the root page. */ - page_id_t page_id(index()->table->space_id, index()->page); + const ulint zip_size = index->table->space->zip_size(); - const page_cur_mode_t page_mode= btr_cur_nonleaf_mode(mode); - ulint height= ULINT_UNDEFINED; - up_match= 0; - up_bytes= 0; - low_match= 0; - low_bytes= 0; - ulint buf_mode= BUF_GET; - search_loop: - dberr_t err; - auto block_savepoint= mtr->get_savepoint(); - buf_block_t *block= - buf_page_get_gen(page_id, zip_size, rw_latch, guess, buf_mode, mtr, - &err, height == 0 && !index()->is_clust()); - if (!block) - { - switch (err) { - case DB_DECRYPTION_FAILED: - btr_decryption_failed(*index()); - /* fall through */ - default: - func_exit: - if (UNIV_LIKELY_NULL(heap)) - mem_heap_free(heap); - return err; - case DB_SUCCESS: - /* This must be a search to perform an insert, delete mark, or delete; - try using the change buffer */ - ut_ad(height == 0); - ut_ad(thr); - break; - } + /* Start with the root page. */ + page_id_t page_id(index->table->space_id, index->page); - switch (btr_op) { - default: - MY_ASSERT_UNREACHABLE(); - break; - case BTR_INSERT_OP: - case BTR_INSERT_IGNORE_UNIQUE_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + if (root_leaf_rw_latch == RW_X_LATCH) { + node_ptr_max_size = btr_node_ptr_max_size(index); + } - if (ibuf_insert(IBUF_OP_INSERT, tuple, index(), page_id, zip_size, thr)) - { - flag= BTR_CUR_INSERT_TO_IBUF; - goto func_exit; - } - break; + up_match = 0; + up_bytes = 0; + low_match = 0; + low_bytes = 0; - case BTR_DELMARK_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + height = ULINT_UNDEFINED; - if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple, - index(), page_id, zip_size, thr)) - { - flag = BTR_CUR_DEL_MARK_IBUF; - goto func_exit; - } + /* We use these modified search modes on non-leaf levels of the + B-tree. These let us end up in the right B-tree leaf. In that leaf + we use the original search mode. */ - break; + switch (mode) { + case PAGE_CUR_GE: + page_mode = PAGE_CUR_L; + break; + case PAGE_CUR_G: + page_mode = PAGE_CUR_LE; + break; + default: +#ifdef PAGE_CUR_LE_OR_EXTENDS + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode) + || mode == PAGE_CUR_LE_OR_EXTENDS); +#else /* PAGE_CUR_LE_OR_EXTENDS */ + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode)); +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + page_mode = mode; + break; + } - case BTR_DELETE_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); - auto& chain = buf_pool.page_hash.cell_get(page_id.fold()); - - if (!row_purge_poss_sec(purge_node, index(), tuple)) - /* The record cannot be purged yet. */ - flag= BTR_CUR_DELETE_REF; - else if (ibuf_insert(IBUF_OP_DELETE, tuple, index(), - page_id, zip_size, thr)) - /* The purge was buffered. */ - flag= BTR_CUR_DELETE_IBUF; - else - { - /* The purge could not be buffered. */ - buf_pool.watch_unset(page_id, chain); - break; - } + /* Loop and search until we arrive at the desired level */ + btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}}; - buf_pool.watch_unset(page_id, chain); - goto func_exit; - } +search_loop: + buf_mode = BUF_GET; + rw_latch = RW_NO_LATCH; + rtree_parent_modified = false; + + if (height != 0) { + /* We are about to fetch the root or a non-leaf page. */ + if ((latch_mode != BTR_MODIFY_TREE || height == level) + && !prev_tree_blocks) { + /* If doesn't have SX or X latch of index, + each pages should be latched before reading. */ + if (height == ULINT_UNDEFINED + && upper_rw_latch == RW_S_LATCH + && autoinc) { + /* needs sx-latch of root page + for writing PAGE_ROOT_AUTO_INC */ + rw_latch = RW_SX_LATCH; + } else { + rw_latch = upper_rw_latch; + } + } + } else if (latch_mode <= BTR_MODIFY_LEAF) { + rw_latch = latch_mode; - /* Change buffering did not succeed, we must read the page. */ - buf_mode= BUF_GET; - goto search_loop; - } + if (btr_op != BTR_NO_OP + && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) { - if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() || - btr_page_get_index_id(block->page.frame) != index()->id || - fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || - !fil_page_index_page_check(block->page.frame)) - { - corrupted: - ut_ad("corrupted" == 0); // FIXME: remove this - err= DB_CORRUPTION; - goto func_exit; - } + /* Try to buffer the operation if the leaf + page is not in the buffer pool. */ + + buf_mode = btr_op == BTR_DELETE_OP + ? BUF_GET_IF_IN_POOL_OR_WATCH + : BUF_GET_IF_IN_POOL; + } + } + +retry_page_get: + ut_ad(n_blocks < BTR_MAX_LEVELS); + tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); + dberr_t err; + block = buf_page_get_gen(page_id, zip_size, rw_latch, guess, + buf_mode, mtr, &err, + height == 0 && !index->is_clust()); + if (!block) { + switch (err) { + case DB_SUCCESS: + /* change buffering */ + break; + case DB_DECRYPTION_FAILED: + btr_decryption_failed(*index); + /* fall through */ + default: + goto func_exit; + } + + /* This must be a search to perform an insert/delete + mark/ delete; try using the insert/delete buffer */ + + ut_ad(height == 0); + ut_ad(cursor->thr); + + switch (btr_op) { + default: + MY_ASSERT_UNREACHABLE(); + break; + case BTR_INSERT_OP: + case BTR_INSERT_IGNORE_UNIQUE_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + ut_ad(!dict_index_is_spatial(index)); + + if (ibuf_insert(IBUF_OP_INSERT, tuple, index, + page_id, zip_size, cursor->thr)) { + + cursor->flag = BTR_CUR_INSERT_TO_IBUF; + + goto func_exit; + } + break; + + case BTR_DELMARK_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + ut_ad(!dict_index_is_spatial(index)); + + if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple, + index, page_id, zip_size, + cursor->thr)) { + + cursor->flag = BTR_CUR_DEL_MARK_IBUF; + + goto func_exit; + } + + break; + + case BTR_DELETE_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); + ut_ad(!dict_index_is_spatial(index)); + auto& chain = buf_pool.page_hash.cell_get( + page_id.fold()); + + if (!row_purge_poss_sec(cursor->purge_node, + index, tuple)) { + + /* The record cannot be purged yet. */ + cursor->flag = BTR_CUR_DELETE_REF; + } else if (ibuf_insert(IBUF_OP_DELETE, tuple, + index, page_id, zip_size, + cursor->thr)) { + + /* The purge was buffered. */ + cursor->flag = BTR_CUR_DELETE_IBUF; + } else { + /* The purge could not be buffered. */ + buf_pool.watch_unset(page_id, chain); + break; + } + + buf_pool.watch_unset(page_id, chain); + goto func_exit; + } + + /* Insert to the insert/delete buffer did not succeed, we + must read the page from disk. */ + + buf_mode = BUF_GET; + + goto retry_page_get; + } + + tree_blocks[n_blocks] = block; + + if (height && prev_tree_blocks) { + /* also latch left sibling */ + ut_ad(rw_latch == RW_NO_LATCH); + + rw_latch = upper_rw_latch; + + /* Because we are holding index->lock, no page splits + or merges may run concurrently, and we may read + FIL_PAGE_PREV from a buffer-fixed, unlatched page. */ + uint32_t left_page_no = btr_page_get_prev(block->page.frame); + + if (left_page_no != FIL_NULL) { + ut_ad(prev_n_blocks < leftmost_from_level); + + prev_tree_savepoints[prev_n_blocks] + = mtr_set_savepoint(mtr); + buf_block_t* get_block = buf_page_get_gen( + page_id_t(page_id.space(), left_page_no), + zip_size, rw_latch, NULL, buf_mode, + mtr, &err); + if (!get_block) { + if (err == DB_DECRYPTION_FAILED) { + btr_decryption_failed(*index); + } + goto func_exit; + } + + prev_tree_blocks[prev_n_blocks++] = get_block; + /* BTR_MODIFY_TREE doesn't update prev/next_page_no, + without their parent page's lock. So, not needed to + retry here, because we have the parent page's lock. */ + } + + mtr->s_lock_register(tree_savepoints[n_blocks]); + block->page.lock.s_lock(); + } + + page = buf_block_get_frame(block); + + if (height == ULINT_UNDEFINED + && page_is_leaf(page) + && rw_latch != RW_NO_LATCH + && rw_latch != root_leaf_rw_latch) { + /* The root page is also a leaf page (root_leaf). + We should reacquire the page, because the root page + is latched differently from leaf pages. */ + ut_ad(root_leaf_rw_latch != RW_NO_LATCH); + ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH); + ut_ad(rw_latch == RW_S_LATCH || autoinc); + ut_ad(!autoinc || root_leaf_rw_latch == RW_X_LATCH); + + ut_ad(n_blocks == 0); + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_blocks], + tree_blocks[n_blocks]); + + upper_rw_latch = root_leaf_rw_latch; + goto search_loop; + } - page_cur.block= block; - ut_ad(block == mtr->at_savepoint(block_savepoint)); - const page_t *page= buf_block_get_frame(block); #ifdef UNIV_ZIP_DEBUG - if (rw_latch != RW_NO_LATCH) - { - const page_zip_des_t *page_zip= buf_block_get_page_zip(block); - ut_a(!page_zip || page_zip_validate(page_zip, page, index())); - } + if (rw_latch != RW_NO_LATCH) { + const page_zip_des_t* page_zip + = buf_block_get_page_zip(block); + ut_a(!page_zip || page_zip_validate(page_zip, page, index)); + } #endif /* UNIV_ZIP_DEBUG */ - const uint32_t page_level= btr_page_get_level(page); - if (height == ULINT_UNDEFINED) - { - /* We are in the B-tree index root page. */ + ut_ad(fil_page_index_page_check(page)); + ut_ad(index->id == btr_page_get_index_id(page)); + + if (height == ULINT_UNDEFINED) { + /* We are in the root node */ + + height = btr_page_get_level(page); + root_height = height; + cursor->tree_height = root_height + 1; + + if (dict_index_is_spatial(index)) { + ut_ad(cursor->rtr_info); + + /* If SSN in memory is not initialized, fetch + it from root page */ + if (!rtr_get_current_ssn_id(index)) { + /* FIXME: do this in dict_load_table_one() */ + index->set_ssn(page_get_ssn_id(page) + 1); + } + + /* Save the MBR */ + cursor->rtr_info->thr = cursor->thr; + rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr); + } + #ifdef BTR_CUR_ADAPT - info->root_guess= block; + info->root_guess = block; #endif - height= page_level; - tree_height= height + 1; + } - if (!height) - { - /* The root page is also a leaf page. - We may have to reacquire the page latch in a different mode. */ - switch (rw_latch) { - case RW_S_LATCH: - if ((latch_mode & ~12) != RW_S_LATCH) - { - rw_latch= rw_lock_type_t(latch_mode & ~12); - ut_ad(rw_latch == RW_X_LATCH || rw_latch == RW_SX_LATCH); - goto relatch; - } - if (latch_mode != BTR_MODIFY_PREV) - { - if (!latch_by_caller) - /* Release the tree s-latch */ - mtr->rollback_to_savepoint(savepoint, savepoint + 1); - goto reached_latched_leaf; + if (height == 0) { + if (rw_latch == RW_NO_LATCH) { + btr_cur_latch_leaves(block, latch_mode, cursor, mtr, + &latch_leaves); + } + + switch (latch_mode) { + case BTR_MODIFY_TREE: + case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: + break; + default: + if (!latch_by_caller + && !srv_read_only_mode) { + /* Release the tree s-latch */ + mtr_release_s_latch_at_savepoint( + mtr, savepoint, + &index->lock); + } + + /* release upper blocks */ + if (prev_tree_blocks) { + ut_ad(!autoinc); + for (; + prev_n_releases < prev_n_blocks; + prev_n_releases++) { + mtr_release_block_at_savepoint( + mtr, + prev_tree_savepoints[ + prev_n_releases], + prev_tree_blocks[ + prev_n_releases]); + } + } + + for (; n_releases < n_blocks; n_releases++) { + if (n_releases == 0 + && (autoinc)) { + /* keep the root page latch */ + ut_ad(mtr->memo_contains_flagged( + tree_blocks[n_releases], + MTR_MEMO_PAGE_SX_FIX + | MTR_MEMO_PAGE_X_FIX)); + continue; + } + + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + } + + page_mode = mode; + } + + if (dict_index_is_spatial(index)) { + /* Remember the page search mode */ + search_mode = page_mode; + + /* Some adjustment on search mode, when the + page search mode is PAGE_CUR_RTREE_LOCATE + or PAGE_CUR_RTREE_INSERT, as we are searching + with MBRs. When it is not the target level, we + should search all sub-trees that "CONTAIN" the + search range/MBR. When it is at the target + level, the search becomes PAGE_CUR_LE */ + if (page_mode == PAGE_CUR_RTREE_LOCATE + && level == height) { + if (level == 0) { + page_mode = PAGE_CUR_LE; + } else { + page_mode = PAGE_CUR_RTREE_GET_FATHER; + } + } + + if (page_mode == PAGE_CUR_RTREE_INSERT) { + page_mode = (level == height) + ? PAGE_CUR_LE + : PAGE_CUR_RTREE_INSERT; + + ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE); + } + + /* "need_path" indicates if we need to tracking the parent + pages, if it is not spatial comparison, then no need to + track it */ + if (page_mode < PAGE_CUR_CONTAIN) { + need_path = false; + } + + up_match = 0; + low_match = 0; + + if (latch_mode == BTR_MODIFY_TREE + || latch_mode == BTR_CONT_MODIFY_TREE + || latch_mode == BTR_CONT_SEARCH_TREE) { + /* Tree are locked, no need for Page Lock to protect + the "path" */ + cursor->rtr_info->need_page_lock = false; + } } - /* fall through */ - case RW_SX_LATCH: - ut_ad(rw_latch == RW_S_LATCH || - latch_mode == BTR_MODIFY_ROOT_AND_LEAF); - rw_latch= RW_X_LATCH; - relatch: - mtr->rollback_to_savepoint(block_savepoint); - height= ULINT_UNDEFINED; - goto search_loop; - case RW_X_LATCH: - if (latch_mode == BTR_MODIFY_TREE) - goto reached_index_root_and_leaf; - goto reached_root_and_leaf; - case RW_NO_LATCH: - ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK)); - } - goto reached_leaf; - } - } - else if (UNIV_UNLIKELY(height != page_level)) - goto corrupted; - else - switch (latch_mode) { - case BTR_MODIFY_TREE: - break; - case BTR_MODIFY_ROOT_AND_LEAF: - ut_ad((mtr->at_savepoint(block_savepoint - 1)->page.id().page_no() == - index()->page) == (tree_height <= height + 2)); - if (tree_height <= height + 2) - /* Retain the root page latch. */ - break; - /* fall through */ - default: - /* Release the parent page latch. */ - ut_ad(block_savepoint > savepoint); - mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint); - block_savepoint--; - } - if (!height) - { - reached_leaf: - /* We reached the leaf level. */ - ut_ad(block == mtr->at_savepoint(block_savepoint)); + page_cursor->block = block; - if (latch_mode == BTR_MODIFY_ROOT_AND_LEAF) - { - reached_root_and_leaf: - if (!latch_by_caller) - mtr->rollback_to_savepoint(savepoint, savepoint + 1); - reached_index_root_and_leaf: - ut_ad(rw_latch == RW_X_LATCH); -#ifdef BTR_CUR_HASH_ADAPT - btr_search_drop_page_hash_index(block, true); -#endif - if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, - &page_cur, nullptr)) - goto corrupted; - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - goto func_exit; - } + if (dict_index_is_spatial(index) && page_mode >= PAGE_CUR_CONTAIN) { + ut_ad(need_path); + found = rtr_cur_search_with_match( + block, index, tuple, page_mode, page_cursor, + cursor->rtr_info); - if (rw_latch == RW_NO_LATCH) - btr_cur_latch_leaves(block_savepoint, latch_mode, this, mtr); + /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ + if (search_mode == PAGE_CUR_RTREE_INSERT + && cursor->rtr_info->mbr_adj) { + static_assert(BTR_MODIFY_TREE + == (8 | BTR_MODIFY_LEAF), ""); - if (latch_mode != BTR_MODIFY_TREE) - { - if (!latch_by_caller) - { - /* Release the tree s-latch */ - block_savepoint--; - mtr->rollback_to_savepoint(savepoint, savepoint + 1); - } - /* release upper blocks */ - if (savepoint < block_savepoint) - mtr->rollback_to_savepoint(savepoint, block_savepoint); - } - else - ut_ad(rw_latch == RW_NO_LATCH); + if (!(latch_mode & 8)) { + /* Parent MBR needs updated, should retry + with BTR_MODIFY_TREE */ + goto func_exit; + } + + rtree_parent_modified = true; + cursor->rtr_info->mbr_adj = false; + mbr_adj = true; + } - reached_latched_leaf: + if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) { + cursor->low_match = + DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; + } #ifdef BTR_CUR_HASH_ADAPT - if (btr_search_enabled && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)) - { - if (page_cur_search_with_match_bytes(tuple, mode, - &up_match, &up_bytes, - &low_match, &low_bytes, &page_cur)) - goto corrupted; - } - else + } else if (height == 0 && btr_search_enabled + && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG) + && index->is_btree()) { + /* The adaptive hash index is only used when searching + for leaf pages (height==0), but not in r-trees. + We only need the byte prefix comparison for the purpose + of updating the adaptive hash index. */ + if (page_cur_search_with_match_bytes( + tuple, page_mode, &up_match, &up_bytes, + &low_match, &low_bytes, page_cursor)) { + err = DB_CORRUPTION; + goto func_exit; + } #endif /* BTR_CUR_HASH_ADAPT */ - if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, - &page_cur, nullptr)) - goto corrupted; + } else { + /* Search for complete index fields. */ + up_bytes = low_bytes = 0; + if (page_cur_search_with_match( + tuple, page_mode, &up_match, + &low_match, page_cursor, + need_path ? cursor->rtr_info : nullptr)) { + err = DB_CORRUPTION; + goto func_exit; + } + } - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + /* If this is the desired level, leave the loop */ -#ifdef BTR_CUR_HASH_ADAPT - /* We do a dirty read of btr_search_enabled here. We will - properly check btr_search_enabled again in - btr_search_build_page_hash_index() before building a page hash - index, while holding search latch. */ - if (!btr_search_enabled); - else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) - /* This may be a search tuple for btr_pcur_t::restore_position(). */ - ut_ad(tuple->is_metadata() || - (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT))); - else if (index()->table->is_temporary()); - else if (!rec_is_metadata(page_cur.rec, *index())) - btr_search_info_update(index(), this); -#endif /* BTR_CUR_HASH_ADAPT */ + ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor))); - goto func_exit; - } + /* Add Predicate lock if it is serializable isolation + and only if it is in the search case */ + if (dict_index_is_spatial(index) + && cursor->rtr_info->need_prdt_lock + && mode != PAGE_CUR_RTREE_INSERT + && mode != PAGE_CUR_RTREE_LOCATE + && mode >= PAGE_CUR_CONTAIN) { + lock_prdt_t prdt; - guess= nullptr; - if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match, - &page_cur, nullptr)) - goto corrupted; - offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED, - &heap); + { + trx_t* trx = thr_get_trx(cursor->thr); + TMLockTrxGuard g{TMLockTrxArgs(*trx)}; + lock_init_prdt_from_mbr( + &prdt, &cursor->rtr_info->mbr, mode, + trx->lock.lock_heap); + } - ut_ad(block == mtr->at_savepoint(block_savepoint)); + if (rw_latch == RW_NO_LATCH && height != 0) { + block->page.lock.s_lock(); + } - switch (latch_mode) { - default: - break; - case BTR_MODIFY_TREE: - if (btr_cur_need_opposite_intention(page, lock_intention, page_cur.rec)) - { - /* If the rec is the first or last in the page for pessimistic - delete intention, it might cause node_ptr insert for the upper - level. We should change the intention and retry. */ - need_opposite_intention: - return pessimistic_search_leaf(tuple, mode, mtr); - } + lock_prdt_lock(block, &prdt, index, LOCK_S, + LOCK_PREDICATE, cursor->thr); - if (detected_same_key_root || lock_intention != BTR_INTENTION_BOTH || - index()->is_unique() || - (up_match <= rec_offs_n_fields(offsets) && - low_match <= rec_offs_n_fields(offsets))) - break; + if (rw_latch == RW_NO_LATCH && height != 0) { + block->page.lock.s_unlock(); + } + } - /* If the first or the last record of the page or the same key - value to the first record or last record, then another page might - be chosen when BTR_CONT_MODIFY_TREE. So, the parent page should - not released to avoiding deadlock with blocking the another search - with the same key value. */ - const rec_t *first= page_rec_get_next_const(page_get_infimum_rec(page)); - ulint matched_fields; + if (level != height) { - if (UNIV_UNLIKELY(!first)) - goto corrupted; - if (page_cur.rec == first || page_rec_is_last(page_cur.rec, page)) - { - same_key_root: - detected_same_key_root= true; - break; - } + const rec_t* node_ptr; + ut_ad(height > 0); - matched_fields= 0; - offsets2= rec_get_offsets(first, index(), offsets2, 0, ULINT_UNDEFINED, - &heap); - cmp_rec_rec(page_cur.rec, first, offsets, offsets2, index(), false, - &matched_fields); - if (matched_fields >= rec_offs_n_fields(offsets) - 1) - goto same_key_root; - if (const rec_t* last= - page_rec_get_prev_const(page_get_supremum_rec(page))) - { - matched_fields= 0; - offsets2= rec_get_offsets(last, index(), offsets2, 0, ULINT_UNDEFINED, - &heap); - cmp_rec_rec(page_cur.rec, last, offsets, offsets2, index(), false, - &matched_fields); - if (matched_fields >= rec_offs_n_fields(offsets) - 1) - goto same_key_root; - } - else - goto corrupted; + height--; + guess = NULL; - /* Release the non-root parent page unless it may need to be modified. */ - if (tree_height > height + 1 && - !btr_cur_will_modify_tree(index(), page, lock_intention, - page_cur.rec, node_ptr_max_size, - zip_size, mtr)) - { - mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint); - block_savepoint--; - } - } + node_ptr = page_cur_get_rec(page_cursor); - /* Go to the child node */ - page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets)); + offsets = rec_get_offsets(node_ptr, index, offsets, 0, + ULINT_UNDEFINED, &heap); - if (!--height) - { - /* We are about to access the leaf level. */ - rw_latch= RW_NO_LATCH; + /* If the rec is the first or last in the page for + pessimistic delete intention, it might cause node_ptr insert + for the upper level. We should change the intention and retry. + */ + if (latch_mode == BTR_MODIFY_TREE + && btr_cur_need_opposite_intention( + page, lock_intention, node_ptr)) { - switch (latch_mode) { - case BTR_MODIFY_ROOT_AND_LEAF: - rw_latch= RW_X_LATCH; - break; - default: - break; - case BTR_MODIFY_PREV: - /* This is almost exclusively for ibuf_insert(), but also for - btr_pcur_move_to_prev(); the latter is not exercised by mtr */ - case BTR_SEARCH_PREV: - if (page_has_prev(page) && page_rec_is_first(page_cur.rec, page)) - { - ut_ad(block_savepoint + 1 == mtr->get_savepoint()); - /* Latch the previous page if the node pointer is the leftmost - of the current page. */ - buf_block_t *left= btr_block_get(*index(), btr_page_get_prev(page), - RW_NO_LATCH, false, mtr, &err); - if (!left) - goto func_exit; - static_assert(mtr_memo_type_t(BTR_MODIFY_PREV & ~4) == - MTR_MEMO_PAGE_X_FIX, ""); - static_assert(mtr_memo_type_t(BTR_SEARCH_PREV & ~4) == - MTR_MEMO_PAGE_S_FIX, ""); - mtr->lock_register(block_savepoint + 1, - mtr_memo_type_t(latch_mode & ~4)); - /* Because we are violating the latching order here, we will - have to temporarily release the right page latch if the left - page latch cannot be acquired without waiting. Concurrent page - splits or merges are impossible because we are holding a latch - on the parent of these sibling pages. */ - if (latch_mode == BTR_MODIFY_PREV) - { - if (!left->page.lock.x_lock_try()) - { - block->page.lock.x_unlock(); - left->page.lock.x_lock(); - } - } - else if (!left->page.lock.s_lock_try()) - { - block->page.lock.s_unlock(); - left->page.lock.s_lock(); - } -#ifdef BTR_CUR_HASH_ADAPT - btr_search_drop_page_hash_index(left, true); +need_opposite_intention: + ut_ad(upper_rw_latch == RW_X_LATCH); + + if (n_releases > 0) { + /* release root block */ + mtr_release_block_at_savepoint( + mtr, tree_savepoints[0], + tree_blocks[0]); + } + + /* release all blocks */ + for (; n_releases <= n_blocks; n_releases++) { + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + + lock_intention = BTR_INTENTION_BOTH; + + page_id.set_page_no(index->page); + up_match = 0; + low_match = 0; + height = ULINT_UNDEFINED; + + n_blocks = 0; + n_releases = 0; + + goto search_loop; + } + + if (dict_index_is_spatial(index)) { + if (page_rec_is_supremum(node_ptr)) { + cursor->low_match = 0; + cursor->up_match = 0; + goto func_exit; + } + + /* If we are doing insertion or record locating, + remember the tree nodes we visited */ + if (page_mode == PAGE_CUR_RTREE_INSERT + || (search_mode == PAGE_CUR_RTREE_LOCATE + && (latch_mode != BTR_MODIFY_LEAF))) { + bool add_latch = false; + + if (latch_mode == BTR_MODIFY_TREE + && rw_latch == RW_NO_LATCH) { + ut_ad(mtr->memo_contains_flagged( + &index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + block->page.lock.s_lock(); + add_latch = true; + } + + /* Store the parent cursor location */ +#ifdef UNIV_DEBUG + ulint num_stored = rtr_store_parent_path( + block, cursor, latch_mode, + height + 1, mtr); +#else + rtr_store_parent_path( + block, cursor, latch_mode, + height + 1, mtr); #endif - } - break; - case BTR_MODIFY_LEAF: - case BTR_SEARCH_LEAF: - if (index()->is_ibuf()) - break; - rw_latch= rw_lock_type_t(latch_mode); - if (btr_op != BTR_NO_OP && - ibuf_should_try(index(), btr_op != BTR_INSERT_OP)) - /* Try to buffer the operation if the leaf page - is not in the buffer pool. */ - buf_mode= btr_op == BTR_DELETE_OP - ? BUF_GET_IF_IN_POOL_OR_WATCH - : BUF_GET_IF_IN_POOL; - break; - case BTR_MODIFY_TREE: - if (index()->is_ibuf()) - break; - if (lock_intention == BTR_INTENTION_INSERT && page_has_next(page) && - page_rec_is_last(page_cur.rec, page)) - { - /* btr_insert_into_right_sibling() might cause deleting node_ptr - at upper level */ - mtr->rollback_to_savepoint(block_savepoint); - goto need_opposite_intention; - } - } - } - goto search_loop; -} + if (page_mode == PAGE_CUR_RTREE_INSERT) { + btr_pcur_t* r_cursor = + rtr_get_parent_cursor( + cursor, height + 1, + true); + /* If it is insertion, there should + be only one parent for each level + traverse */ +#ifdef UNIV_DEBUG + ut_ad(num_stored == 1); +#endif -ATTRIBUTE_COLD -dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple, - page_cur_mode_t mode, mtr_t *mtr) -{ - ut_ad(index()->is_btree() || index()->is_ibuf()); - ut_ad(!index()->is_ibuf() || ibuf_inside(mtr)); + node_ptr = btr_pcur_get_rec(r_cursor); - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets = offsets_; - rec_offs_init(offsets_); + } - ut_ad(flag == BTR_CUR_BINARY); - ut_ad(dict_index_check_search_tuple(index(), tuple)); - ut_ad(dtuple_check_typed(tuple)); - buf_block_t *block= mtr->at_savepoint(1); - ut_ad(block->page.id().page_no() == index()->page); - block->page.fix(); - mtr->rollback_to_savepoint(1); - ut_ad(mtr->memo_contains_flagged(&index()->lock, - MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK)); - - const page_cur_mode_t page_mode{btr_cur_nonleaf_mode(mode)}; - - mtr->page_lock(block, RW_X_LATCH); - - up_match= 0; - up_bytes= 0; - low_match= 0; - low_bytes= 0; - ulint height= btr_page_get_level(block->page.frame); - tree_height= height + 1; - mem_heap_t *heap= nullptr; + if (add_latch) { + block->page.lock.s_unlock(); + } - search_loop: - dberr_t err; - page_cur.block= block; + ut_ad(!page_rec_is_supremum(node_ptr)); + } - if (UNIV_UNLIKELY(!height)) - { - if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, - &page_cur, nullptr)) - corrupted: - err= DB_CORRUPTION; - else - { - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(page_mode == search_mode + || (page_mode == PAGE_CUR_WITHIN + && search_mode == PAGE_CUR_RTREE_LOCATE)); -#ifdef BTR_CUR_HASH_ADAPT - /* We do a dirty read of btr_search_enabled here. We will - properly check btr_search_enabled again in - btr_search_build_page_hash_index() before building a page hash - index, while holding search latch. */ - if (!btr_search_enabled); - else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) - /* This may be a search tuple for btr_pcur_t::restore_position(). */ - ut_ad(tuple->is_metadata() || - (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT))); - else if (index()->table->is_temporary()); - else if (!rec_is_metadata(page_cur.rec, *index())) - btr_search_info_update(index(), this); -#endif /* BTR_CUR_HASH_ADAPT */ - err= DB_SUCCESS; - } + page_mode = search_mode; + } - func_exit: - if (UNIV_LIKELY_NULL(heap)) - mem_heap_free(heap); - return err; - } + /* If the first or the last record of the page + or the same key value to the first record or last record, + the another page might be chosen when BTR_CONT_MODIFY_TREE. + So, the parent page should not released to avoiding deadlock + with blocking the another search with the same key value. */ + if (!detected_same_key_root + && lock_intention == BTR_INTENTION_BOTH + && !dict_index_is_unique(index) + && latch_mode == BTR_MODIFY_TREE + && (up_match >= rec_offs_n_fields(offsets) - 1 + || low_match >= rec_offs_n_fields(offsets) - 1)) { + const rec_t* first_rec = page_rec_get_next_const( + page_get_infimum_rec(page)); + ulint matched_fields; - if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match, - &page_cur, nullptr)) - goto corrupted; + ut_ad(upper_rw_latch == RW_X_LATCH); - page_id_t page_id{block->page.id()}; + if (UNIV_UNLIKELY(!first_rec)) { + corrupted: + err = DB_CORRUPTION; + goto func_exit; + } + if (node_ptr == first_rec + || page_rec_is_last(node_ptr, page)) { + detected_same_key_root = true; + } else { + matched_fields = 0; + + offsets2 = rec_get_offsets( + first_rec, index, offsets2, + 0, ULINT_UNDEFINED, &heap); + cmp_rec_rec(node_ptr, first_rec, + offsets, offsets2, index, false, + &matched_fields); + + if (matched_fields + >= rec_offs_n_fields(offsets) - 1) { + detected_same_key_root = true; + } else if (const rec_t* last_rec + = page_rec_get_prev_const( + page_get_supremum_rec( + page))) { + matched_fields = 0; + + offsets2 = rec_get_offsets( + last_rec, index, offsets2, + 0, ULINT_UNDEFINED, &heap); + cmp_rec_rec( + node_ptr, last_rec, + offsets, offsets2, index, + false, &matched_fields); + if (matched_fields + >= rec_offs_n_fields(offsets) - 1) { + detected_same_key_root = true; + } + } else { + goto corrupted; + } + } + } - offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED, - &heap); - /* Go to the child node */ - page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets)); + /* If the page might cause modify_tree, + we should not release the parent page's lock. */ + if (!detected_same_key_root + && latch_mode == BTR_MODIFY_TREE + && !btr_cur_will_modify_tree( + index, page, lock_intention, node_ptr, + node_ptr_max_size, zip_size, mtr) + && !rtree_parent_modified) { + ut_ad(upper_rw_latch == RW_X_LATCH); + ut_ad(n_releases <= n_blocks); + + /* we can release upper blocks */ + for (; n_releases < n_blocks; n_releases++) { + if (n_releases == 0) { + /* we should not release root page + to pin to same block. */ + continue; + } - const auto block_savepoint= mtr->get_savepoint(); - block= - buf_page_get_gen(page_id, block->zip_size(), RW_NO_LATCH, nullptr, BUF_GET, - mtr, &err, !--height && !index()->is_clust()); + /* release unused blocks to unpin */ + mtr_release_block_at_savepoint( + mtr, tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + } - if (!block) - { - if (err == DB_DECRYPTION_FAILED) - btr_decryption_failed(*index()); - goto func_exit; - } + if (height == level + && latch_mode == BTR_MODIFY_TREE) { + ut_ad(upper_rw_latch == RW_X_LATCH); + /* we should sx-latch root page, if released already. + It contains seg_header. */ + if (n_releases > 0) { + mtr->sx_latch_at_savepoint( + tree_savepoints[0], + tree_blocks[0]); + } - if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() || - btr_page_get_index_id(block->page.frame) != index()->id || - fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || - !fil_page_index_page_check(block->page.frame)) - goto corrupted; + /* x-latch the branch blocks not released yet. */ + for (ulint i = n_releases; i <= n_blocks; i++) { + mtr->x_latch_at_savepoint( + tree_savepoints[i], + tree_blocks[i]); + } + } - if (height != btr_page_get_level(block->page.frame)) - goto corrupted; + /* We should consider prev_page of parent page, if the node_ptr + is the leftmost of the page. because BTR_SEARCH_PREV and + BTR_MODIFY_PREV latches prev_page of the leaf page. */ + if ((latch_mode == BTR_SEARCH_PREV + || latch_mode == BTR_MODIFY_PREV) + && !prev_tree_blocks) { + /* block should be latched for consistent + btr_page_get_prev() */ + ut_ad(mtr->memo_contains_flagged( + block, MTR_MEMO_PAGE_S_FIX + | MTR_MEMO_PAGE_X_FIX)); - if (page_has_prev(block->page.frame) && - !btr_block_get(*index(), btr_page_get_prev(block->page.frame), - RW_X_LATCH, false, mtr, &err)) - goto func_exit; - mtr->x_latch_at_savepoint(block_savepoint, block); -#ifdef BTR_CUR_HASH_ADAPT - btr_search_drop_page_hash_index(block, true); + if (page_has_prev(page) + && page_rec_is_first(node_ptr, page)) { + + if (leftmost_from_level == 0) { + leftmost_from_level = height + 1; + } + } else { + leftmost_from_level = 0; + } + + if (height == 0 && leftmost_from_level > 0) { + /* should retry to get also prev_page + from level==leftmost_from_level. */ + prev_tree_blocks = static_cast<buf_block_t**>( + ut_malloc_nokey(sizeof(buf_block_t*) + * leftmost_from_level)); + + prev_tree_savepoints = static_cast<ulint*>( + ut_malloc_nokey(sizeof(ulint) + * leftmost_from_level)); + + /* back to the level (leftmost_from_level+1) */ + ulint idx = n_blocks + - (leftmost_from_level - 1); + + page_id.set_page_no( + tree_blocks[idx]->page.id().page_no()); + + for (ulint i = n_blocks + - (leftmost_from_level - 1); + i <= n_blocks; i++) { + mtr_release_block_at_savepoint( + mtr, tree_savepoints[i], + tree_blocks[i]); + } + + n_blocks -= (leftmost_from_level - 1); + height = leftmost_from_level; + ut_ad(n_releases == 0); + + /* replay up_match, low_match */ + up_match = 0; + low_match = 0; + rtr_info_t* rtr_info = need_path + ? cursor->rtr_info : NULL; + + for (ulint i = 0; i < n_blocks; i++) { + page_cursor->block = tree_blocks[i]; + if (page_cur_search_with_match( + tuple, + page_mode, &up_match, + &low_match, page_cursor, + rtr_info)) { + err = DB_CORRUPTION; + goto func_exit; + } + } + + goto search_loop; + } + } + + /* Go to the child node */ + page_id.set_page_no( + btr_node_ptr_get_child_page_no(node_ptr, offsets)); + + n_blocks++; + + if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) { + /* We're doing a search on an ibuf tree and we're one + level above the leaf page. */ + + ut_ad(level == 0); + + buf_mode = BUF_GET; + rw_latch = RW_NO_LATCH; + goto retry_page_get; + } + + if (dict_index_is_spatial(index) + && page_mode >= PAGE_CUR_CONTAIN + && page_mode != PAGE_CUR_RTREE_INSERT) { + ut_ad(need_path); + rtr_node_path_t* path = + cursor->rtr_info->path; + + if (!path->empty() && found) { + ut_ad(path->back().page_no + == page_id.page_no()); + path->pop_back(); +#ifdef UNIV_DEBUG + if (page_mode == PAGE_CUR_RTREE_LOCATE + && (latch_mode != BTR_MODIFY_LEAF)) { + btr_pcur_t* cur + = cursor->rtr_info->parent_path->back( + ).cursor; + rec_t* my_node_ptr + = btr_pcur_get_rec(cur); + + offsets = rec_get_offsets( + my_node_ptr, index, offsets, + 0, ULINT_UNDEFINED, &heap); + + ulint my_page_no + = btr_node_ptr_get_child_page_no( + my_node_ptr, offsets); + + ut_ad(page_id.page_no() == my_page_no); + } #endif -#ifdef UNIV_ZIP_DEBUG - const page_zip_des_t *page_zip= buf_block_get_page_zip(block); - ut_a(!page_zip || page_zip_validate(page_zip, page, index())); -#endif /* UNIV_ZIP_DEBUG */ - if (page_has_next(block->page.frame) && - !btr_block_get(*index(), btr_page_get_next(block->page.frame), - RW_X_LATCH, false, mtr, &err)) - goto func_exit; - goto search_loop; -} + } + } -/********************************************************************//** -Searches an index tree and positions a tree cursor on a given non-leaf level. -NOTE: n_fields_cmp in tuple must be set so that it cannot be compared -to node pointer page number fields on the upper levels of the tree! -cursor->up_match and cursor->low_match both will have sensible values. -Cursor is left at the place where an insert of the -search tuple should be performed in the B-tree. InnoDB does an insert -immediately after the cursor. Thus, the cursor may end up on a user record, -or on a page infimum record. -@param level the tree level of search -@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that - it cannot get compared to the node ptr page number field! -@param latch RW_S_LATCH or RW_X_LATCH -@param cursor tree cursor; the cursor page is s- or x-latched, but see also - above! -@param mtr mini-transaction -@return DB_SUCCESS on success or error code otherwise */ -TRANSACTIONAL_TARGET -dberr_t btr_cur_search_to_nth_level(ulint level, - const dtuple_t *tuple, - rw_lock_type_t rw_latch, - btr_cur_t *cursor, mtr_t *mtr) -{ - dict_index_t *const index= cursor->index(); + goto search_loop; + } else if (!dict_index_is_spatial(index) + && latch_mode == BTR_MODIFY_TREE + && lock_intention == BTR_INTENTION_INSERT + && page_has_next(page) + && page_rec_is_last(page_cur_get_rec(page_cursor), page)) { + + /* btr_insert_into_right_sibling() might cause + deleting node_ptr at upper level */ + + guess = NULL; + + if (height == 0) { + /* release the leaf pages if latched */ + for (uint i = 0; i < 3; i++) { + if (latch_leaves.blocks[i] != NULL) { + mtr_release_block_at_savepoint( + mtr, latch_leaves.savepoints[i], + latch_leaves.blocks[i]); + latch_leaves.blocks[i] = NULL; + } + } + } - ut_ad(index->is_btree() || index->is_ibuf()); - mem_heap_t *heap= nullptr; - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs *offsets= offsets_; - rec_offs_init(offsets_); - ut_ad(level); - ut_ad(dict_index_check_search_tuple(index, tuple)); - ut_ad(index->is_ibuf() ? ibuf_inside(mtr) : index->is_btree()); - ut_ad(dtuple_check_typed(tuple)); - ut_ad(index->page != FIL_NULL); - - MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes); - MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes); - cursor->up_match= 0; - cursor->low_match= 0; - cursor->flag= BTR_CUR_BINARY; + goto need_opposite_intention; + } -#ifndef BTR_CUR_ADAPT - buf_block_t *block= nullptr; -#else - btr_search_t *info= btr_search_get_info(index); - buf_block_t *block= info->root_guess; -#endif /* BTR_CUR_ADAPT */ + if (level != 0) { + ut_ad(!autoinc); - ut_ad(mtr->memo_contains_flagged(&index->lock, - MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); + if (upper_rw_latch == RW_NO_LATCH) { + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE + || latch_mode == BTR_CONT_SEARCH_TREE); + btr_block_get( + *index, page_id.page_no(), + latch_mode == BTR_CONT_MODIFY_TREE + ? RW_X_LATCH : RW_SX_LATCH, false, mtr, &err); + } else { + ut_ad(mtr->memo_contains_flagged(block, + upper_rw_latch)); + + if (latch_by_caller) { + ut_ad(latch_mode == BTR_SEARCH_TREE); + /* to exclude modifying tree operations + should sx-latch the index. */ + ut_ad(mtr->memo_contains(index->lock, + MTR_MEMO_SX_LOCK)); + /* because has sx-latch of index, + can release upper blocks. */ + for (; n_releases < n_blocks; n_releases++) { + mtr_release_block_at_savepoint( + mtr, + tree_savepoints[n_releases], + tree_blocks[n_releases]); + } + } + } - const ulint zip_size= index->table->space->zip_size(); + if (page_mode <= PAGE_CUR_LE) { + cursor->low_match = low_match; + cursor->up_match = up_match; + } + } else { + cursor->low_match = low_match; + cursor->low_bytes = low_bytes; + cursor->up_match = up_match; + cursor->up_bytes = up_bytes; - /* Start with the root page. */ - page_id_t page_id(index->table->space_id, index->page); - ulint height= ULINT_UNDEFINED; + if (autoinc) { + page_set_autoinc(tree_blocks[0], autoinc, mtr, false); + } -search_loop: - dberr_t err= DB_SUCCESS; - if (buf_block_t *b= - mtr->get_already_latched(page_id, mtr_memo_type_t(rw_latch))) - block= b; - else if (!(block= buf_page_get_gen(page_id, zip_size, rw_latch, - block, BUF_GET, mtr, &err))) - { - if (err == DB_DECRYPTION_FAILED) - btr_decryption_failed(*index); - goto func_exit; - } +#ifdef BTR_CUR_HASH_ADAPT + /* We do a dirty read of btr_search_enabled here. We + will properly check btr_search_enabled again in + btr_search_build_page_hash_index() before building a + page hash index, while holding search latch. */ + if (!btr_search_enabled) { + } else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) { + /* This may be a search tuple for + btr_pcur_t::restore_position(). */ + ut_ad(tuple->is_metadata() + || (tuple->is_metadata(tuple->info_bits + ^ REC_STATUS_INSTANT))); + } else if (index->is_spatial()) { + } else if (index->table->is_temporary()) { + } else if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { + /* Only user records belong in the adaptive + hash index. */ + } else { + btr_search_info_update(index, cursor); + } +#endif /* BTR_CUR_HASH_ADAPT */ + ut_ad(cursor->up_match != ULINT_UNDEFINED + || mode != PAGE_CUR_GE); + ut_ad(cursor->up_match != ULINT_UNDEFINED + || mode != PAGE_CUR_LE); + ut_ad(cursor->low_match != ULINT_UNDEFINED + || mode != PAGE_CUR_LE); + } + + /* For spatial index, remember what blocks are still latched */ + if (dict_index_is_spatial(index) + && (latch_mode == BTR_MODIFY_TREE + || latch_mode == BTR_MODIFY_LEAF)) { + for (ulint i = 0; i < n_releases; i++) { + cursor->rtr_info->tree_blocks[i] = NULL; + cursor->rtr_info->tree_savepoints[i] = 0; + } -#ifdef UNIV_ZIP_DEBUG - if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block)) - ut_a(page_zip_validate(page_zip, block->page.frame, index)); -#endif /* UNIV_ZIP_DEBUG */ + for (ulint i = n_releases; i <= n_blocks; i++) { + cursor->rtr_info->tree_blocks[i] = tree_blocks[i]; + cursor->rtr_info->tree_savepoints[i] = tree_savepoints[i]; + } + } - if (!!page_is_comp(block->page.frame) != index->table->not_redundant() || - btr_page_get_index_id(block->page.frame) != index->id || - fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || - !fil_page_index_page_check(block->page.frame)) - { - corrupted: - err= DB_CORRUPTION; - func_exit: - if (UNIV_LIKELY_NULL(heap)) - mem_heap_free(heap); - return err; - } +func_exit: - const uint32_t page_level= btr_page_get_level(block->page.frame); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } - if (height == ULINT_UNDEFINED) - { - /* We are in the root node */ - height= page_level; - if (!height) - goto corrupted; - cursor->tree_height= height + 1; - } - else if (height != ulint{page_level}) - goto corrupted; - - cursor->page_cur.block= block; - - /* Search for complete index fields. */ - if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &cursor->up_match, - &cursor->low_match, &cursor->page_cur, - nullptr)) - goto corrupted; - - /* If this is the desired level, leave the loop */ - if (level == height) - goto func_exit; - - ut_ad(height > level); - height--; - - offsets = rec_get_offsets(cursor->page_cur.rec, index, offsets, 0, - ULINT_UNDEFINED, &heap); - /* Go to the child node */ - page_id.set_page_no(btr_node_ptr_get_child_page_no(cursor->page_cur.rec, - offsets)); - block= nullptr; - goto search_loop; + ut_free(prev_tree_blocks); + ut_free(prev_tree_savepoints); + + if (mbr_adj) { + /* remember that we will need to adjust parent MBR */ + cursor->rtr_info->mbr_adj = true; + } + + DBUG_RETURN(err); } dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode, mtr_t *mtr) { + ulint node_ptr_max_size= srv_page_size / 2; btr_intention_t lock_intention; ulint n_blocks= 0; mem_heap_t *heap= nullptr; @@ -1860,21 +2424,29 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, rec_offs_init(offsets_); const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; - latch_mode= btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED); + latch_mode = btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED); lock_intention= btr_cur_get_and_clear_intention(&latch_mode); + /* This function doesn't need to lock left page of the leaf page */ + if (latch_mode == BTR_SEARCH_PREV) + latch_mode= BTR_SEARCH_LEAF; + else if (latch_mode == BTR_MODIFY_PREV) + latch_mode= BTR_MODIFY_LEAF; + /* Store the position of the tree latch we push to mtr so that we know how to release it when we have latched the leaf node */ auto savepoint= mtr->get_savepoint(); rw_lock_type_t upper_rw_latch= RW_X_LATCH; - ulint node_ptr_max_size= 0; - if (latch_mode == BTR_MODIFY_TREE) - { - node_ptr_max_size= btr_node_ptr_max_size(index); + switch (latch_mode) { + case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: + abort(); + break; + case BTR_MODIFY_TREE: /* Most of delete-intended operations are purging. Free blocks and read IO bandwidth should be prioritized for them, when the history list is growing huge. */ @@ -1885,35 +2457,32 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, mtr_x_lock_index(index, mtr); else mtr_sx_lock_index(index, mtr); - } - else - { - static_assert(int{BTR_CONT_MODIFY_TREE} == (12 | BTR_MODIFY_LEAF), ""); - ut_ad(!(latch_mode & 8)); - /* This function doesn't need to lock left page of the leaf page */ - static_assert(int{BTR_SEARCH_PREV} == (4 | BTR_SEARCH_LEAF), ""); - static_assert(int{BTR_MODIFY_PREV} == (4 | BTR_MODIFY_LEAF), ""); - latch_mode= btr_latch_mode(latch_mode & ~4); + break; + default: ut_ad(!latch_by_caller || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_SX_LOCK | MTR_MEMO_S_LOCK)); upper_rw_latch= RW_S_LATCH; - if (!latch_by_caller) - { - savepoint++; - mtr_s_lock_index(index, mtr); - } + if (latch_by_caller) + break; + ut_ad(latch_mode != BTR_SEARCH_TREE); + savepoint++; + mtr_s_lock_index(index, mtr); } ut_ad(savepoint == mtr->get_savepoint()); - const rw_lock_type_t root_leaf_rw_latch= rw_lock_type_t(latch_mode & ~12); + const rw_lock_type_t root_leaf_rw_latch= + btr_cur_latch_for_root_leaf(latch_mode); page_cur.index = index; uint32_t page= index->page; const auto zip_size= index->table->space->zip_size(); + if (root_leaf_rw_latch == RW_X_LATCH) + node_ptr_max_size= btr_node_ptr_max_size(index); + for (ulint height= ULINT_UNDEFINED;;) { ut_ad(n_blocks < BTR_MAX_LEVELS); @@ -1962,15 +2531,20 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, reached_leaf: const auto leaf_savepoint= mtr->get_savepoint(); ut_ad(leaf_savepoint); - ut_ad(block == mtr->at_savepoint(leaf_savepoint - 1)); if (rw_latch == RW_NO_LATCH) - btr_cur_latch_leaves(leaf_savepoint - 1, latch_mode, this, mtr); + btr_cur_latch_leaves(block, latch_mode, this, mtr); - if (latch_mode != BTR_MODIFY_TREE) + switch (latch_mode) { + case BTR_MODIFY_TREE: + case BTR_CONT_MODIFY_TREE: + case BTR_CONT_SEARCH_TREE: + break; + default: /* Release index->lock if needed, and the non-leaf pages. */ mtr->rollback_to_savepoint(savepoint - !latch_by_caller, leaf_savepoint - 1); + } break; } } @@ -4095,15 +4669,16 @@ btr_cur_pessimistic_update( } } -#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled - if (!big_rec_vec + if (!srv_read_only_mode + && !big_rec_vec && page_is_leaf(block->page.frame) && !dict_index_is_online_ddl(index)) { +#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled mtr->release(index->lock); +#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } -#endif err = DB_SUCCESS; goto return_after_reservations; @@ -4845,14 +5420,15 @@ return_after_reservations: err_exit: mem_heap_free(heap); -#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled - if (page_is_leaf(page) + if (!srv_read_only_mode + && page_is_leaf(page) && !dict_index_is_online_ddl(index)) { +#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled mtr->release(index->lock); +#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } -#endif index->table->space->release_free_extents(n_reserved); return(ret); @@ -4969,18 +5545,16 @@ public: buf_block_t *parent_block= m_block; ulint parent_savepoint= m_savepoint; + m_savepoint= mtr_set_savepoint(&mtr); m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH, !level, &mtr, nullptr); - if (!m_block) - return false; if (parent_block && parent_block != right_parent) - mtr.rollback_to_savepoint(parent_savepoint, parent_savepoint + 1); - - m_savepoint= mtr.get_savepoint() - 1; + mtr_release_block_at_savepoint(&mtr, parent_savepoint, parent_block); - return level == ULINT_UNDEFINED || - btr_page_get_level(m_block->page.frame) == level; + return m_block && + (level == ULINT_UNDEFINED || + btr_page_get_level(buf_block_get_frame(m_block)) == level); } /** Sets page mode for leaves */ @@ -5187,18 +5761,14 @@ static ha_rows btr_estimate_n_rows_in_range_on_level( buf_block_t *prev_block= block; ulint prev_savepoint= savepoint; - savepoint= mtr.get_savepoint(); + savepoint= mtr_set_savepoint(&mtr); /* Fetch the page. */ block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, !level, &mtr, nullptr); if (prev_block) - { - mtr.rollback_to_savepoint(prev_savepoint, prev_savepoint + 1); - if (block) - savepoint--; - } + mtr_release_block_at_savepoint(&mtr, prev_savepoint, prev_block); if (!block || btr_page_get_level(buf_block_get_frame(block)) != level) goto inexact; @@ -5227,20 +5797,14 @@ static ha_rows btr_estimate_n_rows_in_range_on_level( } while (page_id.page_no() != right_page_no); if (block) - { - ut_ad(block == mtr.at_savepoint(savepoint)); - mtr.rollback_to_savepoint(savepoint, savepoint + 1); - } + mtr_release_block_at_savepoint(&mtr, savepoint, block); return (n_rows); inexact: if (block) - { - ut_ad(block == mtr.at_savepoint(savepoint)); - mtr.rollback_to_savepoint(savepoint, savepoint + 1); - } + mtr_release_block_at_savepoint(&mtr, savepoint, block); is_n_rows_exact= false; @@ -5299,7 +5863,9 @@ ha_rows btr_estimate_n_rows_in_range(dict_index_t *index, mtr.start(); - ut_ad(mtr.get_savepoint() == 0); + /* Store the position of the tree latch we push to mtr so that we + know how to release it when we have latched leaf node(s) */ + ulint savepoint= mtr_set_savepoint(&mtr); mtr_s_lock_index(index, &mtr); ha_rows table_n_rows= dict_table_get_n_rows(index->table); @@ -5354,10 +5920,10 @@ search_loop: } if (height == 0) - /* There is no need to release non-leaf pages here as they must already be + /* There is no need to unlach non-leaf pages here as they must already be unlatched in btr_est_cur_t::fetch_child(). Try to search on pages after - releasing the index latch, to decrease contention. */ - mtr.rollback_to_savepoint(0, 1); + index->lock unlatching to decrease contention. */ + mtr_release_s_latch_at_savepoint(&mtr, savepoint, &index->lock); /* There is no need to search on left page if divergence_height != ULINT_UNDEFINED, as it was already searched before diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 4e0a7d1f86a..76b173359da 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved. -Copyright (C) 2014, 2023, MariaDB Corporation. +Copyright (C) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -280,70 +280,6 @@ btr_defragment_calc_n_recs_for_size( return n_recs; } -MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) -/************************************************************//** -Returns the upper level node pointer to a page. It is assumed that mtr holds -an sx-latch on the tree. -@return rec_get_offsets() of the node pointer record */ -static -rec_offs* -btr_page_search_father_node_ptr( - rec_offs* offsets,/*!< in: work area for the return value */ - mem_heap_t* heap, /*!< in: memory heap to use */ - btr_cur_t* cursor, /*!< in: cursor pointing to user record, - out: cursor on node pointer record, - its page x-latched */ - mtr_t* mtr) /*!< in: mtr */ -{ - const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); - dict_index_t* index = btr_cur_get_index(cursor); - ut_ad(!index->is_spatial()); - - ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - ut_ad(dict_index_get_page(index) != page_no); - - const auto level = btr_page_get_level(btr_cur_get_page(cursor)); - - const rec_t* user_rec = btr_cur_get_rec(cursor); - ut_a(page_rec_is_user_rec(user_rec)); - - if (btr_cur_search_to_nth_level(level + 1, - dict_index_build_node_ptr(index, - user_rec, 0, - heap, level), - RW_X_LATCH, - cursor, mtr) != DB_SUCCESS) { - return nullptr; - } - - const rec_t* node_ptr = btr_cur_get_rec(cursor); - ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive() - || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); - - offsets = rec_get_offsets(node_ptr, index, offsets, 0, - ULINT_UNDEFINED, &heap); - - if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) { - offsets = nullptr; - } - - return(offsets); -} - -static bool btr_page_search_father(mtr_t *mtr, btr_cur_t *cursor) -{ - rec_t *rec= - page_rec_get_next(page_get_infimum_rec(cursor->block()->page.frame)); - if (UNIV_UNLIKELY(!rec)) - return false; - cursor->page_cur.rec= rec; - mem_heap_t *heap= mem_heap_create(100); - const bool got= btr_page_search_father_node_ptr(nullptr, heap, cursor, mtr); - mem_heap_free(heap); - return got; -} - /*************************************************************//** Merge as many records from the from_block to the to_block. Delete the from_block if all records are successfully merged to to_block. @@ -472,7 +408,7 @@ btr_defragment_merge_pages( parent.page_cur.index = index; parent.page_cur.block = from_block; - if (!btr_page_search_father(mtr, &parent)) { + if (!btr_page_get_father(mtr, &parent)) { to_block = nullptr; } else if (n_recs_to_move == n_recs) { /* The whole page is merged with the previous page, diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index 68699ede469..d731bcbb893 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2023, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -212,100 +212,24 @@ btr_pcur_copy_stored_position( pcur_receive->old_n_fields = pcur_donate->old_n_fields; } -/** Optimistically latches the leaf page or pages requested. -@param[in] block guessed buffer block -@param[in,out] latch_mode BTR_SEARCH_LEAF, ... -@param[in,out] pcur cursor -@param[in,out] mtr mini-transaction -@return true if success */ -TRANSACTIONAL_TARGET -static bool btr_pcur_optimistic_latch_leaves(buf_block_t *block, - btr_pcur_t *pcur, - btr_latch_mode *latch_mode, - mtr_t *mtr) -{ - ut_ad(block->page.buf_fix_count()); - ut_ad(block->page.in_file()); - ut_ad(block->page.frame); - - static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); - static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); - static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) == - (RW_S_LATCH ^ RW_X_LATCH), ""); - - const rw_lock_type_t mode= - rw_lock_type_t(*latch_mode & (RW_X_LATCH | RW_S_LATCH)); - - switch (*latch_mode) { - default: - ut_ad(*latch_mode == BTR_SEARCH_LEAF || *latch_mode == BTR_MODIFY_LEAF); - return buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr); - case BTR_SEARCH_PREV: /* btr_pcur_move_backward_from_page() */ - case BTR_MODIFY_PREV: /* Ditto, or ibuf_insert() */ - page_id_t id{0}; - uint32_t left_page_no; - ulint zip_size; - { - transactional_shared_lock_guard<block_lock> g{block->page.lock}; - if (block->modify_clock != pcur->modify_clock) - return false; - id= block->page.id(); - zip_size= block->zip_size(); - left_page_no= btr_page_get_prev(block->page.frame); - } - - if (left_page_no != FIL_NULL) - { - pcur->btr_cur.left_block= - buf_page_get_gen(page_id_t(id.space(), left_page_no), zip_size, - mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr); - - if (pcur->btr_cur.left_block && - btr_page_get_next(pcur->btr_cur.left_block->page.frame) != - id.page_no()) - { -release_left_block: - mtr->release_last_page(); - return false; - } - } - else - pcur->btr_cur.left_block= nullptr; - - if (buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr)) - { - if (btr_page_get_prev(block->page.frame) == left_page_no) - { - /* block was already buffer-fixed while entering the function and - buf_page_optimistic_get() buffer-fixes it again. */ - ut_ad(2 <= block->page.buf_fix_count()); - *latch_mode= btr_latch_mode(mode); - return true; - } - - mtr->release_last_page(); - } - - ut_ad(block->page.buf_fix_count()); - if (pcur->btr_cur.left_block) - goto release_left_block; - return false; - } -} - /** Structure acts as functor to do the latching of leaf pages. It returns true if latching of leaf pages succeeded and false otherwise. */ struct optimistic_latch_leaves { btr_pcur_t *const cursor; - btr_latch_mode *const latch_mode; + btr_latch_mode *latch_mode; mtr_t *const mtr; + optimistic_latch_leaves(btr_pcur_t *cursor, btr_latch_mode *latch_mode, + mtr_t *mtr) + : cursor(cursor), latch_mode(latch_mode), mtr(mtr) {} + bool operator() (buf_block_t *hint) const { - return hint && - btr_pcur_optimistic_latch_leaves(hint, cursor, latch_mode, mtr); + return hint && btr_cur_optimistic_latch_leaves( + hint, cursor->modify_clock, latch_mode, + btr_pcur_get_btr_cur(cursor), mtr); } }; @@ -379,8 +303,8 @@ btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr) /* Try optimistic restoration. */ if (block_when_stored.run_with_hint( - optimistic_latch_leaves{this, &restore_latch_mode, - mtr})) { + optimistic_latch_leaves(this, &restore_latch_mode, + mtr))) { pos_state = BTR_PCUR_IS_POSITIONED; latch_mode = restore_latch_mode; @@ -541,9 +465,18 @@ btr_pcur_move_to_next_page( return DB_CORRUPTION; } + ulint mode = cursor->latch_mode; + switch (mode) { + case BTR_SEARCH_TREE: + mode = BTR_SEARCH_LEAF; + break; + case BTR_MODIFY_TREE: + mode = BTR_MODIFY_LEAF; + } + dberr_t err; buf_block_t* next_block = btr_block_get( - *cursor->index(), next_page_no, cursor->latch_mode & ~12, + *cursor->index(), next_page_no, mode, page_is_leaf(page), mtr, &err); if (UNIV_UNLIKELY(!next_block)) { diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index a1609248512..fc890f9233b 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1055,24 +1055,26 @@ btr_search_guess_on_hash( index_id_t index_id; ut_ad(mtr->is_active()); - ut_ad(index->is_btree() || index->is_ibuf()); - /* Note that, for efficiency, the struct info may not be protected by - any latch here! */ - - if (latch_mode > BTR_MODIFY_LEAF - || !info->last_hash_succ || !info->n_hash_potential - || (tuple->info_bits & REC_INFO_MIN_REC_FLAG)) { + if (!btr_search_enabled) { return false; } - ut_ad(index->is_btree()); - ut_ad(!index->table->is_temporary()); - + ut_ad(!index->is_ibuf()); ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); compile_time_assert(ulint{BTR_SEARCH_LEAF} == ulint{RW_S_LATCH}); compile_time_assert(ulint{BTR_MODIFY_LEAF} == ulint{RW_X_LATCH}); + /* Not supported for spatial index */ + ut_ad(!dict_index_is_spatial(index)); + + /* Note that, for efficiency, the struct info may not be protected by + any latch here! */ + + if (info->n_hash_potential == 0) { + return false; + } + cursor->n_fields = info->n_fields; cursor->n_bytes = info->n_bytes; diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 614048b7ba0..daf3bc9a664 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -722,7 +722,7 @@ dict_build_field_def_step( } /***************************************************************//** -Creates an index tree for the index. +Creates an index tree for the index if it is not a member of a cluster. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t @@ -755,8 +755,9 @@ dict_create_index_tree_step( pcur.btr_cur.page_cur.index = UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes); - dberr_t err = btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, - &pcur, &mtr); + dberr_t err = + btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, + &pcur, 0, &mtr); if (err != DB_SUCCESS) { func_exit: @@ -767,25 +768,10 @@ func_exit: btr_pcur_move_to_next_user_rec(&pcur, &mtr); if (UNIV_UNLIKELY(btr_pcur_is_after_last_on_page(&pcur))) { -corrupted: err = DB_CORRUPTION; goto func_exit; } - ulint len; - byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), - DICT_FLD__SYS_INDEXES__ID, - &len); - if (UNIV_UNLIKELY(len != 8 || mach_read_from_8(data) != index->id)) { - goto corrupted; - } - - data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), - DICT_FLD__SYS_INDEXES__PAGE_NO, &len); - if (len != 4) { - goto corrupted; - } - if (index->is_readable()) { index->set_modified(mtr); @@ -798,6 +784,11 @@ corrupted: err = DB_OUT_OF_FILE_SPACE; ); } + ulint len; + byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), + DICT_FLD__SYS_INDEXES__PAGE_NO, + &len); + ut_ad(len == 4); mtr.write<4,mtr_t::MAYBE_NOP>(*btr_pcur_get_block(&pcur), data, node->page_no); goto func_exit; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 53d1031d270..21efb525fa8 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2,7 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2023, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4143,7 +4143,8 @@ void dict_set_corrupted(dict_index_t *index, const char *ctx) dict_index_copy_types(tuple, sys_index, 2); cursor.page_cur.index = sys_index; - if (cursor.search_leaf(tuple, PAGE_CUR_LE, BTR_MODIFY_LEAF, &mtr) + if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_LE, + BTR_MODIFY_LEAF, &cursor, &mtr) != DB_SUCCESS) { goto fail; } @@ -4218,7 +4219,8 @@ dict_index_set_merge_threshold( dict_index_copy_types(tuple, sys_index, 2); cursor.page_cur.index = sys_index; - if (cursor.search_leaf(tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &mtr) + if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &cursor, &mtr) != DB_SUCCESS) { goto func_exit; } diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 9910a000b5b..004b00615e8 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2023, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1321,7 +1321,7 @@ static dberr_t dict_load_columns(dict_table_t *table, unsigned use_uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -1452,7 +1452,7 @@ dict_load_virtual_col(dict_table_t *table, bool uncommitted, ulint nth_v_col) dict_index_copy_types(&tuple, sys_virtual_index, 2); pcur.btr_cur.page_cur.index = sys_virtual_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -1688,7 +1688,8 @@ static dberr_t dict_load_fields(dict_index_t *index, bool uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, + dberr_t error = btr_pcur_open_on_user_rec(&tuple, + PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (error != DB_SUCCESS) { goto func_exit; @@ -1946,7 +1947,8 @@ dberr_t dict_load_indexes(dict_table_t *table, bool uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, + dberr_t error = btr_pcur_open_on_user_rec(&tuple, + PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (error != DB_SUCCESS) { goto func_exit; @@ -2347,7 +2349,7 @@ static dict_table_t *dict_load_table_one(const span<const char> &name, bool uncommitted = false; reload: mtr.start(); - dberr_t err = btr_pcur_open_on_user_rec(&tuple, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS || !btr_pcur_is_on_user_rec(&pcur)) { @@ -2605,7 +2607,8 @@ dict_load_table_on_id( dict_table_t* table = nullptr; - if (btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr) + if (btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr) == DB_SUCCESS && btr_pcur_is_on_user_rec(&pcur)) { /*---------------------------------------------------*/ @@ -2711,7 +2714,7 @@ static dberr_t dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) pcur.btr_cur.page_cur.index = sys_index; mem_heap_t* heap = nullptr; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -2888,7 +2891,7 @@ dict_load_foreign( mtr.start(); mem_heap_t* heap = nullptr; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto err_exit; @@ -3099,7 +3102,7 @@ start_load: dict_index_copy_types(&tuple, sec_index, 1); pcur.btr_cur.page_cur.index = sec_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { DBUG_RETURN(err); diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 845f133f1a6..44fcf9f2c18 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2023, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1697,7 +1697,7 @@ static dberr_t page_cur_open_level(page_cur_t *page_cur, ulint level, static dberr_t btr_pcur_open_level(btr_pcur_t *pcur, ulint level, mtr_t *mtr, dict_index_t *index) { - pcur->latch_mode= BTR_SEARCH_LEAF; + pcur->latch_mode= BTR_SEARCH_TREE; pcur->search_mode= PAGE_CUR_G; pcur->pos_state= BTR_PCUR_IS_POSITIONED; pcur->btr_cur.page_cur.index= index; diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index e9f3106feb0..12e9a6913ba 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -122,22 +122,15 @@ MY_ATTRIBUTE((nonnull, warn_unused_result)) static buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr, dberr_t *err) { - const page_id_t id{space->id, 0}; - buf_block_t *block= mtr->get_already_latched(id, MTR_MEMO_PAGE_SX_FIX); - if (block) - *err= DB_SUCCESS; - else + buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0), + space->zip_size(), RW_SX_LATCH, + nullptr, BUF_GET_POSSIBLY_FREED, + mtr, err); + if (block && space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + + block->page.frame)) { - block= buf_page_get_gen(id, space->zip_size(), RW_SX_LATCH, - nullptr, BUF_GET_POSSIBLY_FREED, - mtr, err); - if (block && - space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + - block->page.frame)) - { - *err= DB_CORRUPTION; - block= nullptr; - } + *err= DB_CORRUPTION; + block= nullptr; } return block; } diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index 9eeeb9aeed3..207d49abeba 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2016, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -44,6 +44,7 @@ Created 2014/01/16 Jimmy Yang static bool rtr_cur_restore_position( + ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_cur_t* cursor, /*!< in: detached persistent cursor */ ulint level, /*!< in: index level */ mtr_t* mtr); /*!< in: mtr */ @@ -134,7 +135,6 @@ rtr_pcur_getnext_from_path( && (my_latch_mode | 4) == BTR_CONT_MODIFY_TREE; if (!index_locked) { - ut_ad(mtr->is_empty()); mtr_s_lock_index(index, mtr); } else { ut_ad(mtr->memo_contains_flagged(&index->lock, @@ -154,12 +154,14 @@ rtr_pcur_getnext_from_path( node_seq_t path_ssn; const page_t* page; rw_lock_type_t rw_latch; + ulint tree_idx; mysql_mutex_lock(&rtr_info->rtr_path_mutex); next_rec = rtr_info->path->back(); rtr_info->path->pop_back(); level = next_rec.level; path_ssn = next_rec.seq_no; + tree_idx = btr_cur->tree_height - level - 1; /* Maintain the parent path info as well, if needed */ if (need_parent && !skip_parent && !new_split) { @@ -221,15 +223,37 @@ rtr_pcur_getnext_from_path( rw_latch = RW_X_LATCH; } - if (my_latch_mode == BTR_MODIFY_LEAF) { - mtr->rollback_to_savepoint(1); + /* Release previous locked blocks */ + if (my_latch_mode != BTR_SEARCH_LEAF) { + for (ulint idx = 0; idx < btr_cur->tree_height; + idx++) { + if (rtr_info->tree_blocks[idx]) { + mtr_release_block_at_savepoint( + mtr, + rtr_info->tree_savepoints[idx], + rtr_info->tree_blocks[idx]); + rtr_info->tree_blocks[idx] = NULL; + } + } + for (ulint idx = RTR_MAX_LEVELS; idx < RTR_MAX_LEVELS + 3; + idx++) { + if (rtr_info->tree_blocks[idx]) { + mtr_release_block_at_savepoint( + mtr, + rtr_info->tree_savepoints[idx], + rtr_info->tree_blocks[idx]); + rtr_info->tree_blocks[idx] = NULL; + } + } } + /* set up savepoint to record any locks to be taken */ + rtr_info->tree_savepoints[tree_idx] = mtr_set_savepoint(mtr); + ut_ad((my_latch_mode | 4) == BTR_CONT_MODIFY_TREE || !page_is_leaf(btr_cur_get_page(btr_cur)) || !btr_cur->page_cur.block->page.lock.have_any()); - const auto block_savepoint = mtr->get_savepoint(); block = buf_page_get_gen( page_id_t(index->table->space_id, next_rec.page_no), zip_size, @@ -240,6 +264,8 @@ rtr_pcur_getnext_from_path( break; } + rtr_info->tree_blocks[tree_idx] = block; + page = buf_block_get_frame(block); page_ssn = page_get_ssn_id(page); @@ -370,23 +396,24 @@ rtr_pcur_getnext_from_path( if (found) { if (level == target_level) { - ut_ad(block - == mtr->at_savepoint(block_savepoint)); + page_cur_t* r_cur;; if (my_latch_mode == BTR_MODIFY_TREE && level == 0) { ut_ad(rw_latch == RW_NO_LATCH); btr_cur_latch_leaves( - block_savepoint, + block, BTR_MODIFY_TREE, btr_cur, mtr); } + r_cur = btr_cur_get_page_cur(btr_cur); + page_cur_position( page_cur_get_rec(page_cursor), page_cur_get_block(page_cursor), - btr_cur_get_page_cur(btr_cur)); + r_cur); btr_cur->low_match = level != 0 ? DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1 @@ -398,7 +425,13 @@ rtr_pcur_getnext_from_path( last node just located */ skip_parent = true; } else { - mtr->release_last_page(); + /* Release latch on the current page */ + ut_ad(rtr_info->tree_blocks[tree_idx]); + + mtr_release_block_at_savepoint( + mtr, rtr_info->tree_savepoints[tree_idx], + rtr_info->tree_blocks[tree_idx]); + rtr_info->tree_blocks[tree_idx] = NULL; } } while (!rtr_info->path->empty()); @@ -476,524 +509,50 @@ static void rtr_compare_cursor_rec(const rec_t *rec, dict_index_t *index, } #endif -TRANSACTIONAL_TARGET -dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, - page_cur_mode_t mode, - btr_latch_mode latch_mode, - btr_cur_t *cur, mtr_t *mtr) -{ - page_cur_mode_t page_mode; - page_cur_mode_t search_mode= PAGE_CUR_UNSUPP; - - bool mbr_adj= false; - bool found= false; - dict_index_t *const index= cur->index(); - - mem_heap_t *heap= nullptr; - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs *offsets= offsets_; - rec_offs_init(offsets_); - ut_ad(level == 0 || mode == PAGE_CUR_LE || RTREE_SEARCH_MODE(mode)); - ut_ad(dict_index_check_search_tuple(index, tuple)); - ut_ad(dtuple_check_typed(tuple)); - ut_ad(index->is_spatial()); - ut_ad(index->page != FIL_NULL); - - MEM_UNDEFINED(&cur->up_match, sizeof cur->up_match); - MEM_UNDEFINED(&cur->up_bytes, sizeof cur->up_bytes); - MEM_UNDEFINED(&cur->low_match, sizeof cur->low_match); - MEM_UNDEFINED(&cur->low_bytes, sizeof cur->low_bytes); - ut_d(cur->up_match= ULINT_UNDEFINED); - ut_d(cur->low_match= ULINT_UNDEFINED); - - const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; - - ut_ad(!latch_by_caller - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK - | MTR_MEMO_SX_LOCK)); - latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - - ut_ad(!latch_by_caller || latch_mode == BTR_SEARCH_LEAF || - latch_mode == BTR_MODIFY_LEAF); - - cur->flag= BTR_CUR_BINARY; - -#ifndef BTR_CUR_ADAPT - buf_block_t *guess= nullptr; -#else - btr_search_t *const info= btr_search_get_info(index); - buf_block_t *guess= info->root_guess; -#endif - - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched leaf node(s) */ - - const ulint savepoint= mtr->get_savepoint(); - - rw_lock_type_t upper_rw_latch, root_leaf_rw_latch= RW_NO_LATCH; - - switch (latch_mode) { - case BTR_MODIFY_TREE: - mtr_x_lock_index(index, mtr); - upper_rw_latch= root_leaf_rw_latch= RW_X_LATCH; - break; - case BTR_CONT_MODIFY_TREE: - ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | - MTR_MEMO_SX_LOCK)); - upper_rw_latch= RW_X_LATCH; - break; - default: - ut_ad(latch_mode != BTR_MODIFY_PREV); - ut_ad(latch_mode != BTR_SEARCH_PREV); - if (!latch_by_caller) - mtr_s_lock_index(index, mtr); - upper_rw_latch= root_leaf_rw_latch= RW_S_LATCH; - if (latch_mode == BTR_MODIFY_LEAF) - root_leaf_rw_latch= RW_X_LATCH; - } - - auto root_savepoint= mtr->get_savepoint(); - const ulint zip_size= index->table->space->zip_size(); - - /* Start with the root page. */ - page_id_t page_id(index->table->space_id, index->page); - - ulint up_match= 0, up_bytes= 0, low_match= 0, low_bytes= 0; - ulint height= ULINT_UNDEFINED; - - /* We use these modified search modes on non-leaf levels of the - B-tree. These let us end up in the right B-tree leaf. In that leaf - we use the original search mode. */ - - switch (mode) { - case PAGE_CUR_GE: - page_mode= PAGE_CUR_L; - break; - case PAGE_CUR_G: - page_mode= PAGE_CUR_LE; - break; - default: -#ifdef PAGE_CUR_LE_OR_EXTENDS - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode) - || mode == PAGE_CUR_LE_OR_EXTENDS); -#else /* PAGE_CUR_LE_OR_EXTENDS */ - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode)); -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - page_mode= mode; - break; - } - - search_loop: - auto buf_mode= BUF_GET; - ulint rw_latch= RW_NO_LATCH; - - if (height) - { - /* We are about to fetch the root or a non-leaf page. */ - if (latch_mode != BTR_MODIFY_TREE || height == level) - /* If doesn't have SX or X latch of index, - each page should be latched before reading. */ - rw_latch= upper_rw_latch; - } - else if (latch_mode <= BTR_MODIFY_LEAF) - rw_latch= latch_mode; - - dberr_t err; - auto block_savepoint= mtr->get_savepoint(); - buf_block_t *block= buf_page_get_gen(page_id, zip_size, rw_latch, guess, - buf_mode, mtr, &err, false); - if (!block) - { - if (err == DB_DECRYPTION_FAILED) - btr_decryption_failed(*index); - func_exit: - if (UNIV_LIKELY_NULL(heap)) - mem_heap_free(heap); - - if (mbr_adj) - /* remember that we will need to adjust parent MBR */ - cur->rtr_info->mbr_adj= true; - - return err; - } - - const page_t *page= buf_block_get_frame(block); -#ifdef UNIV_ZIP_DEBUG - if (rw_latch != RW_NO_LATCH) { - const page_zip_des_t *page_zip= buf_block_get_page_zip(block); - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); - } -#endif /* UNIV_ZIP_DEBUG */ - - ut_ad(fil_page_index_page_check(page)); - ut_ad(index->id == btr_page_get_index_id(page)); - - if (height != ULINT_UNDEFINED); - else if (page_is_leaf(page) && - rw_latch != RW_NO_LATCH && rw_latch != root_leaf_rw_latch) - { - /* The root page is also a leaf page (root_leaf). - We should reacquire the page, because the root page - is latched differently from leaf pages. */ - ut_ad(root_leaf_rw_latch != RW_NO_LATCH); - ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH); - - ut_ad(block == mtr->at_savepoint(block_savepoint)); - mtr->rollback_to_savepoint(block_savepoint); - - upper_rw_latch= root_leaf_rw_latch; - goto search_loop; - } - else - { - /* We are in the root node */ - - height= btr_page_get_level(page); - cur->tree_height= height + 1; - - ut_ad(cur->rtr_info); - - /* If SSN in memory is not initialized, fetch it from root page */ - if (!rtr_get_current_ssn_id(index)) - /* FIXME: do this in dict_load_table_one() */ - index->set_ssn(page_get_ssn_id(page) + 1); - - /* Save the MBR */ - cur->rtr_info->thr= cur->thr; - rtr_get_mbr_from_tuple(tuple, &cur->rtr_info->mbr); - -#ifdef BTR_CUR_ADAPT - info->root_guess= block; -#endif - } - - if (height == 0) { - if (rw_latch == RW_NO_LATCH) - { - ut_ad(block == mtr->at_savepoint(block_savepoint)); - btr_cur_latch_leaves(block_savepoint, latch_mode, cur, mtr); - } - - switch (latch_mode) { - case BTR_MODIFY_TREE: - case BTR_CONT_MODIFY_TREE: - break; - default: - if (!latch_by_caller) - { - /* Release the tree s-latch */ - mtr->rollback_to_savepoint(savepoint, - savepoint + 1); - block_savepoint--; - root_savepoint--; - } - /* release upper blocks */ - if (savepoint < block_savepoint) - mtr->rollback_to_savepoint(savepoint, block_savepoint); - } - - page_mode= mode; - } - - /* Remember the page search mode */ - search_mode= page_mode; - - /* Some adjustment on search mode, when the page search mode is - PAGE_CUR_RTREE_LOCATE or PAGE_CUR_RTREE_INSERT, as we are searching - with MBRs. When it is not the target level, we should search all - sub-trees that "CONTAIN" the search range/MBR. When it is at the - target level, the search becomes PAGE_CUR_LE */ - - if (page_mode == PAGE_CUR_RTREE_INSERT) - { - page_mode= (level == height) - ? PAGE_CUR_LE - : PAGE_CUR_RTREE_INSERT; - - ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE); - } - else if (page_mode == PAGE_CUR_RTREE_LOCATE && level == height) - page_mode= level == 0 ? PAGE_CUR_LE : PAGE_CUR_RTREE_GET_FATHER; - - up_match= 0; - low_match= 0; - - if (latch_mode == BTR_MODIFY_TREE || latch_mode == BTR_CONT_MODIFY_TREE) - /* Tree are locked, no need for Page Lock to protect the "path" */ - cur->rtr_info->need_page_lock= false; - - cur->page_cur.block= block; - - if (page_mode >= PAGE_CUR_CONTAIN) - { - found= rtr_cur_search_with_match(block, index, tuple, page_mode, - &cur->page_cur, cur->rtr_info); - - /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ - if (search_mode == PAGE_CUR_RTREE_INSERT && cur->rtr_info->mbr_adj) { - static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); - - if (!(latch_mode & 8)) - /* Parent MBR needs updated, should retry with BTR_MODIFY_TREE */ - goto func_exit; - - cur->rtr_info->mbr_adj= false; - mbr_adj= true; - } - - if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) - cur->low_match= DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; - } - else - { - /* Search for complete index fields. */ - up_bytes= low_bytes= 0; - if (page_cur_search_with_match(tuple, page_mode, &up_match, - &low_match, &cur->page_cur, nullptr)) { - err= DB_CORRUPTION; - goto func_exit; - } - } - - /* If this is the desired level, leave the loop */ - - ut_ad(height == btr_page_get_level(btr_cur_get_page(cur))); - - /* Add Predicate lock if it is serializable isolation - and only if it is in the search case */ - if (mode >= PAGE_CUR_CONTAIN && mode != PAGE_CUR_RTREE_INSERT && - mode != PAGE_CUR_RTREE_LOCATE && cur->rtr_info->need_prdt_lock) - { - lock_prdt_t prdt; - - { - trx_t* trx= thr_get_trx(cur->thr); - TMLockTrxGuard g{TMLockTrxArgs(*trx)}; - lock_init_prdt_from_mbr(&prdt, &cur->rtr_info->mbr, mode, - trx->lock.lock_heap); - } - - if (rw_latch == RW_NO_LATCH && height != 0) - block->page.lock.s_lock(); - - lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, cur->thr); - - if (rw_latch == RW_NO_LATCH && height != 0) - block->page.lock.s_unlock(); - } - - if (level != height) - { - ut_ad(height > 0); - - height--; - guess= nullptr; - - const rec_t *node_ptr= btr_cur_get_rec(cur); - - offsets= rec_get_offsets(node_ptr, index, offsets, 0, - ULINT_UNDEFINED, &heap); - - if (page_rec_is_supremum(node_ptr)) - { - cur->low_match= 0; - cur->up_match= 0; - goto func_exit; - } - - /* If we are doing insertion or record locating, - remember the tree nodes we visited */ - if (page_mode == PAGE_CUR_RTREE_INSERT || - (search_mode == PAGE_CUR_RTREE_LOCATE && - latch_mode != BTR_MODIFY_LEAF)) - { - const bool add_latch= latch_mode == BTR_MODIFY_TREE && - rw_latch == RW_NO_LATCH; - - if (add_latch) - { - ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | - MTR_MEMO_SX_LOCK)); - block->page.lock.s_lock(); - } - - /* Store the parent cursor location */ - ut_d(auto num_stored=) - rtr_store_parent_path(block, cur, latch_mode, height + 1, mtr); - - if (page_mode == PAGE_CUR_RTREE_INSERT) - { - btr_pcur_t *r_cursor= rtr_get_parent_cursor(cur, height + 1, true); - /* If it is insertion, there should be only one parent for - each level traverse */ - ut_ad(num_stored == 1); - node_ptr= btr_pcur_get_rec(r_cursor); - } - - if (add_latch) - block->page.lock.s_unlock(); - - ut_ad(!page_rec_is_supremum(node_ptr)); - } - - ut_ad(page_mode == search_mode || - (page_mode == PAGE_CUR_WITHIN && - search_mode == PAGE_CUR_RTREE_LOCATE)); - page_mode= search_mode; - - if (height == level && latch_mode == BTR_MODIFY_TREE) - { - ut_ad(upper_rw_latch == RW_X_LATCH); - for (auto i= root_savepoint, n= mtr->get_savepoint(); i < n; i++) - mtr->x_latch_at_savepoint(i, mtr->at_savepoint(i)); - } - - /* Go to the child node */ - page_id.set_page_no(btr_node_ptr_get_child_page_no(node_ptr, offsets)); - - if (page_mode >= PAGE_CUR_CONTAIN && page_mode != PAGE_CUR_RTREE_INSERT) - { - rtr_node_path_t *path= cur->rtr_info->path; - - if (found && !path->empty()) - { - ut_ad(path->back().page_no == page_id.page_no()); - path->pop_back(); -#ifdef UNIV_DEBUG - if (page_mode == PAGE_CUR_RTREE_LOCATE && - latch_mode != BTR_MODIFY_LEAF) - { - btr_pcur_t* pcur= cur->rtr_info->parent_path->back().cursor; - rec_t *my_node_ptr= btr_pcur_get_rec(pcur); - - offsets= rec_get_offsets(my_node_ptr, index, offsets, - 0, ULINT_UNDEFINED, &heap); - - ut_ad(page_id.page_no() == - btr_node_ptr_get_child_page_no(my_node_ptr, offsets)); - } -#endif - } - } - - goto search_loop; - } - - if (level) - { - if (upper_rw_latch == RW_NO_LATCH) - { - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); - btr_block_get(*index, page_id.page_no(), RW_X_LATCH, false, mtr, &err); - } - else - { - ut_ad(mtr->memo_contains_flagged(block, upper_rw_latch)); - ut_ad(!latch_by_caller); - } - - if (page_mode <= PAGE_CUR_LE) - { - cur->low_match= low_match; - cur->up_match= up_match; - } - } - else - { - cur->low_match= low_match; - cur->low_bytes= low_bytes; - cur->up_match= up_match; - cur->up_bytes= up_bytes; - - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); - ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); - } - - goto func_exit; -} - -dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple, - btr_latch_mode latch_mode, - mtr_t *mtr, page_cur_mode_t mode) -{ - return rtr_search_to_nth_level(0, tuple, mode, latch_mode, cur, mtr); -} - -/** Search for a spatial index leaf page record. -@param pcur cursor -@param tuple search tuple -@param mode search mode -@param mtr mini-transaction */ -dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple, - page_cur_mode_t mode, mtr_t *mtr) -{ -#ifdef UNIV_DEBUG - switch (mode) { - case PAGE_CUR_CONTAIN: - case PAGE_CUR_INTERSECT: - case PAGE_CUR_WITHIN: - case PAGE_CUR_DISJOINT: - case PAGE_CUR_MBR_EQUAL: - break; - default: - ut_ad("invalid mode" == 0); - } -#endif - pcur->latch_mode= BTR_SEARCH_LEAF; - pcur->search_mode= mode; - pcur->pos_state= BTR_PCUR_IS_POSITIONED; - pcur->trx_if_known= nullptr; - return rtr_search_leaf(&pcur->btr_cur, tuple, BTR_SEARCH_LEAF, mtr, mode); -} - /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be -closed with btr_pcur_close. */ -bool rtr_search( +closed with btr_pcur_close. Mainly called by row_search_index_entry() */ +bool +rtr_pcur_open( + dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ { static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); ut_ad(latch_mode & BTR_MODIFY_LEAF); - ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED)); - ut_ad(mtr->is_empty()); /* Initialize the cursor */ btr_pcur_init(cursor); cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode = PAGE_CUR_RTREE_LOCATE; - cursor->trx_if_known = nullptr; - - if (latch_mode & 8) { - mtr_x_lock_index(cursor->index(), mtr); - } else { - latch_mode - = btr_latch_mode(latch_mode | BTR_ALREADY_S_LATCHED); - mtr_sx_lock_index(cursor->index(), mtr); - } + cursor->search_mode = PAGE_CUR_RTREE_LOCATE; + cursor->trx_if_known = NULL; /* Search with the tree cursor */ btr_cur_t* btr_cursor = btr_pcur_get_btr_cur(cursor); + btr_cursor->page_cur.index = index; - btr_cursor->rtr_info - = rtr_create_rtr_info(false, false, - btr_cursor, cursor->index()); + btr_cursor->rtr_info = rtr_create_rtr_info(false, false, + btr_cursor, index); + /* Purge will SX lock the tree instead of take Page Locks */ if (btr_cursor->thr) { btr_cursor->rtr_info->need_page_lock = true; btr_cursor->rtr_info->thr = btr_cursor->thr; } - if (rtr_search_leaf(btr_cursor, tuple, latch_mode, mtr) - != DB_SUCCESS) { + if ((latch_mode & 8) && index->lock.have_u_not_x()) { + index->lock.u_x_upgrade(SRW_LOCK_CALL); + mtr->lock_upgrade(index->lock); + } + + if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_RTREE_LOCATE, + latch_mode, + btr_cursor, mtr) != DB_SUCCESS) { return true; } @@ -1001,8 +560,7 @@ bool rtr_search( const rec_t* rec = btr_pcur_get_rec(cursor); - const bool d= rec_get_deleted_flag( - rec, cursor->index()->table->not_redundant()); + const bool d= rec_get_deleted_flag(rec, index->table->not_redundant()); if (page_rec_is_infimum(rec) || btr_pcur_get_low_match(cursor) != dtuple_get_n_fields(tuple) @@ -1013,12 +571,26 @@ bool rtr_search( btr_cursor->rtr_info->fd_del = true; btr_cursor->low_match = 0; } - - mtr->rollback_to_savepoint(1); + /* Did not find matched row in first dive. Release + latched block if any before search more pages */ + if (!(latch_mode & 8)) { + ulint tree_idx = btr_cursor->tree_height - 1; + rtr_info_t* rtr_info = btr_cursor->rtr_info; + + if (rtr_info->tree_blocks[tree_idx]) { + mtr_release_block_at_savepoint( + mtr, + rtr_info->tree_savepoints[tree_idx], + rtr_info->tree_blocks[tree_idx]); + rtr_info->tree_blocks[tree_idx] = NULL; + } + } if (!rtr_pcur_getnext_from_path(tuple, PAGE_CUR_RTREE_LOCATE, btr_cursor, 0, latch_mode, - true, mtr)) { + latch_mode + & (8 | BTR_ALREADY_S_LATCHED), + mtr)) { return true; } @@ -1026,10 +598,6 @@ bool rtr_search( == dtuple_get_n_fields(tuple)); } - if (!(latch_mode & 8)) { - mtr->rollback_to_savepoint(0, 1); - } - return false; } @@ -1073,7 +641,8 @@ static const rec_t* rtr_get_father_node( if (sea_cur && sea_cur->tree_height > level) { ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); - if (rtr_cur_restore_position(sea_cur, level, mtr)) { + if (rtr_cur_restore_position(BTR_CONT_MODIFY_TREE, sea_cur, + level, mtr)) { btr_pcur_t* r_cursor = rtr_get_parent_cursor( sea_cur, level, false); @@ -1099,8 +668,9 @@ static const rec_t* rtr_get_father_node( btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index); - if (rtr_search_to_nth_level(level, tuple, PAGE_CUR_RTREE_LOCATE, - BTR_CONT_MODIFY_TREE, btr_cur, mtr) + if (btr_cur_search_to_nth_level(level, tuple, + PAGE_CUR_RTREE_LOCATE, + BTR_CONT_MODIFY_TREE, btr_cur, mtr) != DB_SUCCESS) { } else if (sea_cur && sea_cur->tree_height == level) { rec = btr_cur_get_rec(btr_cur); @@ -1159,8 +729,9 @@ rtr_page_get_father_node_ptr( page_no = btr_cur_get_block(cursor)->page.id().page_no(); index = btr_cur_get_index(cursor); - ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); + ut_ad(srv_read_only_mode + || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); ut_ad(dict_index_get_page(index) != page_no); @@ -1308,10 +879,32 @@ rtr_init_rtr_info( if (!reinit) { /* Reset all members. */ - memset(rtr_info, 0, sizeof *rtr_info); - static_assert(PAGE_CUR_UNSUPP == 0, "compatibility"); + rtr_info->path = NULL; + rtr_info->parent_path = NULL; + rtr_info->matches = NULL; + mysql_mutex_init(rtr_path_mutex_key, &rtr_info->rtr_path_mutex, nullptr); + + memset(rtr_info->tree_blocks, 0x0, + sizeof(rtr_info->tree_blocks)); + memset(rtr_info->tree_savepoints, 0x0, + sizeof(rtr_info->tree_savepoints)); + rtr_info->mbr.xmin = 0.0; + rtr_info->mbr.xmax = 0.0; + rtr_info->mbr.ymin = 0.0; + rtr_info->mbr.ymax = 0.0; + rtr_info->thr = NULL; + rtr_info->heap = NULL; + rtr_info->cursor = NULL; + rtr_info->index = NULL; + rtr_info->need_prdt_lock = false; + rtr_info->need_page_lock = false; + rtr_info->allocated = false; + rtr_info->mbr_adj = false; + rtr_info->fd_del = false; + rtr_info->search_tuple = NULL; + rtr_info->search_mode = PAGE_CUR_UNSUPP; } ut_ad(!rtr_info->matches || rtr_info->matches->matched_recs->empty()); @@ -1537,6 +1130,7 @@ struct optimistic_get static bool rtr_cur_restore_position( + ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ btr_cur_t* btr_cur, /*!< in: detached persistent cursor */ ulint level, /*!< in: index level */ mtr_t* mtr) /*!< in: mtr */ @@ -1564,6 +1158,8 @@ rtr_cur_restore_position( r_cursor->modify_clock = 100; ); + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); + if (r_cursor->block_when_stored.run_with_hint( optimistic_get(r_cursor, mtr))) { ut_ad(r_cursor->pos_state == BTR_PCUR_IS_POSITIONED); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 244d535f020..949a9eae0d7 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1549,7 +1549,8 @@ static void innodb_drop_database(handlerton*, char *path) mtr_t mtr; mtr.start(); pcur.btr_cur.page_cur.index = sys_index; - err= btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); + err= btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) goto err_exit; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 8ff36bc4bdd..7c162b9af6a 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -6081,8 +6081,7 @@ func_exit: que_thr_t* thr = pars_complete_graph_for_exec( NULL, trx, ctx->heap, NULL); - page_id_t id{block->page.id()}; - const bool is_root = id.page_no() == index->page; + const bool is_root = block->page.id().page_no() == index->page; if (rec_is_metadata(rec, *index)) { ut_ad(page_rec_is_user_rec(rec)); @@ -6099,10 +6098,8 @@ func_exit: } /* Ensure that the root page is in the correct format. */ - id.set_page_no(index->page); - buf_block_t* root = mtr.get_already_latched( - id, MTR_MEMO_PAGE_SX_FIX); - + buf_block_t* root = btr_root_block_get(index, RW_X_LATCH, + &mtr, &err); if (UNIV_UNLIKELY(!root)) { goto func_exit; } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index c3dddf8251d..5a97ea4ebe0 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2023, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2297,7 +2297,7 @@ loop: btr_pcur_t pcur; pcur.btr_cur.page_cur.index= ibuf.index; ibuf_mtr_start(&mtr); - if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr)) + if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, 0, &mtr)) goto func_exit; if (!btr_pcur_is_on_user_rec(&pcur)) { @@ -2493,8 +2493,8 @@ ibuf_merge_space( /* Position the cursor on the first matching record. */ pcur.btr_cur.page_cur.index = ibuf.index; - dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, - &pcur, &mtr); + dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, 0, &mtr); ut_ad(err != DB_SUCCESS || page_validate(btr_pcur_get_page(&pcur), ibuf.index)); @@ -3238,7 +3238,7 @@ ibuf_insert_low( ibuf_mtr_start(&mtr); pcur.btr_cur.page_cur.index = ibuf.index; - err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); + err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, 0, &mtr); if (err != DB_SUCCESS) { func_exit: ibuf_mtr_commit(&mtr); @@ -4037,12 +4037,12 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ibuf_mtr_start(mtr); mysql_mutex_lock(&ibuf_mutex); - ibuf.index->lock.u_lock(SRW_LOCK_ARGS(__FILE__, __LINE__)); if (!ibuf_restore_pos(page_id, search_tuple, BTR_PURGE_TREE, pcur, mtr)) { + mysql_mutex_unlock(&ibuf_mutex); - ibuf.index->lock.u_unlock(); + ut_ad(mtr->has_committed()); goto func_exit; } @@ -4053,11 +4053,13 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ut_a(err == DB_SUCCESS); ibuf_size_update(ibuf_root->page.frame); + mysql_mutex_unlock(&ibuf_mutex); + ibuf.empty = page_is_empty(ibuf_root->page.frame); + } else { + mysql_mutex_unlock(&ibuf_mutex); } - mysql_mutex_unlock(&ibuf_mutex); - ibuf.index->lock.u_unlock(); ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: @@ -4235,7 +4237,7 @@ loop: /* Position pcur in the insert buffer at the first entry for this index page */ - if (btr_pcur_open_on_user_rec(search_tuple, + if (btr_pcur_open_on_user_rec(search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr) != DB_SUCCESS) { err = DB_CORRUPTION; @@ -4452,7 +4454,7 @@ loop: /* Position pcur in the insert buffer at the first entry for the space */ - if (btr_pcur_open_on_user_rec(&search_tuple, + if (btr_pcur_open_on_user_rec(&search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr) != DB_SUCCESS) { goto leave_loop; diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index a1cc10b05db..a2aa46b62da 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2023, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -211,12 +211,13 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false) @param[in,out] mtr mini-transaction */ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr); -ATTRIBUTE_COLD __attribute__((nonnull)) +ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result)) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. @param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction */ -void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); +@param[in,out] mtr mini-transaction +@return error code */ +dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); /*************************************************************//** Makes tree one level higher by splitting the root, and inserts @@ -240,7 +241,7 @@ btr_root_raise_and_insert( ulint n_ext, /*!< in: number of externally stored columns */ mtr_t* mtr, /*!< in: mtr */ dberr_t* err) /*!< out: error code */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /*************************************************************//** Reorganizes an index page. diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index e9c44d6d9bf..49bc8a4ff1b 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,6 +63,12 @@ enum { BTR_KEEP_IBUF_BITMAP = 32 }; +/* btr_cur_latch_leaves() returns latched blocks and savepoints. */ +struct btr_latch_leaves_t { + buf_block_t* blocks[3]; + ulint savepoints[3]; +}; + #include "que0types.h" #include "row0types.h" @@ -120,28 +126,51 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result)); +/** Optimistically latches the leaf page or pages requested. +@param[in] block guessed buffer block +@param[in] modify_clock modify clock value +@param[in,out] latch_mode BTR_SEARCH_LEAF, ... +@param[in,out] cursor cursor +@param[in] mtr mini-transaction +@return true if success */ +bool +btr_cur_optimistic_latch_leaves( + buf_block_t* block, + ib_uint64_t modify_clock, + btr_latch_mode* latch_mode, + btr_cur_t* cursor, + mtr_t* mtr); + MY_ATTRIBUTE((warn_unused_result)) -/********************************************************************//** -Searches an index tree and positions a tree cursor on a given non-leaf level. +/** Searches an index tree and positions a tree cursor on a given level. NOTE: n_fields_cmp in tuple must be set so that it cannot be compared to node pointer page number fields on the upper levels of the tree! +Note that if mode is PAGE_CUR_LE, which is used in inserts, then cursor->up_match and cursor->low_match both will have sensible values. -Cursor is left at the place where an insert of the -search tuple should be performed in the B-tree. InnoDB does an insert -immediately after the cursor. Thus, the cursor may end up on a user record, -or on a page infimum record. +If mode is PAGE_CUR_GE, then up_match will a have a sensible value. @param level the tree level of search @param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field! -@param latch RW_S_LATCH or RW_X_LATCH +@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a + unique prefix of a record, mode should be PAGE_CUR_LE, not + PAGE_CUR_GE, as the latter may end up on the previous page of + the record! Inserts should always be made using PAGE_CUR_LE + to search the position! +@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, + BTR_DELETE_MARK, or BTR_DELETE; + cursor->left_block is used to store a pointer to the left + neighbor page @param cursor tree cursor; the cursor page is s- or x-latched, but see also above! @param mtr mini-transaction +@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none) @return DB_SUCCESS on success or error code otherwise */ dberr_t btr_cur_search_to_nth_level(ulint level, const dtuple_t *tuple, - rw_lock_type_t rw_latch, - btr_cur_t *cursor, mtr_t *mtr); + page_cur_mode_t mode, + btr_latch_mode latch_mode, + btr_cur_t *cursor, mtr_t *mtr, + ib_uint64_t autoinc= 0); /*************************************************************//** Tries to perform an insert to a page in an index tree, next to cursor. @@ -628,13 +657,15 @@ btr_rec_copy_externally_stored_field( @param[in] block leaf page where the search converged @param[in] latch_mode BTR_SEARCH_LEAF, ... @param[in] cursor cursor -@param[in,out] mtr mini-transaction */ +@param[in,out] mtr mini-transaction +@param[out] latch_leaves latched blocks and savepoints */ void btr_cur_latch_leaves( - ulint block_savepoint, + buf_block_t* block, btr_latch_mode latch_mode, btr_cur_t* cursor, - mtr_t* mtr); + mtr_t* mtr, + btr_latch_leaves_t* latch_leaves = nullptr); /*######################################################################*/ @@ -703,14 +734,14 @@ struct btr_cur_t { BTR_MODIFY_PREV */ /*------------------------------*/ que_thr_t* thr; /*!< this field is only used - when search_leaf() + when btr_cur_search_to_nth_level is called for an index entry insertion: the calling query thread is passed here to be used in the insert buffer */ /*------------------------------*/ /** The following fields are used in - search_leaf() to pass information: */ + btr_cur_search_to_nth_level to pass information: */ /* @{ */ enum btr_cur_method flag; /*!< Search method used */ ulint tree_height; /*!< Tree height if the search is done @@ -719,7 +750,8 @@ struct btr_cur_t { ulint up_match; /*!< If the search mode was PAGE_CUR_LE, the number of matched fields to the the first user record to the right of - the cursor record after search_leaf(); + the cursor record after + btr_cur_search_to_nth_level; for the mode PAGE_CUR_GE, the matched fields to the first user record AT THE CURSOR or to the right of it; @@ -736,7 +768,8 @@ struct btr_cur_t { ulint low_match; /*!< if search mode was PAGE_CUR_LE, the number of matched fields to the first user record AT THE CURSOR or - to the left of it after search_leaf(); + to the left of it after + btr_cur_search_to_nth_level; NOT defined for PAGE_CUR_GE or any other search modes; see also the NOTE in up_match! */ @@ -770,24 +803,6 @@ struct btr_cur_t { dberr_t open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode, mtr_t *mtr); - /** Search the leaf page record corresponding to a key. - @param tuple key to search for, with correct n_fields_cmp - @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting - @param latch_mode latch mode - @param mtr mini-transaction - @return error code */ - dberr_t search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, - btr_latch_mode latch_mode, mtr_t *mtr); - - /** Search the leaf page record corresponding to a key, exclusively latching - all sibling pages on the way. - @param tuple key to search for, with correct n_fields_cmp - @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting - @param mtr mini-transaction - @return error code */ - dberr_t pessimistic_search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, - mtr_t *mtr); - /** Open the cursor at a random leaf page record. @param offsets temporary memory for rec_get_offsets() @param heap memory heap for rec_get_offsets() @@ -847,14 +862,14 @@ inherited external field. */ #define BTR_EXTERN_INHERITED_FLAG 64U #ifdef BTR_CUR_HASH_ADAPT -/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */ +/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_non_sea; /** Old value of btr_cur_n_non_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ extern ulint btr_cur_n_non_sea_old; /** Number of successful adaptive hash index lookups in -btr_cur_t::search_leaf(). */ +btr_cur_search_to_nth_level(). */ extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_sea; /** Old value of btr_cur_n_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index a25704ede53..cd8eacdc212 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,6 +70,24 @@ btr_pcur_init( /*==========*/ btr_pcur_t* pcur); /*!< in: persistent cursor */ +/**************************************************************//** +Initializes and opens a persistent cursor to an index tree. */ +inline +dberr_t +btr_pcur_open( + const dtuple_t* tuple, /*!< in: tuple on which search done */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; + NOTE that if the search is made using a unique + prefix of a record, mode should be + PAGE_CUR_LE, not PAGE_CUR_GE, as the latter + may end up on the previous page from the + record! */ + btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written + (0 if none) */ + mtr_t* mtr) /*!< in: mtr */ + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Opens an persistent cursor to an index tree without initializing the cursor. @param tuple tuple on which search done @@ -82,7 +100,8 @@ cursor. @param mtr mini-transaction @return DB_SUCCESS on success or error code otherwise. */ inline -dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode, +dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, + page_cur_mode_t mode, btr_latch_mode latch_mode, btr_pcur_t *cursor, mtr_t *mtr); @@ -390,7 +409,8 @@ struct btr_pcur_t pos_state= BTR_PCUR_IS_POSITIONED; old_rec= nullptr; - return btr_cur.open_leaf(first, index, this->latch_mode, mtr); + return btr_cur.open_leaf(first, index, + BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode), mtr); } }; @@ -413,24 +433,6 @@ inline rec_t *btr_pcur_get_rec(const btr_pcur_t *cursor) return cursor->btr_cur.page_cur.rec; } -/**************************************************************//** -Initializes and opens a persistent cursor to an index tree. */ -inline -dberr_t -btr_pcur_open( - const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_LE, ... */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode= mode; - cursor->pos_state= BTR_PCUR_IS_POSITIONED; - cursor->trx_if_known= nullptr; - return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr); -} - /** Open a cursor on the first user record satisfying the search condition; in case of no match, after the last index record. */ MY_ATTRIBUTE((nonnull, warn_unused_result)) @@ -438,15 +440,16 @@ inline dberr_t btr_pcur_open_on_user_rec( const dtuple_t* tuple, /*!< in: tuple on which search done */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... */ btr_latch_mode latch_mode, /*!< in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ { + ut_ad(mode == PAGE_CUR_GE || mode == PAGE_CUR_G); ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); - if (dberr_t err= - btr_pcur_open(tuple, PAGE_CUR_GE, latch_mode, cursor, mtr)) + if (dberr_t err= btr_pcur_open(tuple, mode, latch_mode, cursor, 0, mtr)) return err; if (!btr_pcur_is_after_last_on_page(cursor) || btr_pcur_is_after_last_in_tree(cursor)) diff --git a/storage/innobase/include/btr0pcur.inl b/storage/innobase/include/btr0pcur.inl index b827d70dc47..551f8f20fca 100644 --- a/storage/innobase/include/btr0pcur.inl +++ b/storage/innobase/include/btr0pcur.inl @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2023, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -299,10 +299,38 @@ btr_pcur_init( pcur->btr_cur.rtr_info = NULL; } +/**************************************************************//** +Initializes and opens a persistent cursor to an index tree. */ +inline +dberr_t +btr_pcur_open( + const dtuple_t* tuple, /*!< in: tuple on which search done */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; + NOTE that if the search is made using a unique + prefix of a record, mode should be + PAGE_CUR_LE, not PAGE_CUR_GE, as the latter + may end up on the previous page from the + record! */ + btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written + (0 if none) */ + mtr_t* mtr) /*!< in: mtr */ +{ + ut_ad(!cursor->index()->is_spatial()); + cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + cursor->search_mode= mode; + cursor->pos_state= BTR_PCUR_IS_POSITIONED; + cursor->trx_if_known= nullptr; + return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode, + btr_pcur_get_btr_cur(cursor), + mtr, autoinc); +} + /** Opens an persistent cursor to an index tree without initializing the cursor. @param tuple tuple on which search done -@param mode search mode; NOTE that if the search is made using a +@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a unique prefix of a record, mode should be PAGE_CUR_LE, not PAGE_CUR_GE, as the latter may end up on the previous page of the record! @@ -311,7 +339,8 @@ cursor. @param mtr mini-transaction @return DB_SUCCESS on success or error code otherwise. */ inline -dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode, +dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, + page_cur_mode_t mode, btr_latch_mode latch_mode, btr_pcur_t *cursor, mtr_t *mtr) { @@ -319,7 +348,10 @@ dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode, cursor->search_mode= mode; cursor->pos_state= BTR_PCUR_IS_POSITIONED; cursor->trx_if_known= nullptr; - return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr); + + /* Search with the tree cursor */ + return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode, + btr_pcur_get_btr_cur(cursor), mtr); } /**************************************************************//** diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h index 912c022c64f..6118bfbc128 100644 --- a/storage/innobase/include/btr0types.h +++ b/storage/innobase/include/btr0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2023, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -55,26 +55,25 @@ in the index record. */ #define BTR_EXTERN_LOCAL_STORED_MAX_SIZE \ (BTR_EXTERN_FIELD_REF_SIZE * 2) -/** Latching modes for btr_cur_t::search_leaf(). */ +/** Latching modes for btr_cur_search_to_nth_level(). */ enum btr_latch_mode { /** Search a record on a leaf page and S-latch it. */ BTR_SEARCH_LEAF = RW_S_LATCH, /** (Prepare to) modify a record on a leaf page and X-latch it. */ BTR_MODIFY_LEAF = RW_X_LATCH, - /** U-latch root and X-latch a leaf page */ - BTR_MODIFY_ROOT_AND_LEAF = RW_SX_LATCH, /** Obtain no latches. */ BTR_NO_LATCHES = RW_NO_LATCH, - /** Search the previous record. - Used in btr_pcur_move_backward_from_page(). */ + /** Search the previous record. */ BTR_SEARCH_PREV = 4 | BTR_SEARCH_LEAF, - /** Modify the previous record. - Used in btr_pcur_move_backward_from_page() and ibuf_insert(). */ + /** Modify the previous record. */ BTR_MODIFY_PREV = 4 | BTR_MODIFY_LEAF, - /** Start modifying the entire B-tree. */ + /** Start searching the entire B-tree. */ + BTR_SEARCH_TREE = 8 | BTR_SEARCH_LEAF, + /** Start modifying1 the entire B-tree. */ BTR_MODIFY_TREE = 8 | BTR_MODIFY_LEAF, - /** Continue modifying the entire R-tree. - Only used by rtr_search_to_nth_level(). */ + /** Continue searching the entire B-tree. */ + BTR_CONT_SEARCH_TREE = 4 | BTR_SEARCH_TREE, + /** Continue modifying the entire B-tree. */ BTR_CONT_MODIFY_TREE = 4 | BTR_MODIFY_TREE, /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually @@ -99,14 +98,14 @@ enum btr_latch_mode { dict_index_t::lock S-latch is being held. */ BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, + /** Search the entire index tree, assuming that the + dict_index_t::lock S-latch is being held. */ + BTR_SEARCH_TREE_ALREADY_S_LATCHED = BTR_SEARCH_TREE + | BTR_ALREADY_S_LATCHED, /** Search and X-latch a leaf page, assuming that the dict_index_t::lock is being held in non-exclusive mode. */ BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED, - /** U-latch root and X-latch a leaf page, assuming that - dict_index_t::lock is being held in U mode. */ - BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF - | BTR_ALREADY_S_LATCHED, /** Attempt to delete-mark a secondary index record. */ BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK, @@ -133,9 +132,6 @@ enum btr_latch_mode { /** Attempt to delete a record in the tree. */ BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, - /** Attempt to delete a record in an x-latched tree. */ - BTR_PURGE_TREE_ALREADY_LATCHED = BTR_PURGE_TREE - | BTR_ALREADY_S_LATCHED, /** Attempt to insert a record into the tree. */ BTR_INSERT_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT, diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h index b07261ce042..777f2432c93 100644 --- a/storage/innobase/include/gis0rtree.h +++ b/storage/innobase/include/gis0rtree.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -59,44 +59,6 @@ Created 2013/03/27 Jimmy Yang and Allen Lai /* Geometry data header */ #define GEO_DATA_HEADER_SIZE 4 - -/** Search for a spatial index leaf page record. -@param cur cursor -@param tuple search tuple -@param latch_mode latching mode -@param mtr mini-transaction -@param mode search mode */ -dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple, - btr_latch_mode latch_mode, mtr_t *mtr, - page_cur_mode_t mode= PAGE_CUR_RTREE_LOCATE) - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/** Search for inserting a spatial index leaf page record. -@param cur cursor -@param tuple search tuple -@param latch_mode latching mode -@param mtr mini-transaction */ -inline dberr_t rtr_insert_leaf(btr_cur_t *cur, const dtuple_t *tuple, - btr_latch_mode latch_mode, mtr_t *mtr) -{ - return rtr_search_leaf(cur, tuple, latch_mode, mtr, PAGE_CUR_RTREE_INSERT); -} - -/** Search for a spatial index leaf page record. -@param pcur cursor -@param tuple search tuple -@param mode search mode -@param mtr mini-transaction */ -dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple, - page_cur_mode_t mode, mtr_t *mtr) - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, - page_cur_mode_t mode, - btr_latch_mode latch_mode, - btr_cur_t *cur, mtr_t *mtr) - MY_ATTRIBUTE((nonnull, warn_unused_result)); - /**********************************************************************//** Builds a Rtree node pointer out of a physical record and a page number. @return own: node pointer */ @@ -333,9 +295,11 @@ rtr_store_parent_path( /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. */ -bool rtr_search( +bool +rtr_pcur_open( + dict_index_t* index, /*!< in: index */ const dtuple_t* tuple, /*!< in: tuple on which search done */ - btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ MY_ATTRIBUTE((warn_unused_result)); diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h index d6a4ef67a38..4fccfdb6c26 100644 --- a/storage/innobase/include/gis0type.h +++ b/storage/innobase/include/gis0type.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2023, MariaDB Corporation. +Copyright (c) 2018, 2020, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -105,6 +105,12 @@ typedef struct rtr_info{ matched_rec_t* matches;/*!< struct holding matching leaf records */ mysql_mutex_t rtr_path_mutex; /*!< mutex protect the "path" vector */ + buf_block_t* tree_blocks[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; + /*!< tracking pages that would be locked + at leaf level, for future free */ + ulint tree_savepoints[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; + /*!< savepoint used to release latches/blocks + on each level and leaf level */ rtr_mbr_t mbr; /*!< the search MBR */ que_thr_t* thr; /*!< the search thread */ mem_heap_t* heap; /*!< memory heap */ diff --git a/storage/innobase/include/ibuf0ibuf.inl b/storage/innobase/include/ibuf0ibuf.inl index 1e21f74ff2b..9f4e937f31d 100644 --- a/storage/innobase/include/ibuf0ibuf.inl +++ b/storage/innobase/include/ibuf0ibuf.inl @@ -100,8 +100,9 @@ ibuf_should_try( decide */ { return(innodb_change_buffering - && !(index->type & (DICT_CLUSTERED | DICT_IBUF)) && ibuf.max_size != 0 + && !dict_index_is_clust(index) + && !dict_index_is_spatial(index) && index->table->quiesce == QUIESCE_NONE && (ignore_sec_unique || !dict_index_is_unique(index))); } diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index 0dfb50125bd..093b706c1de 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2019, 2023, MariaDB Corporation. +Copyright (c) 2019, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -401,9 +401,7 @@ inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage, ut_ad(have_offset || offset == 0); ut_ad(offset + len <= srv_page_size); static_assert(MIN_4BYTE >= UNIV_PAGE_SIZE_MAX, "consistency"); - ut_ad(type == FREE_PAGE || type == OPTION || (type == EXTENDED && !bpage) || - memo_contains_flagged(bpage, - MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); + size_t max_len; if (!have_len) max_len= 1 + 5 + 5; diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 3c4e9da69a2..41f9b473856 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -37,6 +37,15 @@ Created 11/26/1995 Heikki Tuuri /** Commit a mini-transaction. */ #define mtr_commit(m) (m)->commit() +/** Set and return a savepoint in mtr. +@return savepoint */ +#define mtr_set_savepoint(m) (m)->get_savepoint() + +/** Release the (index tree) s-latch stored in an mtr memo after a +savepoint. */ +#define mtr_release_s_latch_at_savepoint(m, s, l) \ + (m)->release_s_latch_at_savepoint((s), (l)) + /** Change the logging mode of a mini-transaction. @return old mode */ #define mtr_set_log_mode(m, d) (m)->set_log_mode((d)) @@ -51,10 +60,13 @@ Created 11/26/1995 Heikki Tuuri # define mtr_sx_lock_index(i,m) (m)->u_lock(&(i)->lock) #endif +#define mtr_release_block_at_savepoint(m, s, b) \ + (m)->release_block_at_savepoint((s), (b)) + /** Mini-transaction memo stack slot. */ struct mtr_memo_slot_t { - /** pointer to the object */ + /** pointer to the object, or nullptr if released */ void *object; /** type of the stored object */ mtr_memo_type_t type; @@ -113,36 +125,41 @@ struct mtr_t { return m_memo.size(); } - /** Get the block at a savepoint */ - buf_block_t *at_savepoint(ulint savepoint) const + /** Release the (index tree) s-latch stored in an mtr memo after a savepoint. + @param savepoint value returned by get_savepoint() + @param lock index latch to release */ + void release_s_latch_at_savepoint(ulint savepoint, index_lock *lock) { ut_ad(is_active()); - const mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.type < MTR_MEMO_S_LOCK); - ut_ad(slot.object); - return static_cast<buf_block_t*>(slot.object); + mtr_memo_slot_t &slot= m_memo[savepoint]; + ut_ad(slot.object == lock); + ut_ad(slot.type == MTR_MEMO_S_LOCK); + slot.object= nullptr; + lock->s_unlock(); } - - /** Try to get a block at a savepoint. - @param savepoint the savepoint right before the block was acquired - @return the block at the savepoint - @retval nullptr if no buffer block was registered at that savepoint */ - buf_block_t *block_at_savepoint(ulint savepoint) const + /** Release the block in an mtr memo after a savepoint. */ + void release_block_at_savepoint(ulint savepoint, buf_block_t *block) { ut_ad(is_active()); - const mtr_memo_slot_t &slot= m_memo[savepoint]; - return slot.type < MTR_MEMO_S_LOCK - ? static_cast<buf_block_t*>(slot.object) - : nullptr; + mtr_memo_slot_t &slot= m_memo[savepoint]; + ut_ad(slot.object == block); + ut_ad(!(slot.type & MTR_MEMO_MODIFY)); + slot.object= nullptr; + block->page.unfix(); + + switch (slot.type) { + case MTR_MEMO_PAGE_S_FIX: + block->page.lock.s_unlock(); + break; + case MTR_MEMO_PAGE_SX_FIX: + case MTR_MEMO_PAGE_X_FIX: + block->page.lock.u_or_x_unlock(slot.type == MTR_MEMO_PAGE_SX_FIX); + break; + default: + break; + } } - /** Retrieve a page that has already been latched. - @param id page identifier - @param type page latch type - @return block */ - buf_block_t *get_already_latched(const page_id_t id, mtr_memo_type_t type) - const; - /** @return if we are about to make a clean buffer block dirty */ static bool is_block_dirtied(const buf_page_t &b) { @@ -391,17 +408,28 @@ public: @param rw_latch latch to acquire */ void upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch); - /** Register a page latch on a previously buffer-fixed block. */ - void lock_register(ulint savepoint, mtr_memo_type_t type) + /** Register a page latch on a buffer-fixed block was buffer-fixed. + @param latch latch type */ + void u_lock_register(ulint savepoint) + { + mtr_memo_slot_t &slot= m_memo[savepoint]; + ut_ad(slot.type == MTR_MEMO_BUF_FIX); + slot.type= MTR_MEMO_PAGE_SX_FIX; + } + + /** Register a page latch on a buffer-fixed block was buffer-fixed. + @param latch latch type */ + void s_lock_register(ulint savepoint) { mtr_memo_slot_t &slot= m_memo[savepoint]; ut_ad(slot.type == MTR_MEMO_BUF_FIX); - ut_ad(type <= MTR_MEMO_PAGE_SX_FIX); - slot.type= type; + slot.type= MTR_MEMO_PAGE_S_FIX; } /** Upgrade U locks on a block to X */ void page_lock_upgrade(const buf_block_t &block); + /** Upgrade U lock to X */ + void lock_upgrade(const index_lock &lock); /** Check if we are holding tablespace latch @param space tablespace to search for diff --git a/storage/innobase/include/small_vector.h b/storage/innobase/include/small_vector.h index d28a36184b8..76069cfc168 100644 --- a/storage/innobase/include/small_vector.h +++ b/storage/innobase/include/small_vector.h @@ -71,7 +71,6 @@ public: using const_iterator= const T *; using reverse_iterator= std::reverse_iterator<iterator>; using reference= T &; - using const_reference= const T&; iterator begin() { return static_cast<iterator>(BeginX); } const_iterator begin() const { return static_cast<const_iterator>(BeginX); } @@ -82,8 +81,6 @@ public: reverse_iterator rend() { return reverse_iterator(begin()); } reference operator[](size_t i) { assert(i < size()); return begin()[i]; } - const_reference operator[](size_t i) const - { return const_cast<small_vector&>(*this)[i]; } void erase(const_iterator S, const_iterator E) { diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 66a3df3a1ce..2c93d7ffe5a 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -40,8 +40,6 @@ Created 11/26/1995 Heikki Tuuri void mtr_memo_slot_t::release() const { - ut_ad(object); - switch (type) { case MTR_MEMO_S_LOCK: static_cast<index_lock*>(object)->s_unlock(); @@ -123,7 +121,11 @@ inline void mtr_t::release_resources() void mtr_t::release() { for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++) - it->release(); + { + mtr_memo_slot_t &slot= *it; + if (slot.object) + slot.release(); + } m_memo.clear(); } @@ -189,7 +191,7 @@ void mtr_t::commit() for (const mtr_memo_slot_t &slot : m_memo) { - if (slot.type & MTR_MEMO_MODIFY) + if (slot.object && slot.type & MTR_MEMO_MODIFY) { ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY || slot.type == MTR_MEMO_PAGE_SX_MODIFY); @@ -224,7 +226,8 @@ void mtr_t::rollback_to_savepoint(ulint begin, ulint end) while (s-- > begin) { const mtr_memo_slot_t &slot= m_memo[s]; - ut_ad(slot.object); + if (!slot.object) + continue; /* This is intended for releasing latches on indexes or unmodified buffer pool pages. */ ut_ad(slot.type <= MTR_MEMO_SX_LOCK); @@ -268,7 +271,8 @@ void mtr_t::commit_shrink(fil_space_t &space) for (mtr_memo_slot_t &slot : m_memo) { - ut_ad(slot.object); + if (!slot.object) + continue; switch (slot.type) { default: ut_ad("invalid type" == 0); @@ -932,6 +936,15 @@ void mtr_t::page_lock_upgrade(const buf_block_t &block) #endif /* BTR_CUR_HASH_ADAPT */ } +void mtr_t::lock_upgrade(const index_lock &lock) +{ + ut_ad(lock.have_x()); + + for (mtr_memo_slot_t &slot : m_memo) + if (slot.object == &lock && slot.type == MTR_MEMO_SX_LOCK) + slot.type= MTR_MEMO_X_LOCK; +} + /** Latch a buffer pool block. @param block block to be latched @param rw_latch RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH */ @@ -1103,8 +1116,7 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags) for (const mtr_memo_slot_t &slot : m_memo) { - ut_ad(slot.object); - if (!(flags & slot.type)) + if (!slot.object || !(flags & slot.type)) continue; buf_page_t *bpage= static_cast<buf_page_t*>(slot.object); @@ -1164,23 +1176,20 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) buf_block_t *freed= nullptr; const page_id_t id{space.id, offset}; - for (auto it= m_memo.end(); it != m_memo.begin(); ) + for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++) { - it--; - next: mtr_memo_slot_t &slot= *it; buf_block_t *block= static_cast<buf_block_t*>(slot.object); - ut_ad(block); - if (block == freed) + if (!block); + else if (block == freed) { if (slot.type & (MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX)) slot.type= MTR_MEMO_PAGE_X_FIX; else { ut_ad(slot.type == MTR_MEMO_BUF_FIX); + slot.object= nullptr; block->page.unfix(); - m_memo.erase(it, it + 1); - goto next; } } else if (slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX) && diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 861095b421e..45c35bc6995 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2023, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1629,9 +1629,6 @@ inline dberr_t IndexPurge::purge_pessimistic_delete() noexcept dberr_t IndexPurge::purge() noexcept { btr_pcur_store_position(&m_pcur, &m_mtr); - m_mtr.commit(); - m_mtr.start(); - m_mtr.set_log_mode(MTR_LOG_NO_REDO); dberr_t err= purge_pessimistic_delete(); m_mtr.start(); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index eea66ae05aa..81babd0082e 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2023, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -314,10 +314,8 @@ row_ins_clust_index_entry_by_modify( } if (mode != BTR_MODIFY_TREE) { - ut_ad(mode == BTR_MODIFY_LEAF - || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED - || mode == BTR_MODIFY_ROOT_AND_LEAF - || mode == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED); + ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED)) + == BTR_MODIFY_LEAF); /* Try optimistic updating of the record, keeping changes within the page */ @@ -1623,7 +1621,8 @@ row_ins_check_foreign_constraint( dtuple_set_n_fields_cmp(entry, foreign->n_fields); pcur.btr_cur.page_cur.index = check_index; - err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); + err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, 0, + &mtr); if (UNIV_UNLIKELY(err != DB_SUCCESS)) { goto end_scan; } @@ -2120,7 +2119,7 @@ row_ins_scan_sec_index_for_duplicate( pcur.btr_cur.page_cur.index = index; trx_t* const trx = thr_get_trx(thr); dberr_t err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, - &pcur, mtr); + &pcur, 0, mtr); if (err != DB_SUCCESS) { goto end_scan; } @@ -2540,8 +2539,8 @@ row_ins_index_entry_big_rec( index->set_modified(mtr); } - dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_MODIFY_TREE, - &pcur, &mtr); + dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, &pcur, 0, &mtr); if (error != DB_SUCCESS) { return error; } @@ -2599,10 +2598,11 @@ row_ins_clust_index_entry_low( que_thr_t* thr) /*!< in: query thread */ { btr_pcur_t pcur; + btr_cur_t* cursor; dberr_t err = DB_SUCCESS; big_rec_t* big_rec = NULL; mtr_t mtr; - uint64_t auto_inc = 0; + ib_uint64_t auto_inc = 0; mem_heap_t* offsets_heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs* offsets = offsets_; @@ -2618,7 +2618,7 @@ row_ins_clust_index_entry_low( ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index)); ut_ad(!trx->in_rollback); - mtr.start(); + mtr_start(&mtr); if (index->table->is_temporary()) { /* Disable REDO logging as the lifetime of temp-tables is @@ -2658,13 +2658,6 @@ row_ins_clust_index_entry_low( dfield->type.mtype, dfield->type.prtype & DATA_UNSIGNED); - if (auto_inc - && mode != BTR_MODIFY_TREE) { - mode = btr_latch_mode( - BTR_MODIFY_ROOT_AND_LEAF - ^ BTR_MODIFY_LEAF - ^ mode); - } } } } @@ -2674,26 +2667,20 @@ row_ins_clust_index_entry_low( the function will return in both low_match and up_match of the cursor sensible values */ pcur.btr_cur.page_cur.index = index; - err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, &mtr); + err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, auto_inc, &mtr); if (err != DB_SUCCESS) { index->table->file_unreadable = true; -err_exit: +commit_exit: mtr.commit(); goto func_exit; } - if (auto_inc) { - buf_block_t* root - = mtr.at_savepoint(mode != BTR_MODIFY_ROOT_AND_LEAF); - ut_ad(index->page == root->page.id().page_no()); - page_set_autoinc(root, auto_inc, &mtr, false); - } - - btr_pcur_get_btr_cur(&pcur)->thr = thr; + cursor = btr_pcur_get_btr_cur(&pcur); + cursor->thr = thr; #ifdef UNIV_DEBUG { - page_t* page = btr_pcur_get_page(&pcur); + page_t* page = btr_cur_get_page(cursor); rec_t* first_rec = page_rec_get_next( page_get_infimum_rec(page)); @@ -2702,7 +2689,7 @@ err_exit: } #endif /* UNIV_DEBUG */ - block = btr_pcur_get_block(&pcur); + block = btr_cur_get_block(cursor); DBUG_EXECUTE_IF("row_ins_row_level", goto skip_bulk_insert;); @@ -2724,7 +2711,7 @@ err_exit: if (err != DB_SUCCESS) { trx->error_state = err; - goto err_exit; + goto commit_exit; } if (index->table->n_rec_locks) { @@ -2754,7 +2741,7 @@ skip_bulk_insert: ut_ad(index->is_instant()); ut_ad(!dict_index_is_online_ddl(index)); - const rec_t* rec = btr_pcur_get_rec(&pcur); + const rec_t* rec = btr_cur_get_rec(cursor); if (rec_get_info_bits(rec, page_rec_is_comp(rec)) & REC_INFO_MIN_REC_FLAG) { @@ -2763,17 +2750,16 @@ skip_bulk_insert: goto err_exit; } - ut_ad(!row_ins_must_modify_rec(&pcur.btr_cur)); + ut_ad(!row_ins_must_modify_rec(cursor)); goto do_insert; } - if (rec_is_metadata(btr_pcur_get_rec(&pcur), *index)) { + if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { goto do_insert; } if (n_uniq - && (pcur.btr_cur.up_match >= n_uniq - || pcur.btr_cur.low_match >= n_uniq)) { + && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) { if (flags == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG @@ -2781,7 +2767,7 @@ skip_bulk_insert: /* Set no locks when applying log in online table rebuild. Only check for duplicates. */ err = row_ins_duplicate_error_in_clust_online( - n_uniq, entry, &pcur.btr_cur, + n_uniq, entry, cursor, &offsets, &offsets_heap); switch (err) { @@ -2792,24 +2778,26 @@ skip_bulk_insert: /* fall through */ case DB_SUCCESS_LOCKED_REC: case DB_DUPLICATE_KEY: - trx->error_info = index; + trx->error_info = cursor->index(); } } else { /* Note that the following may return also DB_LOCK_WAIT */ err = row_ins_duplicate_error_in_clust( - flags, &pcur.btr_cur, entry, thr); + flags, cursor, entry, thr); } if (err != DB_SUCCESS) { - goto err_exit; +err_exit: + mtr_commit(&mtr); + goto func_exit; } } /* Note: Allowing duplicates would qualify for modification of an existing record as the new entry is exactly same as old entry. */ - if (row_ins_must_modify_rec(&pcur.btr_cur)) { + if (row_ins_must_modify_rec(cursor)) { /* There is already an index entry with a long enough common prefix, we must convert the insert into a modify of an existing record */ @@ -2827,13 +2815,10 @@ do_insert: rec_t* insert_rec; if (mode != BTR_MODIFY_TREE) { - ut_ad(mode == BTR_MODIFY_LEAF - || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED - || mode == BTR_MODIFY_ROOT_AND_LEAF - || mode - == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED); + ut_ad(mode == BTR_MODIFY_LEAF || + mode == BTR_MODIFY_LEAF_ALREADY_LATCHED); err = btr_cur_optimistic_insert( - flags, &pcur.btr_cur, &offsets, &offsets_heap, + flags, cursor, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); } else { @@ -2842,15 +2827,17 @@ do_insert: goto err_exit; } + DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust"); + err = btr_cur_optimistic_insert( - flags, &pcur.btr_cur, + flags, cursor, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); if (err == DB_FAIL) { err = btr_cur_pessimistic_insert( - flags, &pcur.btr_cur, + flags, cursor, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); @@ -2962,7 +2949,9 @@ row_ins_sec_index_entry_low( rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); rtr_info_update_btr(&cursor, &rtr_info); - err = rtr_insert_leaf(&cursor, entry, search_mode, &mtr); + err = btr_cur_search_to_nth_level(0, entry, + PAGE_CUR_RTREE_INSERT, + search_mode, &cursor, &mtr); if (err == DB_SUCCESS && search_mode == BTR_MODIFY_LEAF && rtr_info.mbr_adj) { @@ -2978,8 +2967,9 @@ row_ins_sec_index_entry_low( } else { index->set_modified(mtr); } - err = rtr_insert_leaf(&cursor, entry, - search_mode, &mtr); + err = btr_cur_search_to_nth_level( + 0, entry, PAGE_CUR_RTREE_INSERT, + search_mode, &cursor, &mtr); } DBUG_EXECUTE_IF( @@ -2995,8 +2985,8 @@ row_ins_sec_index_entry_low( : BTR_INSERT)); } - err = cursor.search_leaf(entry, PAGE_CUR_LE, search_mode, - &mtr); + err = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE, + search_mode, &cursor, &mtr); } if (err != DB_SUCCESS) { @@ -3072,12 +3062,12 @@ row_ins_sec_index_entry_low( prevent any insertion of a duplicate by another transaction. Let us now reposition the cursor and continue the insertion (bypassing the change buffer). */ - err = cursor.search_leaf( - entry, PAGE_CUR_LE, + err = btr_cur_search_to_nth_level( + 0, entry, PAGE_CUR_LE, btr_latch_mode(search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE)), - &mtr); + &cursor, &mtr); if (err != DB_SUCCESS) { goto func_exit; } diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 0743dc2bb50..94d69d88fb5 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1696,8 +1696,8 @@ err_exit: mtr->start(); index->set_modified(*mtr); pcur->btr_cur.page_cur.index = index; - error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_PURGE_TREE, pcur, - mtr); + error = btr_pcur_open(entry, PAGE_CUR_LE, + BTR_PURGE_TREE, pcur, 0, mtr); if (error) { goto err_exit; } @@ -1780,8 +1780,8 @@ row_log_table_apply_delete( mtr_start(&mtr); index->set_modified(mtr); - dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_PURGE_TREE, &pcur, - &mtr); + dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, + BTR_PURGE_TREE, &pcur, 0, &mtr); if (err != DB_SUCCESS) { goto all_done; } @@ -1917,8 +1917,8 @@ row_log_table_apply_update( mtr.start(); index->set_modified(mtr); - error = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_MODIFY_TREE, &pcur, - &mtr); + error = btr_pcur_open(old_pk, PAGE_CUR_LE, + BTR_MODIFY_TREE, &pcur, 0, &mtr); if (error != DB_SUCCESS) { func_exit: mtr.commit(); @@ -3084,8 +3084,11 @@ row_log_apply_op_low( record. The operation may already have been performed, depending on when the row in the clustered index was scanned. */ - *error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock - ? BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr); + *error = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE, + has_index_lock + ? BTR_MODIFY_TREE + : BTR_MODIFY_LEAF, + &cursor, &mtr); if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { goto func_exit; } @@ -3135,9 +3138,9 @@ row_log_apply_op_low( mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - *error = cursor.search_leaf(entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, - &mtr); + *error = btr_cur_search_to_nth_level( + 0, entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, &cursor, &mtr); if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { goto func_exit; } @@ -3239,9 +3242,9 @@ insert_the_rec: mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - *error = cursor.search_leaf(entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, - &mtr); + *error = btr_cur_search_to_nth_level( + 0, entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, &cursor, &mtr); if (*error != DB_SUCCESS) { break; } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 535762ee105..caa2646eee2 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -150,8 +150,9 @@ public: false); rtr_info_update_btr(&ins_cur, &rtr_info); - error = rtr_insert_leaf(&ins_cur, dtuple, - BTR_MODIFY_LEAF, &mtr); + error = btr_cur_search_to_nth_level( + 0, dtuple, PAGE_CUR_RTREE_INSERT, + BTR_MODIFY_LEAF, &ins_cur, &mtr); /* It need to update MBR in parent entry, so change search mode to BTR_MODIFY_TREE */ @@ -163,8 +164,10 @@ public: rtr_info_update_btr(&ins_cur, &rtr_info); mtr.start(); index->set_modified(mtr); - error = rtr_insert_leaf(&ins_cur, dtuple, - BTR_MODIFY_TREE, &mtr); + error = btr_cur_search_to_nth_level( + 0, dtuple, + PAGE_CUR_RTREE_INSERT, + BTR_MODIFY_TREE, &ins_cur, &mtr); } if (error == DB_SUCCESS) { @@ -186,8 +189,11 @@ public: &ins_cur, index, false); rtr_info_update_btr(&ins_cur, &rtr_info); - error = rtr_insert_leaf(&ins_cur, dtuple, - BTR_MODIFY_TREE, &mtr); + error = btr_cur_search_to_nth_level( + 0, dtuple, + PAGE_CUR_RTREE_INSERT, + BTR_MODIFY_TREE, + &ins_cur, &mtr); if (error == DB_SUCCESS) { error = btr_cur_pessimistic_insert( diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index 753b42332fc..e216d9faa3b 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -104,7 +104,7 @@ bool row_purge_remove_clust_if_poss_low( /*===============================*/ purge_node_t* node, /*!< in/out: row purge node */ - btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */ + btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ { dict_index_t* index = dict_table_get_first_index(node->table); table_id_t table_id = 0; @@ -342,20 +342,17 @@ row_purge_remove_sec_if_poss_tree( ibool success = TRUE; dberr_t err; mtr_t mtr; + enum row_search_result search_result; log_free_check(); mtr.start(); index->set_modified(mtr); pcur.btr_cur.page_cur.index = index; - if (index->is_spatial()) { - if (!rtr_search(entry, BTR_PURGE_TREE, &pcur, &mtr)) { - goto found; - } - goto func_exit; - } + search_result = row_search_index_entry(entry, BTR_PURGE_TREE, + &pcur, &mtr); - switch (row_search_index_entry(entry, BTR_PURGE_TREE, &pcur, &mtr)) { + switch (search_result) { case ROW_NOT_FOUND: /* Not found. This is a legitimate condition. In a rollback, InnoDB will remove secondary recs that would @@ -384,7 +381,6 @@ row_purge_remove_sec_if_poss_tree( which cannot be purged yet, requires its existence. If some requires, we should do nothing. */ -found: if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) { /* Remove the index record, which should have been @@ -443,6 +439,8 @@ row_purge_remove_sec_if_poss_leaf( { mtr_t mtr; btr_pcur_t pcur; + enum btr_latch_mode mode; + enum row_search_result search_result; bool success = true; log_free_check(); @@ -451,27 +449,31 @@ row_purge_remove_sec_if_poss_leaf( mtr.start(); index->set_modified(mtr); + /* Change buffering is disabled for spatial index and + virtual index. */ + mode = (index->type & (DICT_SPATIAL | DICT_VIRTUAL)) + ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF; pcur.btr_cur.page_cur.index = index; /* Set the purge node for the call to row_purge_poss_sec(). */ pcur.btr_cur.purge_node = node; if (index->is_spatial()) { pcur.btr_cur.thr = NULL; - if (!rtr_search(entry, BTR_MODIFY_LEAF, &pcur, &mtr)) { - goto found; - } - goto func_exit; + index->lock.u_lock(SRW_LOCK_CALL); + search_result = row_search_index_entry( + entry, mode, &pcur, &mtr); + index->lock.u_unlock(); + } else { + /* Set the query thread, so that ibuf_insert_low() will be + able to invoke thd_get_trx(). */ + pcur.btr_cur.thr = static_cast<que_thr_t*>( + que_node_get_parent(node)); + search_result = row_search_index_entry( + entry, mode, &pcur, &mtr); } - /* Set the query thread, so that ibuf_insert_low() will be - able to invoke thd_get_trx(). */ - pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node)); - - switch (row_search_index_entry(entry, index->has_virtual() - ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF, - &pcur, &mtr)) { + switch (search_result) { case ROW_FOUND: -found: /* Before attempting to purge a record, check if it is safe to do so. */ if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) { diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index 4a00b2a430e..b998d27d836 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2023, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1216,7 +1216,7 @@ row_search_on_row_ref( & REC_INFO_MIN_REC_FLAG; } else { ut_a(ref->n_fields == index->n_uniq); - if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, mtr) + if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, 0, mtr) != DB_SUCCESS) { return false; } @@ -1278,13 +1278,21 @@ row_search_index_entry( ut_ad(dtuple_check_typed(entry)); - if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) { - return ROW_NOT_FOUND; + if (pcur->index()->is_spatial()) { + if (rtr_pcur_open(pcur->index(), entry, mode, pcur, mtr)) { + return ROW_NOT_FOUND; + } + } else { + if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, 0, mtr) + != DB_SUCCESS) { + return ROW_NOT_FOUND; + } } switch (btr_pcur_get_btr_cur(pcur)->flag) { case BTR_CUR_DELETE_REF: ut_ad(!(~mode & BTR_DELETE)); + ut_ad(!pcur->index()->is_spatial()); return(ROW_NOT_DELETED_REF); case BTR_CUR_DEL_MARK_IBUF: diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index e44cc466295..87e3ca43b1c 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -2,7 +2,7 @@ Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2015, 2023, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -4776,7 +4776,7 @@ wait_table_again: pcur->btr_cur.thr = thr; pcur->old_rec = nullptr; - if (index->is_spatial()) { + if (dict_index_is_spatial(index)) { if (!prebuilt->rtr_info) { prebuilt->rtr_info = rtr_create_rtr_info( set_also_gap_locks, true, @@ -4792,14 +4792,11 @@ wait_table_again: prebuilt->rtr_info->search_tuple = search_tuple; prebuilt->rtr_info->search_mode = mode; } - - err = rtr_search_leaf(pcur, search_tuple, mode, &mtr); - } else { - err = btr_pcur_open_with_no_init(search_tuple, mode, - BTR_SEARCH_LEAF, - pcur, &mtr); } + err = btr_pcur_open_with_no_init(search_tuple, mode, + BTR_SEARCH_LEAF, pcur, &mtr); + if (err != DB_SUCCESS) { page_corrupted: rec = NULL; @@ -5774,7 +5771,8 @@ next_rec_after_check: if (spatial_search) { /* No need to do store restore for R-tree */ - mtr.rollback_to_savepoint(0); + mtr.commit(); + mtr.start(); } else if (mtr_extra_clust_savepoint) { /* We must release any clustered index latches if we are moving to the next non-clustered diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 50196e78092..6567019a33d 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -233,7 +233,7 @@ func_exit: if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_METADATA) { /* When rolling back the very first instant ADD COLUMN operation, reset the root page to the basic state. */ - btr_reset_instant(*index, true, &mtr); + err = btr_reset_instant(*index, true, &mtr); } btr_pcur_commit_specify_mtr(&node->pcur, &mtr); @@ -268,32 +268,21 @@ row_undo_ins_remove_sec_low( pcur.btr_cur.page_cur.index = index; row_mtr_start(&mtr, index, !modify_leaf); + if (modify_leaf) { + mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; + mtr_s_lock_index(index, &mtr); + } else { + ut_ad(mode == BTR_PURGE_TREE); + mtr_sx_lock_index(index, &mtr); + } + if (index->is_spatial()) { mode = modify_leaf - ? btr_latch_mode(BTR_MODIFY_LEAF + ? btr_latch_mode(BTR_MODIFY_LEAF_ALREADY_LATCHED | BTR_RTREE_DELETE_MARK | BTR_RTREE_UNDO_INS) : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); btr_pcur_get_btr_cur(&pcur)->thr = thr; - if (rtr_search(entry, mode, &pcur, &mtr)) { - goto func_exit; - } - - if (rec_get_deleted_flag( - btr_pcur_get_rec(&pcur), - dict_table_is_comp(index->table))) { - ib::error() << "Record found in index " << index->name - << " is deleted marked on insert rollback."; - ut_ad(0); - } - goto found; - } else if (modify_leaf) { - mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; - mtr_s_lock_index(index, &mtr); - } else { - ut_ad(mode == BTR_PURGE_TREE); - mode = BTR_PURGE_TREE_ALREADY_LATCHED; - mtr_x_lock_index(index, &mtr); } switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { @@ -306,7 +295,15 @@ row_undo_ins_remove_sec_low( case ROW_NOT_FOUND: break; case ROW_FOUND: - found: + if (dict_index_is_spatial(index) + && rec_get_deleted_flag( + btr_pcur_get_rec(&pcur), + dict_table_is_comp(index->table))) { + ib::error() << "Record found in index " << index->name + << " is deleted marked on insert rollback."; + ut_ad(0); + } + btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); if (modify_leaf) { @@ -321,7 +318,6 @@ row_undo_ins_remove_sec_low( } } -func_exit: btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 50e15e03cc9..2d04dca4003 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2023, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -133,7 +133,8 @@ row_undo_mod_clust_low( && node->ref == &trx_undo_metadata && btr_cur_get_index(btr_cur)->table->instant && node->update->info_bits == REC_INFO_METADATA_ADD) { - btr_reset_instant(*btr_cur->index(), false, mtr); + err = btr_reset_instant(*btr_cur_get_index(btr_cur), + false, mtr); } } @@ -489,6 +490,7 @@ row_undo_mod_del_mark_or_remove_sec_low( dberr_t err = DB_SUCCESS; mtr_t mtr; mtr_t mtr_vers; + row_search_result search_result; const bool modify_leaf = mode == BTR_MODIFY_LEAF; row_mtr_start(&mtr, index, !modify_leaf); @@ -503,11 +505,6 @@ row_undo_mod_del_mark_or_remove_sec_low( | BTR_RTREE_UNDO_INS) : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); btr_cur->thr = thr; - if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) { - goto found; - } else { - goto func_exit; - } } else if (!index->is_committed()) { /* The index->online_status may change if the index is or was being created online, but not committed yet. It @@ -517,8 +514,7 @@ row_undo_mod_del_mark_or_remove_sec_low( mtr_s_lock_index(index, &mtr); } else { ut_ad(mode == BTR_PURGE_TREE); - mode = BTR_PURGE_TREE_ALREADY_LATCHED; - mtr_x_lock_index(index, &mtr); + mtr_sx_lock_index(index, &mtr); } } else { /* For secondary indexes, @@ -527,8 +523,9 @@ row_undo_mod_del_mark_or_remove_sec_low( ut_ad(!dict_index_is_online_ddl(index)); } - switch (UNIV_EXPECT(row_search_index_entry(entry, mode, &pcur, &mtr), - ROW_FOUND)) { + search_result = row_search_index_entry(entry, mode, &pcur, &mtr); + + switch (UNIV_EXPECT(search_result, ROW_FOUND)) { case ROW_NOT_FOUND: /* In crash recovery, the secondary index record may be missing if the UPDATE did not have time to insert @@ -550,7 +547,6 @@ row_undo_mod_del_mark_or_remove_sec_low( ut_error; } -found: /* We should remove the index record if no prior version of the row, which cannot be purged yet, requires its existence. If some requires, we should delete mark the record. */ @@ -669,12 +665,13 @@ row_undo_mod_del_unmark_sec_and_undo_update( trx_t* trx = thr_get_trx(thr); const ulint flags = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG; + row_search_result search_result; const auto orig_mode = mode; pcur.btr_cur.page_cur.index = index; ut_ad(trx->id != 0); - if (index->is_spatial()) { + if (dict_index_is_spatial(index)) { /* FIXME: Currently we do a 2-pass search for the undo due to avoid undel-mark a wrong rec in rolling back in partial update. Later, we could log some info in @@ -689,22 +686,9 @@ try_again: btr_cur->thr = thr; - if (index->is_spatial()) { - if (!rtr_search(entry, mode, &pcur, &mtr)) { - goto found; - } + search_result = row_search_index_entry(entry, mode, &pcur, &mtr); - if (mode != orig_mode && btr_cur->rtr_info->fd_del) { - mode = orig_mode; - btr_pcur_close(&pcur); - mtr.commit(); - goto try_again; - } - - goto not_found; - } - - switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { + switch (search_result) { mem_heap_t* heap; mem_heap_t* offsets_heap; rec_offs* offsets; @@ -715,7 +699,17 @@ try_again: flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ ut_error; case ROW_NOT_FOUND: -not_found: + /* For spatial index, if first search didn't find an + undel-marked rec, try to find a del-marked rec. */ + if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) { + if (mode != orig_mode) { + mode = orig_mode; + btr_pcur_close(&pcur); + mtr_commit(&mtr); + goto try_again; + } + } + if (btr_cur->up_match >= dict_index_get_n_unique(index) || btr_cur->low_match >= dict_index_get_n_unique(index)) { ib::warn() << "Record in index " << index->name @@ -773,7 +767,6 @@ not_found: break; case ROW_FOUND: -found: btr_rec_set_deleted<false>(btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur), &mtr); heap = mem_heap_create( diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index fe88fce58a2..a3f940adff5 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2023, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1832,10 +1832,12 @@ row_upd_sec_index_entry( que_thr_t* thr) /*!< in: query thread */ { mtr_t mtr; + const rec_t* rec; btr_pcur_t pcur; mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; + btr_cur_t* btr_cur; dberr_t err = DB_SUCCESS; trx_t* trx = thr_get_trx(thr); btr_latch_mode mode; @@ -1874,6 +1876,10 @@ row_upd_sec_index_entry( case SRV_TMP_SPACE_ID: mtr.set_log_mode(MTR_LOG_NO_REDO); flags = BTR_NO_LOCKING_FLAG; + if (index->is_spatial()) { + mode = btr_latch_mode(BTR_MODIFY_LEAF + | BTR_RTREE_DELETE_MARK); + } break; default: index->set_modified(mtr); @@ -1882,35 +1888,26 @@ row_upd_sec_index_entry( flags = index->table->no_rollback() ? BTR_NO_ROLLBACK : 0; /* We can only buffer delete-mark operations if there are no foreign key constraints referring to the index. */ - if (!referenced) { - mode = BTR_DELETE_MARK_LEAF; - } + mode = index->is_spatial() + ? btr_latch_mode(BTR_MODIFY_LEAF + | BTR_RTREE_DELETE_MARK) + : referenced + ? BTR_MODIFY_LEAF : BTR_DELETE_MARK_LEAF; break; } /* Set the query thread, so that ibuf_insert_low() will be able to invoke thd_get_trx(). */ - pcur.btr_cur.thr = thr; + btr_pcur_get_btr_cur(&pcur)->thr = thr; pcur.btr_cur.page_cur.index = index; - if (index->is_spatial()) { - mode = btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK); - if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) { - goto found; - } - - if (pcur.btr_cur.rtr_info->fd_del) { - /* We found the record, but a delete marked */ - goto close; - } + search_result = row_search_index_entry(entry, mode, &pcur, &mtr); - goto not_found; - } + btr_cur = btr_pcur_get_btr_cur(&pcur); - search_result = row_search_index_entry(entry, mode, &pcur, &mtr); + rec = btr_cur_get_rec(btr_cur); switch (search_result) { - const rec_t* rec; case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */ ut_error; break; @@ -1919,8 +1916,11 @@ row_upd_sec_index_entry( break; case ROW_NOT_FOUND: -not_found: - rec = btr_pcur_get_rec(&pcur); + if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) { + /* We found the record, but a delete marked */ + break; + } + ib::error() << "Record in index " << index->name << " of table " << index->table->name @@ -1934,9 +1934,7 @@ not_found: #endif /* UNIV_DEBUG */ break; case ROW_FOUND: -found: ut_ad(err == DB_SUCCESS); - rec = btr_pcur_get_rec(&pcur); /* Delete mark the old index record; it can already be delete marked if we return after a lock wait in @@ -1945,14 +1943,14 @@ found: rec, dict_table_is_comp(index->table))) { err = lock_sec_rec_modify_check_and_lock( flags, - btr_pcur_get_block(&pcur), - btr_pcur_get_rec(&pcur), index, thr, &mtr); + btr_cur_get_block(btr_cur), + btr_cur_get_rec(btr_cur), index, thr, &mtr); if (err != DB_SUCCESS) { break; } - btr_rec_set_deleted<true>(btr_pcur_get_block(&pcur), - btr_pcur_get_rec(&pcur), + btr_rec_set_deleted<true>(btr_cur_get_block(btr_cur), + btr_cur_get_rec(btr_cur), &mtr); #ifdef WITH_WSREP if (!referenced && foreign @@ -2011,7 +2009,6 @@ found: } } -close: btr_pcur_close(&pcur); mtr_commit(&mtr); |